Tag Analysis

Analysis of OpenStreetMap changesets that use tag prefixes to indicate the types of data being edited.

Monthly Top 10 Tag Prefixes: Edits, Contributors, and Percentage Analysis

code
# Get top 10 tag prefixes by total edits
df = duckdb.sql("""
WITH tags_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
top_tags AS (
    SELECT tag_prefix
    FROM (
        SELECT
            tag_prefix,
            SUM(edit_count) as total_edits
        FROM tags_expanded
        GROUP BY tag_prefix
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_tag_data AS (
    SELECT 
        te.year,
        te.month,
        CONCAT(te.year, '-', LPAD(CAST(te.month as VARCHAR), 2, '0')) as months,
        te.tag_prefix,
        COUNT(DISTINCT te.user_name) as contributors,
        SUM(te.edit_count) as edits
    FROM tags_expanded te
    WHERE te.tag_prefix IN (SELECT tag_prefix FROM top_tags)
    GROUP BY te.year, te.month, te.tag_prefix
),
monthly_total_contributors AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
),
monthly_total_edits AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
)
SELECT 
    mtd.months,
    mtd.tag_prefix,
    mtd.contributors as "Contributors",
    mtd.edits as "Edits",
    ROUND((mtd.contributors * 100.0) / mtc.total_contributors, 2) as 'Percentage of Contributors',
    ROUND((mtd.edits * 100.0) / mte.total_edits, 2) as 'Percentage of Edits'
FROM monthly_tag_data mtd
JOIN monthly_total_contributors mtc ON mtd.months = mtc.months
JOIN monthly_total_edits mte ON mtd.months = mte.months
ORDER BY mtd.year, mtd.month, mtd.tag_prefix
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Tag Prefixes",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits by Top 10 Tag Prefixes",
            label="Percentage of Edits",
            x_col="months",
            y_col="Percentage of Edits",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Tag Prefixes",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Contributors by Top 10 Tag Prefixes",
            label="Percentage of Contributors",
            x_col="months",
            y_col="Percentage of Contributors",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
    ]
)

Top 100 Tag Prefixes Yearly

code
query = """
WITH tags_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        tag_prefix,
        MIN(year) as first_year
    FROM tags_expanded
    GROUP BY user_name, tag_prefix
),
tag_totals AS (
    SELECT
        tag_prefix as "Tag Prefix",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM tags_expanded
    GROUP BY tag_prefix
),
yearly_metrics AS (
    SELECT
        te.year,
        te.tag_prefix as "Tag Prefix",
        CAST(SUM(te.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT te.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = te.year THEN te.user_name END) as BIGINT) as "New Contributors"
    FROM tags_expanded te
    LEFT JOIN user_first_year ufy 
        ON te.user_name = ufy.user_name AND te.tag_prefix = ufy.tag_prefix
    GROUP BY te.year, te.tag_prefix
)
SELECT 
    ym.year,
    ym."Tag Prefix",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    tt.total_edits_all_time as "Total Edits",
    tt.total_edits_2021_now as "Total Edits (2021 - Now)",
    tt.total_contributors_all_time as "Total Contributors",
    tt.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN tag_totals tt
    ON ym."Tag Prefix" = tt."Tag Prefix"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

top_100_contributors = df.groupby("Tag Prefix")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Tag Prefix")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Tag Prefix")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Tag Prefix")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors",
        query_or_df=df[df["Tag Prefix"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits",
        query_or_df=df[df["Tag Prefix"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Tag Prefix 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 Total Contributors
1 created_by 64,271 79,548 94,631 122,416 127,132 155,904 163,504 261,946 318,851 307,995 281,778 302,771 292,540 258,556 273,523 263,199 257,288 2,370,521
2 comment 38,981 61,602 76,523 100,702 96,185 95,195 110,950 254,853 312,573 301,664 275,756 300,447 290,998 256,925 271,704 262,001 256,409 2,197,409
3 imagery_used 0 0 0 0 56,447 125,548 134,093 148,792 195,614 216,254 206,481 245,587 243,206 209,843 219,588 216,398 205,068 1,672,177
4 locale 0 0 0 0 0 0 97,179 148,482 195,195 215,384 205,222 250,333 255,078 224,522 236,992 234,477 226,323 1,575,604
5 host 0 0 0 0 0 0 97,178 148,481 194,558 213,957 203,646 242,552 240,327 207,088 216,679 213,305 202,623 1,529,391
6 changesets_count 0 0 0 0 0 0 0 0 87,312 213,851 203,704 242,421 240,027 205,019 214,397 206,437 201,430 1,288,523
7 review_requested 0 0 0 0 0 0 0 0 27,653 64,721 70,171 89,607 88,709 68,719 72,617 70,705 59,961 527,346
8 ideditor 0 0 0 0 0 0 0 0 20,577 60,617 58,015 73,159 64,522 53,378 53,806 51,293 49,914 443,664
9 hashtags 0 0 0 0 0 0 0 0 26,592 53,296 54,135 87,224 89,990 59,699 60,736 54,550 40,342 439,519
10 source 144 592 864 992 3,385 17,755 19,981 20,293 35,191 52,333 51,976 52,119 61,802 62,124 78,860 129,950 140,959 433,004
11 bundle_id 0 0 0 0 0 0 0 96,586 102,387 71,207 55,724 35,484 22,740 21,630 26,569 20,366 24,233 358,267
12 warnings 0 0 0 0 0 0 0 0 0 0 45,784 71,226 75,750 61,559 63,662 60,045 56,322 322,726
13 version 0 3,786 66,591 102,013 73,254 30,299 24,730 15,138 10,235 6,476 4,920 3,431 605 350 234 148 98 257,754
14 build 0 3,690 66,592 102,007 73,253 30,299 24,730 15,125 10,235 6,468 4,913 3,431 606 351 234 148 96 257,715
15 resolved 0 0 0 0 0 0 0 0 0 0 20,093 84,805 60,939 46,967 48,426 45,438 44,198 243,874
16 StreetComplete 0 0 0 0 0 0 0 7 9,572 9,764 8,966 11,694 21,935 23,525 27,366 28,392 31,526 90,260
17 platform 0 0 0 0 0 0 26,306 53 29 10 5 0 0 0 0 0 0 26,365
18 browser 0 0 0 0 0 0 26,305 53 29 10 5 0 0 0 0 0 0 26,364
19 info 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1,559 18,988 19,668
20 closed 0 0 0 0 0 0 0 0 0 0 1,707 2,302 3,006 2,709 3,133 3,980 6,744 15,940
21 theme 0 0 0 0 0 0 0 0 0 0 0 184 718 1,253 1,410 1,545 2,522 5,836
22 v 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1,021 5,131 5,296
23 answer 0 0 0 0 0 0 0 0 0 0 0 0 190 1,100 1,244 1,427 2,316 4,888
24 merge_conflict_resolved 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4,700 4,700
25 create_by 0 1 577 1,185 1,851 378 49 0 0 0 0 0 0 0 0 0 0 3,510
26 website 0 7 9 11 17 12 8 6 15 18 346 480 464 505 882 972 401 3,325
27 data_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 139 1,998 1,903 3,279
28 create 0 0 0 0 0 0 0 0 0 0 0 0 96 610 678 801 1,329 2,892
29 imagery 0 1 1 0 2 2 1 1 3 4 6 8 701 1,247 880 24 4 2,364
30 type 1 1 3 8 21 173 230 256 304 344 445 524 427 358 393 371 369 2,260
31 git_commit 0 0 0 0 0 0 0 0 0 0 0 0 0 0 176 578 1,198 1,805
32 mapwithai 0 0 0 0 0 0 0 0 0 0 44 406 376 478 625 491 334 1,766
33 revert 1 1 3 5 4 3 3 3 2 3 2 3 1 1 462 832 906 1,651
34 add-image 0 0 0 0 0 0 0 0 0 0 0 0 49 267 347 367 755 1,406
35 change_over_5000m 0 0 0 0 0 0 0 0 0 0 0 0 27 230 356 321 716 1,323
36 StreetComplete_ee 0 0 0 0 0 0 0 0 0 0 0 0 0 414 788 16 0 1,060
37 rapid 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 455 623 1,023
38 language 0 0 0 0 0 0 0 0 0 0 0 125 714 162 70 12 0 936
39 os 0 0 0 0 0 0 0 0 0 0 0 26 517 350 234 147 96 847
40 move 0 0 0 0 0 0 0 0 0 0 0 0 29 162 164 227 352 786
41 (new key) 0 8 178 239 154 52 43 32 33 19 12 11 3 3 1 0 0 762
42 theme-creator 0 0 0 0 0 0 0 0 0 0 0 172 500 69 71 12 0 726
43 photos_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 439 392 698
44 change_within_25m 0 0 0 0 0 0 0 0 0 0 0 0 34 152 205 120 347 666
45 note 29 41 61 52 78 58 55 53 46 48 41 63 77 78 79 71 67 657
46 deletion 0 0 0 0 0 0 0 0 0 0 0 0 11 124 104 167 226 537
47 url 3 8 8 15 21 45 34 69 28 68 71 120 119 96 75 83 67 525
48 import 3 3 6 10 26 56 55 63 29 16 28 67 74 105 43 70 59 487
49 bot 0 13 12 29 30 32 36 42 43 269 37 37 29 38 38 35 28 485
50 change_within_5000m 0 0 0 0 0 0 0 0 0 0 0 0 14 125 137 86 155 399
51 change_within_500m 0 0 0 0 0 0 0 0 0 0 0 0 19 122 112 84 158 388
52 name 9 14 11 8 15 5 2 3 14 26 19 31 15 30 33 23 50 304
53 change_within_1000m 0 0 0 0 0 0 0 0 0 0 0 0 10 99 95 55 116 298
54 change_within_50m 0 0 0 0 0 0 0 0 0 0 0 0 17 95 92 54 112 286
55 building 0 2 5 6 7 5 1 4 9 11 11 41 35 26 38 29 26 250
56 change_within_100m 0 0 0 0 0 0 0 0 0 0 0 0 15 79 85 45 96 243
57 description 1 5 2 10 9 3 5 9 24 26 22 20 36 28 22 35 30 229
58 link-image 0 0 0 0 0 0 0 0 0 0 0 0 0 12 26 103 117 228
59 submitted_from 0 0 0 0 0 0 0 0 0 0 0 0 13 18 16 57 136 203
60 soft-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 12 21 69 120 200
61 delete-image 0 0 0 0 0 0 0 0 0 0 0 0 0 1 16 53 148 193
62 split 0 0 0 0 0 0 0 0 0 0 0 0 4 41 38 37 70 169
63 fixme 0 4 4 11 9 11 4 6 4 10 11 14 21 13 24 14 13 158
64 survey 1 1 1 9 15 11 12 15 13 17 13 25 36 33 23 30 33 149
65 source_ref 3 15 13 13 18 26 15 16 9 13 11 10 19 9 15 17 9 138
66 maproulette 0 0 0 0 1 0 0 0 0 0 0 0 0 0 40 37 75 136
67 highway 2 8 3 1 2 6 2 2 9 9 7 24 18 13 14 12 11 135
68 Source 2 17 31 30 30 7 2 3 6 1 3 4 6 5 2 2 1 134
69 contact 0 1 0 1 2 0 1 0 0 0 1 0 0 0 30 49 55 125
70 relation-fix 0 0 0 0 0 0 0 0 0 0 0 0 3 29 25 27 51 119
71 0 3 23 23 12 10 12 1 3 3 0 5 7 2 4 5 3 110
72 hashtag 0 0 0 0 0 0 0 0 1 8 12 13 16 21 12 12 16 101
73 amenity 2 3 2 2 4 2 2 2 9 10 2 13 7 2 9 13 15 98
74 author 6 31 16 17 8 4 5 1 0 1 0 1 0 1 1 1 1 92
75 path 0 1 0 0 0 0 0 0 2 0 1 1 62 29 1 1 4 91
76 generator 0 0 0 0 1 0 0 1 1 1 1 0 18 26 37 39 34 85
77 parking 0 0 1 0 0 0 0 0 0 1 0 4 2 4 2 61 13 80
78 mechanical 0 0 0 4 3 4 5 5 9 7 5 6 3 5 14 19 19 74
79 leisure 1 1 0 1 1 0 0 1 4 20 8 8 4 5 6 4 4 68
80 addr 1 5 1 2 2 3 1 2 3 2 7 10 2 2 12 6 9 67
81 special-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 1 7 21 42 65
82 remove-image 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 20 31 58
83 plantnet-ai-detection 0 0 0 0 0 0 0 0 0 0 0 0 0 10 21 12 23 57
84 attribution 2 7 4 3 6 3 1 1 5 9 3 4 10 10 7 5 5 54
85 souce 1 3 10 12 8 3 1 2 1 3 0 5 6 2 0 0 1 53
86 license 0 1 0 1 11 27 18 17 14 13 7 4 0 1 1 0 0 52
87 image 0 0 1 0 2 3 4 0 1 2 10 5 3 5 8 8 2 50
88 campaign 0 0 0 0 0 0 0 0 49 0 0 0 1 0 0 0 0 50
89 surface 0 6 3 7 3 1 1 2 1 3 2 4 3 2 3 4 7 49
90 ref 2 3 4 3 5 1 1 0 2 2 4 8 4 2 1 5 8 49
91 organisation 0 1 0 0 0 0 0 0 44 0 0 0 0 1 1 1 0 46
92 operator 0 2 0 0 0 0 0 1 3 1 2 9 3 4 1 4 16 45
93 natural 3 0 0 1 3 1 1 2 2 5 1 5 5 4 3 1 8 45
94 payment 0 1 0 0 4 9 2 0 2 2 2 0 0 0 4 7 12 45
95 relocated 0 0 0 0 0 0 0 0 0 0 0 0 0 2 11 19 19 42
96 landuse 1 0 0 5 0 0 0 2 3 6 3 4 7 2 7 2 0 42
97 clacks_overhead 0 0 0 0 0 0 0 0 1 1 1 2 7 19 8 7 11 39
98 mapillary 0 0 0 0 0 0 1 2 2 2 4 6 4 9 3 8 5 39
99 organization 0 0 0 1 0 0 0 0 38 0 0 0 0 0 0 0 0 39
100 user 4 5 5 8 4 3 2 2 0 0 0 1 1 2 1 0 1 36