Tag Analysis

Analysis of OpenStreetMap changesets that use tag prefixes to indicate the types of data being edited.

Monthly Top 10 Tag Prefixes: Edits, Contributors, and Percentage Analysis

code
# Get top 10 tag prefixes by total edits
df = duckdb.sql("""
WITH tags_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
top_tags AS (
    SELECT tag_prefix
    FROM (
        SELECT
            tag_prefix,
            SUM(edit_count) as total_edits
        FROM tags_expanded
        GROUP BY tag_prefix
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_tag_data AS (
    SELECT 
        te.year,
        te.month,
        CONCAT(te.year, '-', LPAD(CAST(te.month as VARCHAR), 2, '0')) as months,
        te.tag_prefix,
        COUNT(DISTINCT te.user_name) as contributors,
        SUM(te.edit_count) as edits
    FROM tags_expanded te
    WHERE te.tag_prefix IN (SELECT tag_prefix FROM top_tags)
    GROUP BY te.year, te.month, te.tag_prefix
),
monthly_total_contributors AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
),
monthly_total_edits AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
)
SELECT 
    mtd.months,
    mtd.tag_prefix,
    mtd.contributors as "Contributors",
    mtd.edits as "Edits",
    ROUND((mtd.contributors * 100.0) / mtc.total_contributors, 2) as 'Percentage of Contributors',
    ROUND((mtd.edits * 100.0) / mte.total_edits, 2) as 'Percentage of Edits'
FROM monthly_tag_data mtd
JOIN monthly_total_contributors mtc ON mtd.months = mtc.months
JOIN monthly_total_edits mte ON mtd.months = mte.months
ORDER BY mtd.year, mtd.month, mtd.tag_prefix
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Tag Prefixes",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits by Top 10 Tag Prefixes",
            label="Percentage of Edits",
            x_col="months",
            y_col="Percentage of Edits",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Tag Prefixes",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Contributors by Top 10 Tag Prefixes",
            label="Percentage of Contributors",
            x_col="months",
            y_col="Percentage of Contributors",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
    ]
)

Top 100 Tag Prefixes Yearly

code
query = """
WITH tags_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        tag_prefix,
        MIN(year) as first_year
    FROM tags_expanded
    GROUP BY user_name, tag_prefix
),
tag_totals AS (
    SELECT
        tag_prefix as "Tag Prefix",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM tags_expanded
    GROUP BY tag_prefix
),
yearly_metrics AS (
    SELECT
        te.year,
        te.tag_prefix as "Tag Prefix",
        CAST(SUM(te.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT te.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = te.year THEN te.user_name END) as BIGINT) as "New Contributors"
    FROM tags_expanded te
    LEFT JOIN user_first_year ufy 
        ON te.user_name = ufy.user_name AND te.tag_prefix = ufy.tag_prefix
    GROUP BY te.year, te.tag_prefix
)
SELECT 
    ym.year,
    ym."Tag Prefix",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    tt.total_edits_all_time as "Total Edits",
    tt.total_edits_2021_now as "Total Edits (2021 - Now)",
    tt.total_contributors_all_time as "Total Contributors",
    tt.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN tag_totals tt
    ON ym."Tag Prefix" = tt."Tag Prefix"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

top_100_contributors = df.groupby("Tag Prefix")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Tag Prefix")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Tag Prefix")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Tag Prefix")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors",
        query_or_df=df[df["Tag Prefix"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits",
        query_or_df=df[df["Tag Prefix"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Tag Prefix 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 Total Contributors
1 created_by 64,271 79,548 94,631 122,416 127,132 155,904 163,504 261,946 318,851 307,995 281,778 302,771 292,540 258,556 273,523 263,199 237,204 2,354,699
2 comment 38,981 61,602 76,523 100,702 96,185 95,195 110,950 254,853 312,573 301,664 275,756 300,447 290,998 256,925 271,704 262,001 236,366 2,181,561
3 imagery_used 0 0 0 0 56,447 125,548 134,093 148,792 195,614 216,254 206,481 245,587 243,206 209,843 219,588 216,398 189,352 1,659,692
4 locale 0 0 0 0 0 0 97,179 148,482 195,195 215,384 205,222 250,333 255,078 224,522 236,992 234,477 209,170 1,562,066
5 host 0 0 0 0 0 0 97,178 148,481 194,558 213,957 203,646 242,552 240,327 207,088 216,679 213,305 186,879 1,516,815
6 changesets_count 0 0 0 0 0 0 0 0 87,312 213,851 203,704 242,421 240,027 205,019 214,397 206,437 185,816 1,275,943
7 review_requested 0 0 0 0 0 0 0 0 27,653 64,721 70,171 89,607 88,709 68,719 72,617 70,705 54,338 522,426
8 ideditor 0 0 0 0 0 0 0 0 20,577 60,617 58,015 73,159 64,522 53,378 53,806 51,293 45,354 439,348
9 hashtags 0 0 0 0 0 0 0 0 26,592 53,296 54,135 87,224 89,990 59,699 60,736 54,550 36,101 435,841
10 source 144 592 864 992 3,385 17,755 19,981 20,293 35,191 52,333 51,976 52,119 61,802 62,124 78,860 129,950 131,532 425,218
11 bundle_id 0 0 0 0 0 0 0 96,586 102,387 71,207 55,724 35,484 22,740 21,630 26,569 20,366 22,387 356,713
12 warnings 0 0 0 0 0 0 0 0 0 0 45,784 71,226 75,750 61,559 63,662 60,045 51,617 318,908
13 version 0 3,786 66,591 102,013 73,254 30,299 24,730 15,138 10,235 6,476 4,920 3,431 605 350 234 148 92 257,750
14 build 0 3,690 66,592 102,007 73,253 30,299 24,730 15,125 10,235 6,468 4,913 3,431 606 351 234 148 90 257,711
15 resolved 0 0 0 0 0 0 0 0 0 0 20,093 84,805 60,939 46,967 48,426 45,438 41,112 241,548
16 StreetComplete 0 0 0 0 0 0 0 7 9,572 9,764 8,966 11,694 21,935 23,525 27,366 28,392 29,534 88,759
17 platform 0 0 0 0 0 0 26,306 53 29 10 5 0 0 0 0 0 0 26,365
18 browser 0 0 0 0 0 0 26,305 53 29 10 5 0 0 0 0 0 0 26,364
19 info 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1,559 17,391 18,091
20 closed 0 0 0 0 0 0 0 0 0 0 1,707 2,302 3,006 2,709 3,133 3,980 6,316 15,581
21 theme 0 0 0 0 0 0 0 0 0 0 0 184 718 1,253 1,410 1,545 2,302 5,638
22 v 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1,021 4,922 5,090
23 answer 0 0 0 0 0 0 0 0 0 0 0 0 190 1,100 1,244 1,427 2,117 4,709
24 merge_conflict_resolved 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4,168 4,168
25 create_by 0 1 577 1,185 1,851 378 49 0 0 0 0 0 0 0 0 0 0 3,510
26 website 0 7 9 11 17 12 8 6 15 18 346 480 464 505 882 972 363 3,287
27 data_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 139 1,998 1,794 3,185
28 create 0 0 0 0 0 0 0 0 0 0 0 0 96 610 678 801 1,213 2,783
29 imagery 0 1 1 0 2 2 1 1 3 4 6 8 701 1,247 880 24 4 2,364
30 type 1 1 3 8 21 173 230 256 304 344 445 524 427 358 393 371 349 2,247
31 mapwithai 0 0 0 0 0 0 0 0 0 0 44 406 376 478 625 491 311 1,750
32 git_commit 0 0 0 0 0 0 0 0 0 0 0 0 0 0 176 578 1,071 1,684
33 revert 1 1 3 5 4 3 3 3 2 3 2 3 1 1 462 832 854 1,604
34 add-image 0 0 0 0 0 0 0 0 0 0 0 0 49 267 347 367 719 1,371
35 change_over_5000m 0 0 0 0 0 0 0 0 0 0 0 0 27 230 356 321 657 1,265
36 StreetComplete_ee 0 0 0 0 0 0 0 0 0 0 0 0 0 414 788 16 0 1,060
37 rapid 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 455 605 1,008
38 language 0 0 0 0 0 0 0 0 0 0 0 125 714 162 70 12 0 936
39 os 0 0 0 0 0 0 0 0 0 0 0 26 517 350 234 147 90 843
40 (new key) 0 8 178 239 154 52 43 32 33 19 12 11 3 3 1 0 0 762
41 move 0 0 0 0 0 0 0 0 0 0 0 0 29 162 164 227 321 756
42 theme-creator 0 0 0 0 0 0 0 0 0 0 0 172 500 69 71 12 0 726
43 photos_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 439 373 680
44 note 29 41 61 52 78 58 55 53 46 48 41 63 77 78 79 71 64 655
45 change_within_25m 0 0 0 0 0 0 0 0 0 0 0 0 34 152 205 120 319 640
46 url 3 8 8 15 21 45 34 69 28 68 71 120 119 96 75 83 64 523
47 deletion 0 0 0 0 0 0 0 0 0 0 0 0 11 124 104 167 203 515
48 bot 0 13 12 29 30 32 36 42 43 269 37 37 29 38 38 35 26 483
49 import 3 3 6 10 26 56 55 63 29 16 28 67 74 105 43 70 51 479
50 change_within_5000m 0 0 0 0 0 0 0 0 0 0 0 0 14 125 137 86 137 384
51 change_within_500m 0 0 0 0 0 0 0 0 0 0 0 0 19 122 112 84 145 378
52 name 9 14 11 8 15 5 2 3 14 26 19 31 15 30 33 23 48 302
53 change_within_1000m 0 0 0 0 0 0 0 0 0 0 0 0 10 99 95 55 104 288
54 change_within_50m 0 0 0 0 0 0 0 0 0 0 0 0 17 95 92 54 103 282
55 building 0 2 5 6 7 5 1 4 9 11 11 41 35 26 38 29 25 249
56 change_within_100m 0 0 0 0 0 0 0 0 0 0 0 0 15 79 85 45 84 234
57 description 1 5 2 10 9 3 5 9 24 26 22 20 36 28 22 35 29 228
58 link-image 0 0 0 0 0 0 0 0 0 0 0 0 0 12 26 103 113 225
59 submitted_from 0 0 0 0 0 0 0 0 0 0 0 0 13 18 16 57 126 194
60 soft-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 12 21 69 108 189
61 delete-image 0 0 0 0 0 0 0 0 0 0 0 0 0 1 16 53 133 179
62 fixme 0 4 4 11 9 11 4 6 4 10 11 14 21 13 24 14 13 158
63 split 0 0 0 0 0 0 0 0 0 0 0 0 4 41 38 37 57 156
64 survey 1 1 1 9 15 11 12 15 13 17 13 25 36 33 23 30 33 149
65 source_ref 3 15 13 13 18 26 15 16 9 13 11 10 19 9 15 17 9 138
66 highway 2 8 3 1 2 6 2 2 9 9 7 24 18 13 14 12 11 135
67 Source 2 17 31 30 30 7 2 3 6 1 3 4 6 5 2 2 1 134
68 maproulette 0 0 0 0 1 0 0 0 0 0 0 0 0 0 40 37 71 132
69 contact 0 1 0 1 2 0 1 0 0 0 1 0 0 0 30 49 53 123
70 relation-fix 0 0 0 0 0 0 0 0 0 0 0 0 3 29 25 27 42 110
71 0 3 23 23 12 10 12 1 3 3 0 5 7 2 4 5 3 110
72 hashtag 0 0 0 0 0 0 0 0 1 8 12 13 16 21 12 12 16 101
73 amenity 2 3 2 2 4 2 2 2 9 10 2 13 7 2 9 13 15 98
74 author 6 31 16 17 8 4 5 1 0 1 0 1 0 1 1 1 1 92
75 path 0 1 0 0 0 0 0 0 2 0 1 1 62 29 1 1 4 91
76 generator 0 0 0 0 1 0 0 1 1 1 1 0 18 26 37 39 34 85
77 parking 0 0 1 0 0 0 0 0 0 1 0 4 2 4 2 61 12 79
78 mechanical 0 0 0 4 3 4 5 5 9 7 5 6 3 5 14 19 19 74
79 leisure 1 1 0 1 1 0 0 1 4 20 8 8 4 5 6 4 4 68
80 addr 1 5 1 2 2 3 1 2 3 2 7 10 2 2 12 6 9 67
81 special-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 1 7 21 36 59
82 remove-image 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 20 30 57
83 plantnet-ai-detection 0 0 0 0 0 0 0 0 0 0 0 0 0 10 21 12 23 57
84 attribution 2 7 4 3 6 3 1 1 5 9 3 4 10 10 7 5 5 54
85 souce 1 3 10 12 8 3 1 2 1 3 0 5 6 2 0 0 1 53
86 license 0 1 0 1 11 27 18 17 14 13 7 4 0 1 1 0 0 52
87 image 0 0 1 0 2 3 4 0 1 2 10 5 3 5 8 8 2 50
88 campaign 0 0 0 0 0 0 0 0 49 0 0 0 1 0 0 0 0 50
89 surface 0 6 3 7 3 1 1 2 1 3 2 4 3 2 3 4 7 49
90 ref 2 3 4 3 5 1 1 0 2 2 4 8 4 2 1 5 8 49
91 organisation 0 1 0 0 0 0 0 0 44 0 0 0 0 1 1 1 0 46
92 natural 3 0 0 1 3 1 1 2 2 5 1 5 5 4 3 1 8 45
93 operator 0 2 0 0 0 0 0 1 3 1 2 9 3 4 1 4 15 44
94 payment 0 1 0 0 4 9 2 0 2 2 2 0 0 0 4 7 11 44
95 relocated 0 0 0 0 0 0 0 0 0 0 0 0 0 2 11 19 19 42
96 landuse 1 0 0 5 0 0 0 2 3 6 3 4 7 2 7 2 0 42
97 organization 0 0 0 1 0 0 0 0 38 0 0 0 0 0 0 0 0 39
98 mapillary 0 0 0 0 0 0 1 2 2 2 4 6 4 9 3 8 5 39
99 clacks_overhead 0 0 0 0 0 0 0 0 1 1 1 2 7 19 8 7 10 38
100 user 4 5 5 8 4 3 2 2 0 0 0 1 1 2 1 0 1 36