Tag Analysis

Analysis of OpenStreetMap changesets that use tag prefixes to indicate the types of data being edited.

Monthly Top 10 Tag Prefixes: Edits, Contributors, and Percentage Analysis

code
# Get top 10 tag prefixes by total edits
df = duckdb.sql("""
WITH tags_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
top_tags AS (
    SELECT tag_prefix
    FROM (
        SELECT
            tag_prefix,
            SUM(edit_count) as total_edits
        FROM tags_expanded
        GROUP BY tag_prefix
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_tag_data AS (
    SELECT 
        te.year,
        te.month,
        CONCAT(te.year, '-', LPAD(CAST(te.month as VARCHAR), 2, '0')) as months,
        te.tag_prefix,
        COUNT(DISTINCT te.user_name) as contributors,
        SUM(te.edit_count) as edits
    FROM tags_expanded te
    WHERE te.tag_prefix IN (SELECT tag_prefix FROM top_tags)
    GROUP BY te.year, te.month, te.tag_prefix
),
monthly_total_contributors AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
),
monthly_total_edits AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
)
SELECT 
    mtd.months,
    mtd.tag_prefix,
    mtd.contributors as "Contributors",
    mtd.edits as "Edits",
    ROUND((mtd.contributors * 100.0) / mtc.total_contributors, 2) as 'Percentage of Contributors',
    ROUND((mtd.edits * 100.0) / mte.total_edits, 2) as 'Percentage of Edits'
FROM monthly_tag_data mtd
JOIN monthly_total_contributors mtc ON mtd.months = mtc.months
JOIN monthly_total_edits mte ON mtd.months = mte.months
ORDER BY mtd.year, mtd.month, mtd.tag_prefix
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Tag Prefixes",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits by Top 10 Tag Prefixes",
            label="Percentage of Edits",
            x_col="months",
            y_col="Percentage of Edits",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Tag Prefixes",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Contributors by Top 10 Tag Prefixes",
            label="Percentage of Contributors",
            x_col="months",
            y_col="Percentage of Contributors",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
    ]
)

Top 100 Tag Prefixes Yearly

code
query = """
WITH tags_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        tag_prefix,
        MIN(year) as first_year
    FROM tags_expanded
    GROUP BY user_name, tag_prefix
),
tag_totals AS (
    SELECT
        tag_prefix as "Tag Prefix",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM tags_expanded
    GROUP BY tag_prefix
),
yearly_metrics AS (
    SELECT
        te.year,
        te.tag_prefix as "Tag Prefix",
        CAST(SUM(te.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT te.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = te.year THEN te.user_name END) as BIGINT) as "New Contributors"
    FROM tags_expanded te
    LEFT JOIN user_first_year ufy 
        ON te.user_name = ufy.user_name AND te.tag_prefix = ufy.tag_prefix
    GROUP BY te.year, te.tag_prefix
)
SELECT 
    ym.year,
    ym."Tag Prefix",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    tt.total_edits_all_time as "Total Edits",
    tt.total_edits_2021_now as "Total Edits (2021 - Now)",
    tt.total_contributors_all_time as "Total Contributors",
    tt.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN tag_totals tt
    ON ym."Tag Prefix" = tt."Tag Prefix"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

top_100_contributors = df.groupby("Tag Prefix")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Tag Prefix")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Tag Prefix")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Tag Prefix")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors",
        query_or_df=df[df["Tag Prefix"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits",
        query_or_df=df[df["Tag Prefix"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Tag Prefix 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 Total Contributors
1 created_by 64,271 79,548 94,631 122,416 127,132 155,904 163,504 261,946 318,851 307,995 281,778 302,771 292,540 258,556 273,523 263,199 273,935 137,396 2,446,668
2 comment 38,981 61,602 76,523 100,702 96,185 95,195 110,950 254,853 312,573 301,664 275,756 300,447 290,998 256,925 271,704 262,001 273,000 136,750 2,273,492
3 imagery_used 0 0 0 0 56,447 125,548 134,093 148,792 195,614 216,254 206,481 245,587 243,206 209,843 219,588 216,398 217,914 98,190 1,729,844
4 locale 0 0 0 0 0 0 97,179 148,482 195,195 215,384 205,222 250,333 255,078 224,522 236,992 234,477 240,051 111,309 1,637,041
5 host 0 0 0 0 0 0 97,178 148,481 194,558 213,957 203,646 242,552 240,327 207,088 216,679 213,305 215,441 96,559 1,587,480
6 changesets_count 0 0 0 0 0 0 0 0 87,312 213,851 203,704 242,421 240,027 205,019 214,397 206,437 214,154 95,796 1,346,645
7 review_requested 0 0 0 0 0 0 0 0 27,653 64,721 70,171 89,607 88,709 68,719 72,617 70,705 64,286 23,028 548,497
8 source 144 592 864 992 3,385 17,755 19,981 20,293 35,191 52,333 51,976 52,119 61,802 62,124 78,860 129,950 149,055 69,850 470,663
9 ideditor 0 0 0 0 0 0 0 0 20,577 60,617 58,015 73,159 64,522 53,378 53,806 51,293 53,631 20,420 464,259
10 hashtags 0 0 0 0 0 0 0 0 26,592 53,296 54,135 87,224 89,990 59,699 60,736 54,550 42,799 15,482 451,580
11 bundle_id 0 0 0 0 0 0 0 96,586 102,387 71,207 55,724 35,484 22,740 21,630 26,569 20,366 26,181 16,269 369,781
12 warnings 0 0 0 0 0 0 0 0 0 0 45,784 71,226 75,750 61,559 63,662 60,045 60,018 25,797 338,631
13 version 0 3,786 66,591 102,013 73,254 30,299 24,730 15,138 10,235 6,476 4,920 3,431 605 350 234 148 102 50 257,760
14 build 0 3,690 66,592 102,007 73,253 30,299 24,730 15,125 10,235 6,468 4,913 3,431 606 351 234 148 100 50 257,721
15 resolved 0 0 0 0 0 0 0 0 0 0 20,093 84,805 60,939 46,967 48,426 45,438 46,839 20,695 253,903
16 StreetComplete 0 0 0 0 0 0 0 7 9,572 9,764 8,966 11,694 21,935 23,525 27,366 28,392 32,919 17,874 95,402
17 info 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1,559 20,711 9,850 27,505
18 platform 0 0 0 0 0 0 26,306 53 29 10 5 0 0 0 0 0 0 0 26,365
19 browser 0 0 0 0 0 0 26,305 53 29 10 5 0 0 0 0 0 0 0 26,364
20 closed 0 0 0 0 0 0 0 0 0 0 1,707 2,302 3,006 2,709 3,133 3,980 7,121 3,214 17,744
21 theme 0 0 0 0 0 0 0 0 0 0 0 184 718 1,253 1,410 1,545 2,716 1,346 6,837
22 merge_conflict_resolved 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5,082 2,346 6,793
23 v 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1,021 5,382 2,778 6,519
24 answer 0 0 0 0 0 0 0 0 0 0 0 0 190 1,100 1,244 1,427 2,495 1,232 5,805
25 data_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 139 1,998 2,021 746 3,703
26 create_by 0 1 577 1,185 1,851 378 49 0 0 0 0 0 0 0 0 0 0 0 3,510
27 website 0 7 9 11 17 12 8 6 15 18 346 480 464 505 882 972 423 198 3,487
28 create 0 0 0 0 0 0 0 0 0 0 0 0 96 610 678 801 1,421 685 3,403
29 git_commit 0 0 0 0 0 0 0 0 0 0 0 0 0 0 176 578 1,309 773 2,557
30 imagery 0 1 1 0 2 2 1 1 3 4 6 8 701 1,247 880 24 4 1 2,365
31 type 1 1 3 8 21 173 230 256 304 344 445 524 427 358 393 371 390 163 2,321
32 mapwithai 0 0 0 0 0 0 0 0 0 0 44 406 376 478 625 491 357 178 1,846
33 revert 1 1 3 5 4 3 3 3 2 3 2 3 1 1 462 832 964 361 1,824
34 add-image 0 0 0 0 0 0 0 0 0 0 0 0 49 267 347 367 800 385 1,639
35 change_over_5000m 0 0 0 0 0 0 0 0 0 0 0 0 27 230 356 321 759 377 1,573
36 StreetComplete_ee 0 0 0 0 0 0 0 0 0 0 0 0 0 414 788 16 0 0 1,060
37 rapid 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 455 632 66 1,050
38 language 0 0 0 0 0 0 0 0 0 0 0 125 714 162 70 12 0 1 937
39 move 0 0 0 0 0 0 0 0 0 0 0 0 29 162 164 227 372 165 914
40 os 0 0 0 0 0 0 0 0 0 0 0 26 517 350 234 147 100 50 855
41 photos_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 439 408 200 820
42 change_within_25m 0 0 0 0 0 0 0 0 0 0 0 0 34 152 205 120 372 214 797
43 (new key) 0 8 178 239 154 52 43 32 33 19 12 11 3 3 1 0 0 0 762
44 theme-creator 0 0 0 0 0 0 0 0 0 0 0 172 500 69 71 12 0 0 726
45 note 29 41 61 52 78 58 55 53 46 48 41 63 77 78 79 71 70 34 680
46 deletion 0 0 0 0 0 0 0 0 0 0 0 0 11 124 104 167 238 99 614
47 url 3 8 8 15 21 45 34 69 28 68 71 120 119 96 75 83 67 39 540
48 import 3 3 6 10 26 56 55 63 29 16 28 67 74 105 43 70 63 26 506
49 bot 0 13 12 29 30 32 36 42 43 269 37 37 29 38 38 35 31 32 500
50 change_within_5000m 0 0 0 0 0 0 0 0 0 0 0 0 14 125 137 86 168 88 454
51 change_within_500m 0 0 0 0 0 0 0 0 0 0 0 0 19 122 112 84 172 81 454
52 change_within_1000m 0 0 0 0 0 0 0 0 0 0 0 0 10 99 95 55 125 64 344
53 change_within_50m 0 0 0 0 0 0 0 0 0 0 0 0 17 95 92 54 120 73 338
54 name 9 14 11 8 15 5 2 3 14 26 19 31 15 30 33 23 55 10 319
55 change_within_100m 0 0 0 0 0 0 0 0 0 0 0 0 15 79 85 45 103 58 283
56 description 1 5 2 10 9 3 5 9 24 26 22 20 36 28 22 35 30 59 280
57 link-image 0 0 0 0 0 0 0 0 0 0 0 0 0 12 26 103 126 51 268
58 building 0 2 5 6 7 5 1 4 9 11 11 41 35 26 38 29 28 5 257
59 split 0 0 0 0 0 0 0 0 0 0 0 0 4 41 38 37 88 70 252
60 delete-image 0 0 0 0 0 0 0 0 0 0 0 0 0 1 16 53 160 68 250
61 soft-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 12 21 69 127 48 246
62 submitted_from 0 0 0 0 0 0 0 0 0 0 0 0 13 18 16 57 141 45 236
63 upload_attempt_error 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 55 175 215
64 fixme 0 4 4 11 9 11 4 6 4 10 11 14 21 13 24 14 16 6 165
65 relation-fix 0 0 0 0 0 0 0 0 0 0 0 0 3 29 25 27 62 38 165
66 survey 1 1 1 9 15 11 12 15 13 17 13 25 36 33 23 30 34 14 156
67 maproulette 0 0 0 0 1 0 0 0 0 0 0 0 0 0 40 37 80 30 156
68 contact 0 1 0 1 2 0 1 0 0 0 1 0 0 0 30 49 58 27 146
69 source_ref 3 15 13 13 18 26 15 16 9 13 11 10 19 9 15 17 10 11 146
70 highway 2 8 3 1 2 6 2 2 9 9 7 24 18 13 14 12 12 6 142
71 Source 2 17 31 30 30 7 2 3 6 1 3 4 6 5 2 2 1 4 138
72 0 3 23 23 12 10 12 1 3 3 0 5 7 2 4 5 3 0 110
73 hashtag 0 0 0 0 0 0 0 0 1 8 12 13 16 21 12 12 17 6 106
74 amenity 2 3 2 2 4 2 2 2 9 10 2 13 7 2 9 13 15 7 105
75 generator 0 0 0 0 1 0 0 1 1 1 1 0 18 26 37 39 35 23 97
76 author 6 31 16 17 8 4 5 1 0 1 0 1 0 1 1 1 1 1 93
77 path 0 1 0 0 0 0 0 0 2 0 1 1 62 29 1 1 4 0 91
78 special-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 1 7 21 43 27 89
79 parking 0 0 1 0 0 0 0 0 0 1 0 4 2 4 2 61 13 0 80
80 mechanical 0 0 0 4 3 4 5 5 9 7 5 6 3 5 14 19 22 12 79
81 addr 1 5 1 2 2 3 1 2 3 2 7 10 2 2 12 6 9 6 73
82 leisure 1 1 0 1 1 0 0 1 4 20 8 8 4 5 6 4 4 1 69
83 remove-image 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 20 33 7 66
84 plantnet-ai-detection 0 0 0 0 0 0 0 0 0 0 0 0 0 10 21 12 24 7 62
85 attribution 2 7 4 3 6 3 1 1 5 9 3 4 10 10 7 5 5 6 56
86 surface 0 6 3 7 3 1 1 2 1 3 2 4 3 2 3 4 8 5 54
87 image 0 0 1 0 2 3 4 0 1 2 10 5 3 5 8 8 3 2 53
88 souce 1 3 10 12 8 3 1 2 1 3 0 5 6 2 0 0 1 1 53
89 relocated 0 0 0 0 0 0 0 0 0 0 0 0 0 2 11 19 19 13 53
90 license 0 1 0 1 11 27 18 17 14 13 7 4 0 1 1 0 0 1 53
91 ref 2 3 4 3 5 1 1 0 2 2 4 8 4 2 1 5 8 2 51
92 operator 0 2 0 0 0 0 0 1 3 1 2 9 3 4 1 4 17 6 51
93 campaign 0 0 0 0 0 0 0 0 49 0 0 0 1 0 0 0 0 0 50
94 natural 3 0 0 1 3 1 1 2 2 5 1 5 5 4 3 1 8 3 48
95 copy 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31 23 47
96 organisation 0 1 0 0 0 0 0 0 44 0 0 0 0 1 1 1 0 0 46
97 payment 0 1 0 0 4 9 2 0 2 2 2 0 0 0 4 7 12 0 45
98 landuse 1 0 0 5 0 0 0 2 3 6 3 4 7 2 7 2 0 2 44
99 mapillary 0 0 0 0 0 0 1 2 2 2 4 6 4 9 3 8 5 4 41
100 clacks_overhead 0 0 0 0 0 0 0 0 1 1 1 2 7 19 8 7 11 7 40