Tag Analysis

Analysis of OpenStreetMap changesets that use tag prefixes to indicate the types of data being edited.

Monthly Top 10 Tag Prefixes: Edits, Contributors, and Percentage Analysis

code
# Get top 10 tag prefixes by total edits
df = duckdb.sql("""
WITH tags_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
top_tags AS (
    SELECT tag_prefix
    FROM (
        SELECT
            tag_prefix,
            SUM(edit_count) as total_edits
        FROM tags_expanded
        GROUP BY tag_prefix
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_tag_data AS (
    SELECT 
        te.year,
        te.month,
        CONCAT(te.year, '-', LPAD(CAST(te.month as VARCHAR), 2, '0')) as months,
        te.tag_prefix,
        COUNT(DISTINCT te.user_name) as contributors,
        SUM(te.edit_count) as edits
    FROM tags_expanded te
    WHERE te.tag_prefix IN (SELECT tag_prefix FROM top_tags)
    GROUP BY te.year, te.month, te.tag_prefix
),
monthly_total_contributors AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
),
monthly_total_edits AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
)
SELECT 
    mtd.months,
    mtd.tag_prefix,
    mtd.contributors as "Contributors",
    mtd.edits as "Edits",
    ROUND((mtd.contributors * 100.0) / mtc.total_contributors, 2) as 'Percentage of Contributors',
    ROUND((mtd.edits * 100.0) / mte.total_edits, 2) as 'Percentage of Edits'
FROM monthly_tag_data mtd
JOIN monthly_total_contributors mtc ON mtd.months = mtc.months
JOIN monthly_total_edits mte ON mtd.months = mte.months
ORDER BY mtd.year, mtd.month, mtd.tag_prefix
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Tag Prefixes",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits by Top 10 Tag Prefixes",
            label="Percentage of Edits",
            x_col="months",
            y_col="Percentage of Edits",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Tag Prefixes",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Contributors by Top 10 Tag Prefixes",
            label="Percentage of Contributors",
            x_col="months",
            y_col="Percentage of Contributors",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
    ]
)

Top 100 Tag Prefixes Yearly

code
query = """
WITH tags_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        tag_prefix,
        MIN(year) as first_year
    FROM tags_expanded
    GROUP BY user_name, tag_prefix
),
tag_totals AS (
    SELECT
        tag_prefix as "Tag Prefix",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM tags_expanded
    GROUP BY tag_prefix
),
yearly_metrics AS (
    SELECT
        te.year,
        te.tag_prefix as "Tag Prefix",
        CAST(SUM(te.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT te.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = te.year THEN te.user_name END) as BIGINT) as "New Contributors"
    FROM tags_expanded te
    LEFT JOIN user_first_year ufy 
        ON te.user_name = ufy.user_name AND te.tag_prefix = ufy.tag_prefix
    GROUP BY te.year, te.tag_prefix
)
SELECT 
    ym.year,
    ym."Tag Prefix",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    tt.total_edits_all_time as "Total Edits",
    tt.total_edits_2021_now as "Total Edits (2021 - Now)",
    tt.total_contributors_all_time as "Total Contributors",
    tt.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN tag_totals tt
    ON ym."Tag Prefix" = tt."Tag Prefix"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

top_100_contributors = df.groupby("Tag Prefix")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Tag Prefix")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Tag Prefix")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Tag Prefix")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors",
        query_or_df=df[df["Tag Prefix"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits",
        query_or_df=df[df["Tag Prefix"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Tag Prefix 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 Total Contributors
1 created_by 64,271 79,548 94,631 122,416 127,132 155,904 163,504 261,946 318,851 307,995 281,778 302,771 292,540 258,556 273,523 263,199 273,935 109,776 2,429,992
2 comment 38,981 61,602 76,523 100,702 96,185 95,195 110,950 254,853 312,573 301,664 275,756 300,447 290,998 256,925 271,704 262,001 273,000 109,236 2,256,837
3 imagery_used 0 0 0 0 56,447 125,548 134,093 148,792 195,614 216,254 206,481 245,587 243,206 209,843 219,588 216,398 217,914 77,972 1,717,421
4 locale 0 0 0 0 0 0 97,179 148,482 195,195 215,384 205,222 250,333 255,078 224,522 236,992 234,477 240,051 88,988 1,623,847
5 host 0 0 0 0 0 0 97,178 148,481 194,558 213,957 203,646 242,552 240,327 207,088 216,679 213,305 215,441 76,554 1,574,943
6 changesets_count 0 0 0 0 0 0 0 0 87,312 213,851 203,704 242,421 240,027 205,019 214,397 206,437 214,154 75,994 1,334,131
7 review_requested 0 0 0 0 0 0 0 0 27,653 64,721 70,171 89,607 88,709 68,719 72,617 70,705 64,286 17,477 543,996
8 source 144 592 864 992 3,385 17,755 19,981 20,293 35,191 52,333 51,976 52,119 61,802 62,124 78,860 129,950 149,055 56,247 462,657
9 ideditor 0 0 0 0 0 0 0 0 20,577 60,617 58,015 73,159 64,522 53,378 53,806 51,293 53,631 15,445 459,867
10 hashtags 0 0 0 0 0 0 0 0 26,592 53,296 54,135 87,224 89,990 59,699 60,736 54,550 42,799 12,199 449,004
11 bundle_id 0 0 0 0 0 0 0 96,586 102,387 71,207 55,724 35,484 22,740 21,630 26,569 20,366 26,181 12,495 367,073
12 warnings 0 0 0 0 0 0 0 0 0 0 45,784 71,226 75,750 61,559 63,662 60,045 60,018 20,376 335,178
13 version 0 3,786 66,591 102,013 73,254 30,299 24,730 15,138 10,235 6,476 4,920 3,431 605 350 234 148 102 47 257,759
14 build 0 3,690 66,592 102,007 73,253 30,299 24,730 15,125 10,235 6,468 4,913 3,431 606 351 234 148 100 47 257,720
15 resolved 0 0 0 0 0 0 0 0 0 0 20,093 84,805 60,939 46,967 48,426 45,438 46,839 16,840 251,782
16 StreetComplete 0 0 0 0 0 0 0 7 9,572 9,764 8,966 11,694 21,935 23,525 27,366 28,392 32,919 14,830 94,388
17 info 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1,559 20,711 9,239 27,008
18 platform 0 0 0 0 0 0 26,306 53 29 10 5 0 0 0 0 0 0 0 26,365
19 browser 0 0 0 0 0 0 26,305 53 29 10 5 0 0 0 0 0 0 0 26,364
20 closed 0 0 0 0 0 0 0 0 0 0 1,707 2,302 3,006 2,709 3,133 3,980 7,121 2,605 17,373
21 theme 0 0 0 0 0 0 0 0 0 0 0 184 718 1,253 1,410 1,545 2,716 998 6,580
22 merge_conflict_resolved 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5,082 1,771 6,344
23 v 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1,021 5,382 2,205 6,219
24 answer 0 0 0 0 0 0 0 0 0 0 0 0 190 1,100 1,244 1,427 2,495 905 5,565
25 data_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 139 1,998 2,021 602 3,609
26 create_by 0 1 577 1,185 1,851 378 49 0 0 0 0 0 0 0 0 0 0 0 3,510
27 website 0 7 9 11 17 12 8 6 15 18 346 480 464 505 882 972 423 154 3,452
28 create 0 0 0 0 0 0 0 0 0 0 0 0 96 610 678 801 1,421 491 3,262
29 git_commit 0 0 0 0 0 0 0 0 0 0 0 0 0 0 176 578 1,309 592 2,403
30 imagery 0 1 1 0 2 2 1 1 3 4 6 8 701 1,247 880 24 4 1 2,365
31 type 1 1 3 8 21 173 230 256 304 344 445 524 427 358 393 371 390 143 2,313
32 mapwithai 0 0 0 0 0 0 0 0 0 0 44 406 376 478 625 491 357 145 1,829
33 revert 1 1 3 5 4 3 3 3 2 3 2 3 1 1 462 832 964 313 1,800
34 add-image 0 0 0 0 0 0 0 0 0 0 0 0 49 267 347 367 800 285 1,571
35 change_over_5000m 0 0 0 0 0 0 0 0 0 0 0 0 27 230 356 321 759 264 1,492
36 StreetComplete_ee 0 0 0 0 0 0 0 0 0 0 0 0 0 414 788 16 0 0 1,060
37 rapid 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 455 632 51 1,045
38 language 0 0 0 0 0 0 0 0 0 0 0 125 714 162 70 12 0 1 937
39 move 0 0 0 0 0 0 0 0 0 0 0 0 29 162 164 227 372 118 877
40 os 0 0 0 0 0 0 0 0 0 0 0 26 517 350 234 147 100 47 854
41 photos_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 439 408 177 807
42 (new key) 0 8 178 239 154 52 43 32 33 19 12 11 3 3 1 0 0 0 762
43 change_within_25m 0 0 0 0 0 0 0 0 0 0 0 0 34 152 205 120 372 154 753
44 theme-creator 0 0 0 0 0 0 0 0 0 0 0 172 500 69 71 12 0 0 726
45 note 29 41 61 52 78 58 55 53 46 48 41 63 77 78 79 71 70 27 675
46 deletion 0 0 0 0 0 0 0 0 0 0 0 0 11 124 104 167 238 75 595
47 url 3 8 8 15 21 45 34 69 28 68 71 120 119 96 75 83 67 36 539
48 import 3 3 6 10 26 56 55 63 29 16 28 67 74 105 43 70 63 22 504
49 bot 0 13 12 29 30 32 36 42 43 269 37 37 29 38 38 35 31 29 498
50 change_within_500m 0 0 0 0 0 0 0 0 0 0 0 0 19 122 112 84 172 67 445
51 change_within_5000m 0 0 0 0 0 0 0 0 0 0 0 0 14 125 137 86 168 67 440
52 change_within_1000m 0 0 0 0 0 0 0 0 0 0 0 0 10 99 95 55 125 46 333
53 change_within_50m 0 0 0 0 0 0 0 0 0 0 0 0 17 95 92 54 120 51 318
54 name 9 14 11 8 15 5 2 3 14 26 19 31 15 30 33 23 55 7 316
55 change_within_100m 0 0 0 0 0 0 0 0 0 0 0 0 15 79 85 45 103 46 275
56 link-image 0 0 0 0 0 0 0 0 0 0 0 0 0 12 26 103 126 35 255
57 building 0 2 5 6 7 5 1 4 9 11 11 41 35 26 38 29 28 2 254
58 description 1 5 2 10 9 3 5 9 24 26 22 20 36 28 22 35 30 27 249
59 soft-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 12 21 69 127 41 241
60 split 0 0 0 0 0 0 0 0 0 0 0 0 4 41 38 37 88 52 236
61 submitted_from 0 0 0 0 0 0 0 0 0 0 0 0 13 18 16 57 141 35 228
62 delete-image 0 0 0 0 0 0 0 0 0 0 0 0 0 1 16 53 160 42 227
63 upload_attempt_error 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 55 125 168
64 fixme 0 4 4 11 9 11 4 6 4 10 11 14 21 13 24 14 16 5 164
65 relation-fix 0 0 0 0 0 0 0 0 0 0 0 0 3 29 25 27 62 27 155
66 survey 1 1 1 9 15 11 12 15 13 17 13 25 36 33 23 30 34 11 153
67 maproulette 0 0 0 0 1 0 0 0 0 0 0 0 0 0 40 37 80 18 149
68 source_ref 3 15 13 13 18 26 15 16 9 13 11 10 19 9 15 17 10 8 143
69 highway 2 8 3 1 2 6 2 2 9 9 7 24 18 13 14 12 12 4 140
70 contact 0 1 0 1 2 0 1 0 0 0 1 0 0 0 30 49 58 17 140
71 Source 2 17 31 30 30 7 2 3 6 1 3 4 6 5 2 2 1 3 137
72 0 3 23 23 12 10 12 1 3 3 0 5 7 2 4 5 3 0 110
73 hashtag 0 0 0 0 0 0 0 0 1 8 12 13 16 21 12 12 17 5 106
74 amenity 2 3 2 2 4 2 2 2 9 10 2 13 7 2 9 13 15 6 104
75 generator 0 0 0 0 1 0 0 1 1 1 1 0 18 26 37 39 35 22 97
76 author 6 31 16 17 8 4 5 1 0 1 0 1 0 1 1 1 1 1 93
77 path 0 1 0 0 0 0 0 0 2 0 1 1 62 29 1 1 4 0 91
78 special-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 1 7 21 43 19 82
79 parking 0 0 1 0 0 0 0 0 0 1 0 4 2 4 2 61 13 0 80
80 mechanical 0 0 0 4 3 4 5 5 9 7 5 6 3 5 14 19 22 12 79
81 addr 1 5 1 2 2 3 1 2 3 2 7 10 2 2 12 6 9 3 70
82 leisure 1 1 0 1 1 0 0 1 4 20 8 8 4 5 6 4 4 1 69
83 remove-image 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 20 33 6 65
84 plantnet-ai-detection 0 0 0 0 0 0 0 0 0 0 0 0 0 10 21 12 24 5 60
85 attribution 2 7 4 3 6 3 1 1 5 9 3 4 10 10 7 5 5 5 56
86 souce 1 3 10 12 8 3 1 2 1 3 0 5 6 2 0 0 1 0 53
87 license 0 1 0 1 11 27 18 17 14 13 7 4 0 1 1 0 0 1 53
88 relocated 0 0 0 0 0 0 0 0 0 0 0 0 0 2 11 19 19 13 53
89 image 0 0 1 0 2 3 4 0 1 2 10 5 3 5 8 8 3 1 52
90 surface 0 6 3 7 3 1 1 2 1 3 2 4 3 2 3 4 8 2 51
91 campaign 0 0 0 0 0 0 0 0 49 0 0 0 1 0 0 0 0 0 50
92 operator 0 2 0 0 0 0 0 1 3 1 2 9 3 4 1 4 17 5 50
93 ref 2 3 4 3 5 1 1 0 2 2 4 8 4 2 1 5 8 1 50
94 natural 3 0 0 1 3 1 1 2 2 5 1 5 5 4 3 1 8 2 47
95 organisation 0 1 0 0 0 0 0 0 44 0 0 0 0 1 1 1 0 0 46
96 payment 0 1 0 0 4 9 2 0 2 2 2 0 0 0 4 7 12 0 45
97 landuse 1 0 0 5 0 0 0 2 3 6 3 4 7 2 7 2 0 1 43
98 copy 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31 15 41
99 clacks_overhead 0 0 0 0 0 0 0 0 1 1 1 2 7 19 8 7 11 7 40
100 mapillary 0 0 0 0 0 0 1 2 2 2 4 6 4 9 3 8 5 3 40