Tag Analysis

Analysis of OpenStreetMap changesets that use tag prefixes to indicate the types of data being edited.

Monthly Top 10 Tag Prefixes: Edits, Contributors, and Percentage Analysis

code
# Get top 10 tag prefixes by total edits
df = duckdb.sql("""
WITH tags_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
top_tags AS (
    SELECT tag_prefix
    FROM (
        SELECT
            tag_prefix,
            SUM(edit_count) as total_edits
        FROM tags_expanded
        GROUP BY tag_prefix
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_tag_data AS (
    SELECT 
        te.year,
        te.month,
        CONCAT(te.year, '-', LPAD(CAST(te.month as VARCHAR), 2, '0')) as months,
        te.tag_prefix,
        COUNT(DISTINCT te.user_name) as contributors,
        SUM(te.edit_count) as edits
    FROM tags_expanded te
    WHERE te.tag_prefix IN (SELECT tag_prefix FROM top_tags)
    GROUP BY te.year, te.month, te.tag_prefix
),
monthly_total_contributors AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
),
monthly_total_edits AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
    GROUP BY year, month
)
SELECT 
    mtd.months,
    mtd.tag_prefix,
    mtd.contributors as "Contributors",
    mtd.edits as "Edits",
    ROUND((mtd.contributors * 100.0) / mtc.total_contributors, 2) as 'Percentage of Contributors',
    ROUND((mtd.edits * 100.0) / mte.total_edits, 2) as 'Percentage of Edits'
FROM monthly_tag_data mtd
JOIN monthly_total_contributors mtc ON mtd.months = mtc.months
JOIN monthly_total_edits mte ON mtd.months = mte.months
ORDER BY mtd.year, mtd.month, mtd.tag_prefix
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Tag Prefixes",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits by Top 10 Tag Prefixes",
            label="Percentage of Edits",
            x_col="months",
            y_col="Percentage of Edits",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Tag Prefixes",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="tag_prefix",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Contributors by Top 10 Tag Prefixes",
            label="Percentage of Contributors",
            x_col="months",
            y_col="Percentage of Contributors",
            y_unit_hover_template="%",
            group_col="tag_prefix",
            query_or_df=df,
        ),
    ]
)

Top 100 Tag Prefixes Yearly

code
query = """
WITH tags_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(all_tags) as tag_prefix
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE all_tags IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        tag_prefix,
        MIN(year) as first_year
    FROM tags_expanded
    GROUP BY user_name, tag_prefix
),
tag_totals AS (
    SELECT
        tag_prefix as "Tag Prefix",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM tags_expanded
    GROUP BY tag_prefix
),
yearly_metrics AS (
    SELECT
        te.year,
        te.tag_prefix as "Tag Prefix",
        CAST(SUM(te.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT te.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = te.year THEN te.user_name END) as BIGINT) as "New Contributors"
    FROM tags_expanded te
    LEFT JOIN user_first_year ufy 
        ON te.user_name = ufy.user_name AND te.tag_prefix = ufy.tag_prefix
    GROUP BY te.year, te.tag_prefix
)
SELECT 
    ym.year,
    ym."Tag Prefix",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    tt.total_edits_all_time as "Total Edits",
    tt.total_edits_2021_now as "Total Edits (2021 - Now)",
    tt.total_contributors_all_time as "Total Contributors",
    tt.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN tag_totals tt
    ON ym."Tag Prefix" = tt."Tag Prefix"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

top_100_contributors = df.groupby("Tag Prefix")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Tag Prefix")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Tag Prefix")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Tag Prefix")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors",
        query_or_df=df[df["Tag Prefix"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Contributors 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Contributors",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits",
        query_or_df=df[df["Tag Prefix"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Tag Prefixes by Edits 2021 - Now",
        query_or_df=df[(df["Tag Prefix"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Tag Prefix",
        value_col="Edits",
        center_columns=["Rank", "Tag Prefix"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Tag Prefix 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 Total Contributors
1 created_by 64,271 79,548 94,631 122,416 127,132 155,904 163,504 261,946 318,851 307,995 281,778 302,771 292,540 258,556 273,523 263,199 274,458 50,423 2,398,728
2 comment 38,981 61,602 76,523 100,702 96,185 95,195 110,950 254,853 312,573 301,664 275,756 300,447 290,998 256,925 271,704 262,001 273,521 50,172 2,225,614
3 imagery_used 0 0 0 0 56,447 125,548 134,093 148,792 195,614 216,254 206,481 245,587 243,206 209,843 219,588 216,398 218,328 34,397 1,693,671
4 locale 0 0 0 0 0 0 97,179 148,482 195,195 215,384 205,222 250,333 255,078 224,522 236,992 234,477 240,522 40,196 1,598,614
5 host 0 0 0 0 0 0 97,178 148,481 194,558 213,957 203,646 242,552 240,327 207,088 216,679 213,305 215,848 33,489 1,551,026
6 changesets_count 0 0 0 0 0 0 0 0 87,312 213,851 203,704 242,421 240,027 205,019 214,397 206,437 214,560 33,235 1,310,185
7 review_requested 0 0 0 0 0 0 0 0 27,653 64,721 70,171 89,607 88,709 68,719 72,617 70,705 64,339 5,814 535,067
8 ideditor 0 0 0 0 0 0 0 0 20,577 60,617 58,015 73,159 64,522 53,378 53,806 51,293 53,675 5,338 451,193
9 source 144 592 864 992 3,385 17,755 19,981 20,293 35,191 52,333 51,976 52,119 61,802 62,124 78,860 129,950 149,353 26,727 447,549
10 hashtags 0 0 0 0 0 0 0 0 26,592 53,296 54,135 87,224 89,990 59,699 60,736 54,550 42,842 4,686 443,721
11 bundle_id 0 0 0 0 0 0 0 96,586 102,387 71,207 55,724 35,484 22,740 21,630 26,569 20,366 26,212 4,885 362,053
12 warnings 0 0 0 0 0 0 0 0 0 0 45,784 71,226 75,750 61,559 63,662 60,045 60,159 8,949 328,843
13 version 0 3,786 66,591 102,013 73,254 30,299 24,730 15,138 10,235 6,476 4,920 3,431 605 350 234 148 102 33 257,756
14 build 0 3,690 66,592 102,007 73,253 30,299 24,730 15,125 10,235 6,468 4,913 3,431 606 351 234 148 100 33 257,717
15 resolved 0 0 0 0 0 0 0 0 0 0 20,093 84,805 60,939 46,967 48,426 45,438 46,996 8,316 247,945
16 StreetComplete 0 0 0 0 0 0 0 7 9,572 9,764 8,966 11,694 21,935 23,525 27,366 28,392 32,996 7,583 92,421
17 platform 0 0 0 0 0 0 26,306 53 29 10 5 0 0 0 0 0 0 0 26,365
18 browser 0 0 0 0 0 0 26,305 53 29 10 5 0 0 0 0 0 0 0 26,364
19 info 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1,559 20,739 4,417 23,533
20 closed 0 0 0 0 0 0 0 0 0 0 1,707 2,302 3,006 2,709 3,133 3,980 7,141 1,295 16,711
21 theme 0 0 0 0 0 0 0 0 0 0 0 184 718 1,253 1,410 1,545 2,720 440 6,212
22 v 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1,021 5,394 1,146 5,783
23 merge_conflict_resolved 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5,091 582 5,464
24 answer 0 0 0 0 0 0 0 0 0 0 0 0 190 1,100 1,244 1,427 2,499 400 5,232
25 create_by 0 1 577 1,185 1,851 378 49 0 0 0 0 0 0 0 0 0 0 0 3,510
26 data_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 139 1,998 2,023 312 3,472
27 website 0 7 9 11 17 12 8 6 15 18 346 480 464 505 882 972 425 54 3,374
28 create 0 0 0 0 0 0 0 0 0 0 0 0 96 610 678 801 1,423 205 3,075
29 imagery 0 1 1 0 2 2 1 1 3 4 6 8 701 1,247 880 24 4 0 2,364
30 type 1 1 3 8 21 173 230 256 304 344 445 524 427 358 393 371 391 91 2,296
31 git_commit 0 0 0 0 0 0 0 0 0 0 0 0 0 0 176 578 1,311 186 2,048
32 mapwithai 0 0 0 0 0 0 0 0 0 0 44 406 376 478 625 491 357 68 1,793
33 revert 1 1 3 5 4 3 3 3 2 3 2 3 1 1 462 832 968 174 1,747
34 add-image 0 0 0 0 0 0 0 0 0 0 0 0 49 267 347 367 800 127 1,485
35 change_over_5000m 0 0 0 0 0 0 0 0 0 0 0 0 27 230 356 321 760 114 1,406
36 StreetComplete_ee 0 0 0 0 0 0 0 0 0 0 0 0 0 414 788 16 0 0 1,060
37 rapid 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 455 632 30 1,035
38 language 0 0 0 0 0 0 0 0 0 0 0 125 714 162 70 12 0 0 936
39 os 0 0 0 0 0 0 0 0 0 0 0 26 517 350 234 147 100 33 849
40 move 0 0 0 0 0 0 0 0 0 0 0 0 29 162 164 227 372 40 824
41 (new key) 0 8 178 239 154 52 43 32 33 19 12 11 3 3 1 0 0 0 762
42 photos_used 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 439 408 76 733
43 theme-creator 0 0 0 0 0 0 0 0 0 0 0 172 500 69 71 12 0 0 726
44 change_within_25m 0 0 0 0 0 0 0 0 0 0 0 0 34 152 205 120 372 70 710
45 note 29 41 61 52 78 58 55 53 46 48 41 63 77 78 79 71 70 13 661
46 deletion 0 0 0 0 0 0 0 0 0 0 0 0 11 124 104 167 238 22 561
47 url 3 8 8 15 21 45 34 69 28 68 71 120 119 96 75 83 67 22 532
48 import 3 3 6 10 26 56 55 63 29 16 28 67 74 105 43 70 63 11 495
49 bot 0 13 12 29 30 32 36 42 43 269 37 37 29 38 38 35 31 19 492
50 change_within_5000m 0 0 0 0 0 0 0 0 0 0 0 0 14 125 137 86 168 32 424
51 change_within_500m 0 0 0 0 0 0 0 0 0 0 0 0 19 122 112 84 172 30 419
52 change_within_1000m 0 0 0 0 0 0 0 0 0 0 0 0 10 99 95 55 125 18 315
53 name 9 14 11 8 15 5 2 3 14 26 19 31 15 30 33 23 55 2 311
54 change_within_50m 0 0 0 0 0 0 0 0 0 0 0 0 17 95 92 54 120 20 303
55 change_within_100m 0 0 0 0 0 0 0 0 0 0 0 0 15 79 85 45 103 18 261
56 building 0 2 5 6 7 5 1 4 9 11 11 41 35 26 38 29 28 1 253
57 link-image 0 0 0 0 0 0 0 0 0 0 0 0 0 12 26 103 126 14 243
58 description 1 5 2 10 9 3 5 9 24 26 22 20 36 28 22 35 30 4 229
59 submitted_from 0 0 0 0 0 0 0 0 0 0 0 0 13 18 16 57 141 19 216
60 soft-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 12 21 69 127 13 216
61 delete-image 0 0 0 0 0 0 0 0 0 0 0 0 0 1 16 53 160 16 209
62 split 0 0 0 0 0 0 0 0 0 0 0 0 4 41 38 37 88 21 207
63 fixme 0 4 4 11 9 11 4 6 4 10 11 14 21 13 24 14 16 3 162
64 survey 1 1 1 9 15 11 12 15 13 17 13 25 36 33 23 30 34 6 151
65 maproulette 0 0 0 0 1 0 0 0 0 0 0 0 0 0 40 37 80 13 145
66 source_ref 3 15 13 13 18 26 15 16 9 13 11 10 19 9 15 17 10 5 141
67 relation-fix 0 0 0 0 0 0 0 0 0 0 0 0 3 29 25 27 62 10 140
68 Source 2 17 31 30 30 7 2 3 6 1 3 4 6 5 2 2 1 3 137
69 highway 2 8 3 1 2 6 2 2 9 9 7 24 18 13 14 12 12 1 137
70 contact 0 1 0 1 2 0 1 0 0 0 1 0 0 0 30 49 58 5 130
71 0 3 23 23 12 10 12 1 3 3 0 5 7 2 4 5 3 0 110
72 hashtag 0 0 0 0 0 0 0 0 1 8 12 13 16 21 12 12 17 2 103
73 amenity 2 3 2 2 4 2 2 2 9 10 2 13 7 2 9 13 15 4 102
74 author 6 31 16 17 8 4 5 1 0 1 0 1 0 1 1 1 1 0 92
75 generator 0 0 0 0 1 0 0 1 1 1 1 0 18 26 37 39 35 14 92
76 path 0 1 0 0 0 0 0 0 2 0 1 1 62 29 1 1 4 0 91
77 upload_attempt_error 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 55 34 83
78 parking 0 0 1 0 0 0 0 0 0 1 0 4 2 4 2 61 13 0 80
79 mechanical 0 0 0 4 3 4 5 5 9 7 5 6 3 5 14 19 22 9 77
80 special-delete 0 0 0 0 0 0 0 0 0 0 0 0 0 1 7 21 43 8 73
81 leisure 1 1 0 1 1 0 0 1 4 20 8 8 4 5 6 4 4 1 69
82 addr 1 5 1 2 2 3 1 2 3 2 7 10 2 2 12 6 9 1 68
83 remove-image 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 20 33 0 60
84 plantnet-ai-detection 0 0 0 0 0 0 0 0 0 0 0 0 0 10 21 12 24 1 59
85 attribution 2 7 4 3 6 3 1 1 5 9 3 4 10 10 7 5 5 2 54
86 souce 1 3 10 12 8 3 1 2 1 3 0 5 6 2 0 0 1 0 53
87 license 0 1 0 1 11 27 18 17 14 13 7 4 0 1 1 0 0 0 52
88 image 0 0 1 0 2 3 4 0 1 2 10 5 3 5 8 8 3 0 51
89 surface 0 6 3 7 3 1 1 2 1 3 2 4 3 2 3 4 8 1 50
90 campaign 0 0 0 0 0 0 0 0 49 0 0 0 1 0 0 0 0 0 50
91 ref 2 3 4 3 5 1 1 0 2 2 4 8 4 2 1 5 8 0 49
92 operator 0 2 0 0 0 0 0 1 3 1 2 9 3 4 1 4 17 2 47
93 natural 3 0 0 1 3 1 1 2 2 5 1 5 5 4 3 1 8 1 46
94 organisation 0 1 0 0 0 0 0 0 44 0 0 0 0 1 1 1 0 0 46
95 payment 0 1 0 0 4 9 2 0 2 2 2 0 0 0 4 7 12 0 45
96 relocated 0 0 0 0 0 0 0 0 0 0 0 0 0 2 11 19 19 3 43
97 landuse 1 0 0 5 0 0 0 2 3 6 3 4 7 2 7 2 0 0 42
98 mapillary 0 0 0 0 0 0 1 2 2 2 4 6 4 9 3 8 5 2 40
99 clacks_overhead 0 0 0 0 0 0 0 0 1 1 1 2 7 19 8 7 11 6 39
100 organization 0 0 0 1 0 0 0 0 38 0 0 0 0 0 0 0 0 0 39