Source Tag Analysis

Analysis of OpenStreetMap changesets that use source tags to indicate data sources.

Monthly Percentage of Edits and Contributors Using Source Tags

code
df = duckdb.sql("""
WITH monthly_with_source AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as contributors_with_source,
        SUM(edit_count) as edits_with_source
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE source IS NOT NULL
    GROUP BY year, month
),
monthly_total AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    GROUP BY year, month
)
SELECT 
    mt.months,
    COALESCE(ROUND((mws.contributors_with_source * 100.0) / mt.total_contributors, 2), 0) as 'Percentage Contributors with Source',
    COALESCE(ROUND((mws.edits_with_source * 100.0) / mt.total_edits, 2), 0) as 'Percentage Edits with Source'
FROM monthly_total mt
LEFT JOIN monthly_with_source mws ON mt.year = mws.year AND mt.month = mws.month
ORDER BY mt.year, mt.month
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Percentage of Contributors Using Source Tags",
            label="Contributors",
            x_col="months",
            y_col="Percentage Contributors with Source",
            y_unit_hover_template="%",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits Using Source Tags",
            label="Edits",
            x_col="months",
            y_col="Percentage Edits with Source",
            y_unit_hover_template="%",
            query_or_df=df,
        ),
    ]
)

Monthly Top 10 Sources: Edits, Accumulated Edits, Contributors and Accumulated Contributors

code
# Get top 10 sources by total edits
df = duckdb.sql("""
WITH source_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(source) as source_tag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE source IS NOT NULL
),
top_sources AS (
    SELECT source_tag
    FROM (
        SELECT
            source_tag,
            SUM(edit_count) as total_edits
        FROM source_expanded
        GROUP BY source_tag
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_source_data AS (
    SELECT 
        se.year,
        se.month,
        CONCAT(se.year, '-', LPAD(CAST(se.month as VARCHAR), 2, '0')) as months,
        se.source_tag,
        COUNT(DISTINCT se.user_name) as "Contributors",
        SUM(se.edit_count) as "Edits"
    FROM source_expanded se
    WHERE se.source_tag IN (SELECT source_tag FROM top_sources)
    GROUP BY se.year, se.month, se.source_tag
)
SELECT 
    months,
    source_tag,
    "Contributors",
    "Edits",
    SUM("Contributors") OVER (PARTITION BY source_tag ORDER BY year, month) as "Contributors Accumulated",
    SUM("Edits") OVER (PARTITION BY source_tag ORDER BY year, month) as "Edits Accumulated"
FROM monthly_source_data
ORDER BY year, month, source_tag
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Source Tags",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="source_tag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Accumulated Edits by Top 10 Source Tags",
            label="Edits Accumulated",
            x_col="months",
            y_col="Edits Accumulated",
            group_col="source_tag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Source Tags",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="source_tag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Accumulated Contributors by Top 10 Source Tags",
            label="Contributors Accumulated",
            x_col="months",
            y_col="Contributors Accumulated",
            group_col="source_tag",
            query_or_df=df,
        ),
    ]
)

Top 100 Sources Yearly

code
import json

# Load replacement rules for clickable links
with open("../config/replace_rules_imagery_and_source.json") as f:
    source_name_to_html_link = {
        name: f'<a href="{item["link"]}">{name}</a>' for name, item in json.load(f).items() if "link" in item
    }

query = """
WITH source_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(source) as source_tag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE source IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        source_tag,
        MIN(year) as first_year
    FROM source_expanded
    GROUP BY user_name, source_tag
),
source_totals AS (
    SELECT
        source_tag as "Source",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM source_expanded
    GROUP BY source_tag
),
yearly_metrics AS (
    SELECT
        se.year,
        se.source_tag as "Source",
        CAST(SUM(se.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT se.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = se.year THEN se.user_name END) as BIGINT) as "New Contributors"
    FROM source_expanded se
    LEFT JOIN user_first_year ufy 
        ON se.user_name = ufy.user_name AND se.source_tag = ufy.source_tag
    GROUP BY se.year, se.source_tag
)
SELECT 
    ym.year,
    ym."Source",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    st.total_edits_all_time as "Total Edits",
    st.total_edits_2021_now as "Total Edits (2021 - Now)",
    st.total_contributors_all_time as "Total Contributors",
    st.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN source_totals st
    ON ym."Source" = st."Source"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

# Apply HTML links to source names
df["Source"] = df["Source"].apply(
    lambda name: source_name_to_html_link[name] if name in source_name_to_html_link else name
)

top_100_contributors = df.groupby("Source")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Source")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Source")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Source")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Sources by Contributors",
        query_or_df=df[df["Source"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Contributors",
        center_columns=["Rank", "Source"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Sources by Contributors 2021 - Now",
        query_or_df=df[(df["Source"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Contributors",
        center_columns=["Rank", "Source"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Sources by Edits",
        query_or_df=df[df["Source"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Edits",
        center_columns=["Rank", "Source"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Sources by Edits 2021 - Now",
        query_or_df=df[(df["Source"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Edits",
        center_columns=["Rank", "Source"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Source 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 Total Contributors
1 Local Knowledge 15 39 96 124 299 1,112 1,185 1,155 6,004 13,364 14,866 16,846 16,774 16,676 24,925 73,065 73,249 195,386
2 Survey 50 151 266 332 936 6,016 6,994 6,815 16,397 17,965 18,067 21,616 31,851 35,699 43,215 64,838 66,285 181,814
3 Bing Aerial Imagery 0 250 543 628 1,521 7,987 9,704 9,369 11,150 13,935 14,177 14,847 14,617 14,549 17,520 33,157 31,298 114,959
4 GPS 24 73 142 158 379 2,086 1,968 1,784 2,756 4,071 4,474 5,516 5,437 4,636 5,351 11,780 12,148 43,846
5 Streetlevel Imagery 0 0 0 0 0 0 0 0 552 1,525 3,804 5,611 5,473 4,915 6,128 12,539 13,138 41,649
6 Knowledge 5 36 63 95 150 5,231 5,692 5,528 4,952 4,737 4,846 4,643 4,101 4,110 3,955 3,988 3,630 29,296
7 Esri World Imagery 0 0 0 0 0 0 8 3 884 2,528 3,875 5,311 5,061 4,733 5,468 5,750 5,507 19,853
8 Maxar Imagery 0 0 0 0 0 0 0 0 0 0 4,099 5,927 6,691 6,777 4,527 886 509 16,698
9 Mapillary 0 0 0 0 0 76 294 452 733 832 2,899 4,476 4,421 3,594 3,428 3,404 3,383 15,920
10 Mapbox Satellite 0 0 0 0 5 589 1,309 1,513 1,669 1,651 1,943 2,692 2,740 2,399 2,376 2,231 2,250 13,098
11 OSM Notes 0 0 0 0 11 41 28 41 38 42 35 32 32 1,450 2,055 4,810 5,027 11,727
12 mapwithai 0 0 0 0 0 0 0 0 0 0 17 220 1,310 1,773 6,674 2,268 1,850 10,960
13 streetside 0 0 0 0 0 0 0 0 0 2 1,450 2,261 2,121 1,425 1,646 1,639 1,785 8,416
14 DigitalGlobe Imagery (now Maxar) 0 4 2 11 61 219 189 164 2,250 3,276 3,193 827 604 30 33 41 19 7,019
15 tms[]:https:// 0 0 0 0 0 36 635 1,236 2,079 1,365 1,224 1,408 979 574 271 170 39 6,764
16 OSM Note 0 1 1 3 44 307 318 342 368 438 473 1,394 1,755 1,799 1,753 1,586 1,437 5,350
17 OpenStreetMap Carto (Standard) 0 0 0 0 0 0 0 14 129 184 331 952 1,151 1,336 1,408 1,279 1,124 5,109
18 OpenStreetCam Images 0 0 0 0 0 0 0 7 101 135 1,470 2,239 2,011 301 9 1 3 4,574
19 JOSM Validator 1 2 1 1 3 17 20 47 46 45 47 3,175 417 166 86 43 19 3,499
20 Strava 0 0 0 0 0 78 198 298 335 342 311 957 1,230 945 786 815 702 3,491
21 website 0 2 6 8 28 190 228 242 312 468 534 596 619 648 600 728 776 3,224
22 kartaview 0 0 0 0 0 0 0 0 0 0 0 1 50 911 1,022 939 905 2,828
23 Osmose 0 0 0 2 9 230 310 347 381 376 484 602 546 464 489 471 413 2,601
24 wiki.openstreetmap.org 2 3 5 6 22 135 124 135 153 212 280 307 358 371 315 398 386 1,987
25 Wikipedia 0 4 6 16 34 215 177 211 252 207 230 248 244 263 216 252 248 1,849
26 BDOrtho 0 0 0 0 0 0 0 127 253 300 332 521 534 584 541 534 466 1,801
27 wikipedia 3 5 14 16 34 233 199 199 212 209 227 250 225 213 178 226 187 1,638
28 Geotagged Images 0 0 0 0 2 33 20 33 37 45 58 317 425 476 487 468 426 1,538
29 Cadastre 0 1 11 13 32 183 198 205 227 216 243 319 333 377 374 393 325 1,515
30 Website 0 0 0 1 4 53 42 64 98 102 121 153 189 255 290 463 394 1,501
31 mapilio 0 0 0 0 0 0 0 0 0 0 0 0 0 0 308 694 716 1,405
32 aerial_imagery 0 0 0 0 1 0 0 1 1,221 8 11 28 20 13 13 25 55 1,338
33 panoramax 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 413 980 1,214
34 Geoportal 2: Orthophotomap 0 0 0 0 1 96 166 144 176 144 189 288 365 359 368 354 326 1,205
35 KartaView 0 0 0 0 0 0 0 0 0 0 0 1 532 514 352 270 246 1,179
36 cadastre 0 3 9 13 37 212 189 199 213 222 230 246 227 207 192 192 172 1,162
37 basemap.at 0 0 0 0 26 121 149 192 163 164 151 208 246 362 296 255 199 1,123
38 gpx 1 7 8 15 16 131 157 168 169 153 164 188 152 139 129 148 129 1,105
39 osmose 0 0 1 2 11 98 139 115 124 132 177 196 179 173 159 140 114 1,020
40 .gpx data file 0 2 3 4 14 101 106 101 105 141 133 169 143 120 114 98 90 995
41 OSM 0 0 1 2 14 95 105 101 130 112 147 143 123 110 110 100 115 885
42 GPX 1 2 4 13 13 97 117 118 112 112 116 152 122 91 100 97 97 879
43 microsoft/BuildingFootprints 0 0 0 0 0 0 0 0 0 0 0 106 88 204 322 288 179 841
44 PNOA Spain 0 0 1 0 1 13 22 19 32 66 96 153 215 229 250 229 212 837
45 wiki 0 0 1 2 3 80 87 80 102 93 109 122 122 137 107 125 119 780
46 Google Maps 0 0 0 0 3 17 9 9 15 43 45 58 96 63 84 184 177 765
47 Validator 0 0 0 2 1 33 14 8 11 10 7 664 38 20 14 9 6 765
48 youtube 0 0 0 0 0 6 13 25 31 39 70 110 146 138 172 213 206 750
49 Japan GSI 0 0 0 0 0 0 3 16 75 142 168 147 149 147 154 180 164 726
50 Bavaria (80 cm) 0 0 0 0 0 0 6 11 25 66 116 214 273 266 263 164 2 701
51 MapBox 0 0 0 0 0 345 253 159 103 47 44 18 13 15 15 12 6 668
52 wikidata 0 0 0 0 0 18 18 41 51 54 80 95 107 141 128 152 150 661
53 PDOK aerial imagery 0 0 0 0 0 0 0 0 33 71 84 137 158 213 204 197 191 658
54 North Rhine-Westphalia aerial photograph 0 0 0 0 0 0 0 0 0 0 0 133 226 251 233 229 212 650
55 White) 0 0 0 0 0 12 15 24 21 23 56 257 261 38 0 0 0 631
56 local survey 0 0 4 4 8 59 66 71 83 91 118 121 125 133 97 97 101 623
57 OSM Data 0 0 0 0 0 4 5 5 8 12 17 161 307 248 121 54 42 603
58 OpenStreetMap (Standard Black 0 0 0 0 0 0 0 8 20 23 56 257 263 37 0 0 0 592
59 MapBox Satellite 0 0 0 0 0 413 176 54 31 16 18 8 5 5 6 3 3 575
60 Wikidata 0 0 0 0 0 16 11 56 42 47 60 76 89 114 100 110 120 558
61 Ortskenntnis 0 0 0 0 21 118 93 91 89 111 108 101 111 80 89 67 67 548
62 extrapolation 3 11 9 15 18 49 41 36 46 82 72 74 71 81 73 109 98 542
63 websites 0 0 3 2 7 47 43 44 54 58 82 104 100 78 86 105 71 525
64 OSM Inspector 0 0 0 2 8 79 55 82 113 78 78 80 89 75 52 61 40 513
65 osm 0 0 0 1 7 63 61 61 69 77 92 77 79 61 63 49 48 512
66 PNOA 1 3 13 9 10 76 66 95 91 96 123 115 97 91 92 78 60 509
67 local 0 0 1 1 11 34 45 44 36 63 73 65 80 80 44 44 49 498
68 google maps 0 0 1 0 1 6 6 6 23 40 39 59 65 31 57 91 82 494
69 web 0 0 1 1 6 56 51 47 50 75 63 81 68 75 77 89 81 494
70 www.openstreetmap.org/changeset/ 0 0 0 0 0 1 20 33 37 64 87 98 121 128 124 133 123 492
71 personal knowledge 0 2 4 2 18 67 58 62 50 60 60 63 71 46 44 61 62 491
72 Streetside 0 0 0 0 0 0 0 0 1 3 8 35 24 21 38 258 240 490
73 Internet 0 0 0 0 2 57 47 50 41 57 54 56 72 76 65 92 83 475
74 BANO 0 0 0 0 0 56 64 67 75 58 61 96 121 141 157 108 72 469
75 vor Ort 0 0 0 0 11 50 51 52 67 71 95 90 97 97 71 67 62 468
76 swisstopo SWISSIMAGE 0 0 0 0 0 0 0 0 0 0 0 0 155 184 186 186 178 468
77 cadastre-dgi-fr 4 27 27 33 41 112 100 82 75 75 63 80 64 64 47 44 33 468
78 National Agriculture Imagery Program 0 0 0 0 0 0 0 0 0 0 21 85 106 98 110 153 114 463
79 observation 0 2 6 3 15 80 60 74 61 47 58 60 65 66 52 41 36 449
80 Norway Orthophoto 0 0 0 0 0 0 0 0 0 40 60 82 95 155 112 115 106 418
81 IGN 0 0 0 0 0 4 7 23 41 48 55 58 78 70 76 87 74 410
82 {switch:a,b,c,d}.tiles.mapbox.com 0 0 0 0 0 0 1 0 37 59 212 92 37 3 0 0 0 409
83 Google 0 3 0 0 1 19 9 24 19 27 18 29 61 36 45 78 55 407
84 Catastro Spain 0 0 0 1 0 7 24 25 28 56 72 90 75 69 82 98 86 407
85 OpenTopoMap 0 0 0 0 0 0 0 0 8 22 35 94 119 109 118 103 79 406
86 Ortho HR 0 0 0 0 0 0 0 0 0 1 54 107 216 195 27 8 5 405
87 internet 0 0 0 2 1 39 33 40 38 47 68 65 51 60 51 73 53 404
88 MAPNIK 0 0 0 0 1 197 155 47 27 5 3 22 10 9 8 7 4 403
89 JOSM 0 1 2 1 3 51 47 60 60 59 60 66 56 49 42 21 19 398
90 Maps4BW 0 0 0 0 24 99 101 102 95 107 98 89 96 82 35 6 4 396
91 1.19) 0 0 0 0 0 0 0 0 0 1 3 124 322 16 5 6 1 393
92 1.79) 0 0 0 0 0 0 0 0 1 1 0 124 319 18 4 8 1 393
93 - 0 0 0 0 8 49 35 53 49 63 54 44 46 49 50 50 40 388
94 www.lgl-bw.de) 0 0 0 0 1 46 66 68 73 59 76 115 130 123 56 3 0 378
95 geoportal 0 0 0 0 3 83 62 69 51 50 66 64 71 49 52 46 44 377
96 MML Orthophoto 0 0 0 0 0 0 0 0 0 17 27 57 64 126 154 128 73 377
97 imagery 0 1 1 2 3 17 15 27 32 51 66 69 54 42 40 55 64 377
98 visit 0 3 0 1 5 41 33 44 41 63 59 45 49 53 55 38 43 375
99 2.39) 0 0 0 0 0 0 0 1 1 1 1 131 288 16 4 6 2 370
100 Maps4BW (LGL-BW 0 0 0 0 0 43 65 62 71 57 71 111 129 124 57 3 0 370