Source Tag Analysis

Analysis of OpenStreetMap changesets that use source tags to indicate data sources.

Monthly Percentage of Edits and Contributors Using Source Tags

code
df = duckdb.sql("""
WITH monthly_with_source AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as contributors_with_source,
        SUM(edit_count) as edits_with_source
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE source IS NOT NULL
    GROUP BY year, month
),
monthly_total AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    GROUP BY year, month
)
SELECT 
    mt.months,
    COALESCE(ROUND((mws.contributors_with_source * 100.0) / mt.total_contributors, 2), 0) as 'Percentage Contributors with Source',
    COALESCE(ROUND((mws.edits_with_source * 100.0) / mt.total_edits, 2), 0) as 'Percentage Edits with Source'
FROM monthly_total mt
LEFT JOIN monthly_with_source mws ON mt.year = mws.year AND mt.month = mws.month
ORDER BY mt.year, mt.month
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Percentage of Contributors Using Source Tags",
            label="Contributors",
            x_col="months",
            y_col="Percentage Contributors with Source",
            y_unit_hover_template="%",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits Using Source Tags",
            label="Edits",
            x_col="months",
            y_col="Percentage Edits with Source",
            y_unit_hover_template="%",
            query_or_df=df,
        ),
    ]
)

Monthly Top 10 Sources: Edits, Accumulated Edits, Contributors and Accumulated Contributors

code
# Get top 10 sources by total edits
df = duckdb.sql("""
WITH source_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(source) as source_tag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE source IS NOT NULL
),
top_sources AS (
    SELECT source_tag
    FROM (
        SELECT
            source_tag,
            SUM(edit_count) as total_edits
        FROM source_expanded
        GROUP BY source_tag
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_source_data AS (
    SELECT 
        se.year,
        se.month,
        CONCAT(se.year, '-', LPAD(CAST(se.month as VARCHAR), 2, '0')) as months,
        se.source_tag,
        COUNT(DISTINCT se.user_name) as "Contributors",
        SUM(se.edit_count) as "Edits"
    FROM source_expanded se
    WHERE se.source_tag IN (SELECT source_tag FROM top_sources)
    GROUP BY se.year, se.month, se.source_tag
)
SELECT 
    months,
    source_tag,
    "Contributors",
    "Edits",
    SUM("Contributors") OVER (PARTITION BY source_tag ORDER BY year, month) as "Contributors Accumulated",
    SUM("Edits") OVER (PARTITION BY source_tag ORDER BY year, month) as "Edits Accumulated"
FROM monthly_source_data
ORDER BY year, month, source_tag
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Source Tags",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="source_tag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Accumulated Edits by Top 10 Source Tags",
            label="Edits Accumulated",
            x_col="months",
            y_col="Edits Accumulated",
            group_col="source_tag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Source Tags",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="source_tag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Accumulated Contributors by Top 10 Source Tags",
            label="Contributors Accumulated",
            x_col="months",
            y_col="Contributors Accumulated",
            group_col="source_tag",
            query_or_df=df,
        ),
    ]
)

Top 100 Sources Yearly

code
import json

# Load replacement rules for clickable links
with open("../config/replace_rules_imagery_and_source.json") as f:
    source_name_to_html_link = {
        name: f'<a href="{item["link"]}">{name}</a>' for name, item in json.load(f).items() if "link" in item
    }

query = """
WITH source_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(source) as source_tag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE source IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        source_tag,
        MIN(year) as first_year
    FROM source_expanded
    GROUP BY user_name, source_tag
),
source_totals AS (
    SELECT
        source_tag as "Source",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM source_expanded
    GROUP BY source_tag
),
yearly_metrics AS (
    SELECT
        se.year,
        se.source_tag as "Source",
        CAST(SUM(se.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT se.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = se.year THEN se.user_name END) as BIGINT) as "New Contributors"
    FROM source_expanded se
    LEFT JOIN user_first_year ufy 
        ON se.user_name = ufy.user_name AND se.source_tag = ufy.source_tag
    GROUP BY se.year, se.source_tag
)
SELECT 
    ym.year,
    ym."Source",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    st.total_edits_all_time as "Total Edits",
    st.total_edits_2021_now as "Total Edits (2021 - Now)",
    st.total_contributors_all_time as "Total Contributors",
    st.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN source_totals st
    ON ym."Source" = st."Source"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

# Apply HTML links to source names
df["Source"] = df["Source"].apply(
    lambda name: source_name_to_html_link[name] if name in source_name_to_html_link else name
)

top_100_contributors = df.groupby("Source")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Source")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Source")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Source")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Sources by Contributors",
        query_or_df=df[df["Source"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Contributors",
        center_columns=["Rank", "Source"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Sources by Contributors 2021 - Now",
        query_or_df=df[(df["Source"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Contributors",
        center_columns=["Rank", "Source"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Sources by Edits",
        query_or_df=df[df["Source"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Edits",
        center_columns=["Rank", "Source"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Sources by Edits 2021 - Now",
        query_or_df=df[(df["Source"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Source",
        value_col="Edits",
        center_columns=["Rank", "Source"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Source 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 Total Contributors
1 Local Knowledge 15 39 96 124 299 1,112 1,185 1,155 6,004 13,364 14,866 16,846 16,774 16,676 24,925 73,065 84,509 19,129 215,544
2 Survey 50 151 266 332 936 6,016 6,994 6,815 16,397 17,965 18,067 21,616 31,851 35,699 43,215 64,838 74,153 21,858 194,191
3 Bing Aerial Imagery 0 250 543 628 1,521 7,987 9,704 9,369 11,150 13,935 14,177 14,847 14,617 14,549 17,520 33,157 35,436 9,909 121,748
4 GPS 24 73 142 158 379 2,086 1,968 1,784 2,756 4,071 4,474 5,516 5,437 4,636 5,351 11,780 13,958 2,783 47,040
5 Streetlevel Imagery 0 0 0 0 0 0 0 0 552 1,525 3,804 5,611 5,473 4,915 6,128 12,539 15,216 3,795 45,479
6 Knowledge 5 36 63 95 150 5,231 5,692 5,528 4,952 4,737 4,846 4,643 4,101 4,110 3,955 3,988 4,034 1,076 29,824
7 Esri World Imagery 0 0 0 0 0 0 8 3 884 2,528 3,875 5,311 5,061 4,733 5,468 5,750 6,061 2,115 20,635
8 Maxar Imagery 0 0 0 0 0 0 0 0 0 0 4,099 5,927 6,691 6,777 4,527 886 528 51 16,723
9 Mapillary 0 0 0 0 0 76 294 452 733 832 2,899 4,476 4,421 3,594 3,428 3,404 3,752 1,367 16,555
10 Mapbox Satellite 0 0 0 0 5 589 1,309 1,513 1,669 1,651 1,943 2,692 2,740 2,399 2,376 2,231 2,602 1,107 13,658
11 OSM Notes 0 0 0 0 11 41 28 41 38 42 35 32 32 1,450 2,055 4,810 5,804 1,298 13,229
12 mapwithai 0 0 0 0 0 0 0 0 0 0 17 220 1,310 1,773 6,674 2,268 2,091 498 11,285
13 streetside 0 0 0 0 0 0 0 0 0 2 1,450 2,261 2,121 1,425 1,646 1,639 2,015 692 8,807
14 DigitalGlobe Imagery (now Maxar) 0 4 2 11 61 219 189 164 2,250 3,276 3,193 827 604 30 33 41 19 3 7,019
15 tms[]:https:// 0 0 0 0 0 36 635 1,236 2,079 1,365 1,224 1,408 979 574 271 170 65 53 6,795
16 OSM Note 0 1 1 3 44 307 318 342 368 438 473 1,394 1,755 1,799 1,753 1,586 1,585 573 5,502
17 OpenStreetMap Carto (Standard) 0 0 0 0 0 0 0 14 129 184 331 952 1,151 1,336 1,408 1,279 1,281 329 5,316
18 OpenStreetCam Images 0 0 0 0 0 0 0 7 101 135 1,470 2,239 2,011 301 9 1 3 0 4,574
19 Strava 0 0 0 0 0 78 198 298 335 342 311 957 1,230 945 786 815 747 252 3,557
20 JOSM Validator 1 2 1 1 3 17 20 47 46 45 47 3,175 417 166 86 43 23 8 3,500
21 website 0 2 6 8 28 190 228 242 312 468 534 596 619 648 600 728 861 237 3,336
22 kartaview 0 0 0 0 0 0 0 0 0 0 0 1 50 911 1,022 939 1,030 258 3,034
23 Osmose 0 0 0 2 9 230 310 347 381 376 484 602 546 464 489 471 477 150 2,672
24 wiki.openstreetmap.org 2 3 5 6 22 135 124 135 153 212 280 307 358 371 315 398 452 113 2,073
25 Wikipedia 0 4 6 16 34 215 177 211 252 207 230 248 244 263 216 252 281 68 1,906
26 BDOrtho 0 0 0 0 0 0 0 127 253 300 332 521 534 584 541 534 498 177 1,837
27 wikipedia 3 5 14 16 34 233 199 199 212 209 227 250 225 213 178 226 206 41 1,674
28 Website 0 0 0 1 4 53 42 64 98 102 121 153 189 255 290 463 454 135 1,613
29 mapilio 0 0 0 0 0 0 0 0 0 0 0 0 0 0 308 694 836 210 1,603
30 Geotagged Images 0 0 0 0 2 33 20 33 37 45 58 317 425 476 487 468 463 127 1,577
31 Cadastre 0 1 11 13 32 183 198 205 227 216 243 319 333 377 374 393 354 101 1,544
32 panoramax 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 413 1,147 356 1,508
33 aerial_imagery 0 0 0 0 1 0 0 1 1,221 8 11 28 20 13 13 25 65 14 1,351
34 Geoportal 2: Orthophotomap 0 0 0 0 1 96 166 144 176 144 189 288 365 359 368 354 348 167 1,248
35 KartaView 0 0 0 0 0 0 0 0 0 0 0 1 532 514 352 270 280 101 1,224
36 cadastre 0 3 9 13 37 212 189 199 213 222 230 246 227 207 192 192 191 52 1,182
37 basemap.at 0 0 0 0 26 121 149 192 163 164 151 208 246 362 296 255 215 83 1,141
38 gpx 1 7 8 15 16 131 157 168 169 153 164 188 152 139 129 148 141 36 1,120
39 osmose 0 0 1 2 11 98 139 115 124 132 177 196 179 173 159 140 136 33 1,051
40 .gpx data file 0 2 3 4 14 101 106 101 105 141 133 169 143 120 114 98 101 20 1,012
41 OSM 0 0 1 2 14 95 105 101 130 112 147 143 123 110 110 100 123 29 895
42 GPX 1 2 4 13 13 97 117 118 112 112 116 152 122 91 100 97 106 19 892
43 microsoft/BuildingFootprints 0 0 0 0 0 0 0 0 0 0 0 106 88 204 322 288 208 74 885
44 PNOA Spain 0 0 1 0 1 13 22 19 32 66 96 153 215 229 250 229 234 95 874
45 youtube 0 0 0 0 0 6 13 25 31 39 70 110 146 138 172 213 238 65 801
46 wiki 0 0 1 2 3 80 87 80 102 93 109 122 122 137 107 125 140 25 796
47 Google Maps 0 0 0 0 3 17 9 9 15 43 45 58 96 63 84 184 182 14 779
48 Validator 0 0 0 2 1 33 14 8 11 10 7 664 38 20 14 9 6 4 766
49 Japan GSI 0 0 0 0 0 0 3 16 75 142 168 147 149 147 154 180 183 89 757
50 Bavaria (80 cm) 0 0 0 0 0 0 6 11 25 66 116 214 273 266 263 164 2 1 701
51 wikidata 0 0 0 0 0 18 18 41 51 54 80 95 107 141 128 152 169 37 682
52 PDOK aerial imagery 0 0 0 0 0 0 0 0 33 71 84 137 158 213 204 197 206 68 679
53 MapBox 0 0 0 0 0 345 253 159 103 47 44 18 13 15 15 12 9 5 673
54 North Rhine-Westphalia aerial photograph 0 0 0 0 0 0 0 0 0 0 0 133 226 251 233 229 232 82 671
55 local survey 0 0 4 4 8 59 66 71 83 91 118 121 125 133 97 97 107 34 639
56 White) 0 0 0 0 0 12 15 24 21 23 56 257 261 38 0 0 0 0 631
57 OSM Data 0 0 0 0 0 4 5 5 8 12 17 161 307 248 121 54 43 15 605
58 Wikidata 0 0 0 0 0 16 11 56 42 47 60 76 89 114 100 110 145 49 592
59 OpenStreetMap (Standard Black 0 0 0 0 0 0 0 8 20 23 56 257 263 37 0 0 0 0 592
60 Streetside 0 0 0 0 0 0 0 0 1 3 8 35 24 21 38 258 270 125 576
61 MapBox Satellite 0 0 0 0 0 413 176 54 31 16 18 8 5 5 6 3 3 0 575
62 extrapolation 3 11 9 15 18 49 41 36 46 82 72 74 71 81 73 109 109 31 555
63 Ortskenntnis 0 0 0 0 21 118 93 91 89 111 108 101 111 80 89 67 72 21 554
64 websites 0 0 3 2 7 47 43 44 54 58 82 104 100 78 86 105 79 25 539
65 osm 0 0 0 1 7 63 61 61 69 77 92 77 79 61 63 49 55 20 520
66 OSM Inspector 0 0 0 2 8 79 55 82 113 78 78 80 89 75 52 61 46 23 519
67 www.openstreetmap.org/changeset/ 0 0 0 0 0 1 20 33 37 64 87 98 121 128 124 133 140 44 518
68 PNOA 1 3 13 9 10 76 66 95 91 96 123 115 97 91 92 78 67 29 518
69 local 0 0 1 1 11 34 45 44 36 63 73 65 80 80 44 44 60 11 510
70 google maps 0 0 1 0 1 6 6 6 23 40 39 59 65 31 57 91 88 7 506
71 web 0 0 1 1 6 56 51 47 50 75 63 81 68 75 77 89 93 21 505
72 personal knowledge 0 2 4 2 18 67 58 62 50 60 60 63 71 46 44 61 68 11 500
73 Internet 0 0 0 0 2 57 47 50 41 57 54 56 72 76 65 92 96 23 495
74 swisstopo SWISSIMAGE 0 0 0 0 0 0 0 0 0 0 0 0 155 184 186 186 194 69 486
75 vor Ort 0 0 0 0 11 50 51 52 67 71 95 90 97 97 71 67 71 28 478
76 BANO 0 0 0 0 0 56 64 67 75 58 61 96 121 141 157 108 80 21 476
77 National Agriculture Imagery Program 0 0 0 0 0 0 0 0 0 0 21 85 106 98 110 153 122 18 470
78 cadastre-dgi-fr 4 27 27 33 41 112 100 82 75 75 63 80 64 64 47 44 34 12 469
79 observation 0 2 6 3 15 80 60 74 61 47 58 60 65 66 52 41 42 11 456
80 Norway Orthophoto 0 0 0 0 0 0 0 0 0 40 60 82 95 155 112 115 112 40 427
81 Catastro Spain 0 0 0 1 0 7 24 25 28 56 72 90 75 69 82 98 95 39 423
82 IGN 0 0 0 0 0 4 7 23 41 48 55 58 78 70 76 87 81 20 420
83 internet 0 0 0 2 1 39 33 40 38 47 68 65 51 60 51 73 61 21 419
84 Google 0 3 0 0 1 19 9 24 19 27 18 29 61 36 45 78 60 4 414
85 OpenTopoMap 0 0 0 0 0 0 0 0 8 22 35 94 119 109 118 103 89 24 413
86 {switch:a,b,c,d}.tiles.mapbox.com 0 0 0 0 0 0 1 0 37 59 212 92 37 3 0 0 0 0 409
87 Ortho HR 0 0 0 0 0 0 0 0 0 1 54 107 216 195 27 8 5 1 405
88 MAPNIK 0 0 0 0 1 197 155 47 27 5 3 22 10 9 8 7 5 0 404
89 JOSM 0 1 2 1 3 51 47 60 60 59 60 66 56 49 42 21 23 7 402
90 - 0 0 0 0 8 49 35 53 49 63 54 44 46 49 50 50 46 20 401
91 Maps4BW 0 0 0 0 24 99 101 102 95 107 98 89 96 82 35 6 4 0 396
92 1.19) 0 0 0 0 0 0 0 0 0 1 3 124 322 16 5 6 1 1 394
93 MML Orthophoto 0 0 0 0 0 0 0 0 0 17 27 57 64 126 154 128 81 41 394
94 1.79) 0 0 0 0 0 0 0 0 1 1 0 124 319 18 4 8 1 0 393
95 imagery 0 1 1 2 3 17 15 27 32 51 66 69 54 42 40 55 74 19 391
96 Ortho HR IGN 0 0 0 0 0 0 0 0 0 0 0 0 7 26 226 164 136 38 391
97 geoportal 0 0 0 0 3 83 62 69 51 50 66 64 71 49 52 46 48 20 385
98 Luftbild 0 0 1 1 5 22 30 25 42 49 63 62 56 54 54 63 59 26 384
99 Vor Ort 0 0 0 1 1 24 22 14 28 31 45 42 54 65 100 90 81 21 384
100 visit 0 3 0 1 5 41 33 44 41 63 59 45 49 53 55 38 51 10 380