Hashtags Analysis

Analysis of OpenStreetMap changesets that use hashtags from the hashtags column.

Monthly Percentage of Edits and Contributors Using Hashtags

code
df = duckdb.sql("""
WITH monthly_with_hashtags AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as contributors_with_hashtags,
        SUM(edit_count) as edits_with_hashtags
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE hashtags IS NOT NULL
    GROUP BY year, month
),
monthly_total AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors,
        SUM(edit_count) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    GROUP BY year, month
)
SELECT 
    mt.months,
    COALESCE(ROUND((mwh.contributors_with_hashtags * 100.0) / mt.total_contributors, 2), 0) as 'Percentage Contributors with Hashtags',
    COALESCE(ROUND((mwh.edits_with_hashtags * 100.0) / mt.total_edits, 2), 0) as 'Percentage Edits with Hashtags'
FROM monthly_total mt
LEFT JOIN monthly_with_hashtags mwh ON mt.year = mwh.year AND mt.month = mwh.month
ORDER BY mt.year, mt.month
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Percentage of Contributors Using Hashtags",
            label="Contributors",
            x_col="months",
            y_col="Percentage Contributors with Hashtags",
            y_unit_hover_template="%",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Percentage of Edits Using Hashtags",
            label="Edits",
            x_col="months",
            y_col="Percentage Edits with Hashtags",
            y_unit_hover_template="%",
            query_or_df=df,
        ),
    ]
)

Monthly Top 10 Hashtags: Edits, Accumulated Edits, Contributors and Accumulated Contributors

code
# Get top 10 hashtags by total edits
df = duckdb.sql("""
WITH hashtag_expanded AS (
    SELECT 
        year,
        month,
        user_name,
        edit_count,
        unnest(hashtags) as hashtag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE hashtags IS NOT NULL
),
top_hashtags AS (
    SELECT hashtag
    FROM (
        SELECT
            hashtag,
            SUM(edit_count) as total_edits
        FROM hashtag_expanded
        GROUP BY hashtag
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
monthly_hashtag_data AS (
    SELECT 
        he.year,
        he.month,
        CONCAT(he.year, '-', LPAD(CAST(he.month as VARCHAR), 2, '0')) as months,
        he.hashtag,
        COUNT(DISTINCT he.user_name) as "Contributors",
        SUM(he.edit_count) as "Edits"
    FROM hashtag_expanded he
    WHERE he.hashtag IN (SELECT hashtag FROM top_hashtags)
    GROUP BY he.year, he.month, he.hashtag
)
SELECT 
    months,
    hashtag,
    "Contributors",
    "Edits",
    SUM("Contributors") OVER (PARTITION BY hashtag ORDER BY year, month) as "Contributors Accumulated",
    SUM("Edits") OVER (PARTITION BY hashtag ORDER BY year, month) as "Edits Accumulated"
FROM monthly_hashtag_data
ORDER BY year, month, hashtag
""").df()

util.show_figure(
    [
        util.FigureConfig(
            title="Monthly Edits by Top 10 Hashtags",
            label="Edits",
            x_col="months",
            y_col="Edits",
            group_col="hashtag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Accumulated Edits by Top 10 Hashtags",
            label="Edits Accumulated",
            x_col="months",
            y_col="Edits Accumulated",
            group_col="hashtag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Monthly Contributors by Top 10 Hashtags",
            label="Contributors",
            x_col="months",
            y_col="Contributors",
            group_col="hashtag",
            query_or_df=df,
        ),
        util.FigureConfig(
            title="Accumulated Contributors by Top 10 Hashtags",
            label="Contributors Accumulated",
            x_col="months",
            y_col="Contributors Accumulated",
            group_col="hashtag",
            query_or_df=df,
        ),
    ]
)

Top 100 Hashtags Yearly

code
query = """
WITH hashtag_expanded AS (
    SELECT 
        year,
        user_name,
        edit_count,
        unnest(hashtags) as hashtag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE hashtags IS NOT NULL
),
user_first_year AS (
    SELECT 
        user_name,
        hashtag,
        MIN(year) as first_year
    FROM hashtag_expanded
    GROUP BY user_name, hashtag
),
hashtag_totals AS (
    SELECT
        hashtag as "Hashtag",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM hashtag_expanded
    GROUP BY hashtag
),
yearly_metrics AS (
    SELECT
        he.year,
        he.hashtag as "Hashtag",
        CAST(SUM(he.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT he.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = he.year THEN he.user_name END) as BIGINT) as "New Contributors"
    FROM hashtag_expanded he
    LEFT JOIN user_first_year ufy 
        ON he.user_name = ufy.user_name AND he.hashtag = ufy.hashtag
    GROUP BY he.year, he.hashtag
)
SELECT 
    ym.year,
    ym."Hashtag",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    ht.total_edits_all_time as "Total Edits",
    ht.total_edits_2021_now as "Total Edits (2021 - Now)",
    ht.total_contributors_all_time as "Total Contributors",
    ht.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN hashtag_totals ht
    ON ym."Hashtag" = ht."Hashtag"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()

top_100_contributors = df.groupby("Hashtag")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Hashtag")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Hashtag")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Hashtag")["Total Edits (2021 - Now)"].first().nlargest(100)

table_configs = [
    util.TableConfig(
        title="Top 100 Hashtags by Contributors",
        query_or_df=df[df["Hashtag"].isin(top_100_contributors.index)],
        x_axis_col="year",
        y_axis_col="Hashtag",
        value_col="Contributors",
        center_columns=["Rank", "Hashtag"],
        sum_col="Total Contributors",
    ),
    util.TableConfig(
        title="Top 100 Hashtags by Contributors 2021 - Now",
        query_or_df=df[(df["Hashtag"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Hashtag",
        value_col="Contributors",
        center_columns=["Rank", "Hashtag"],
        sum_col="Total Contributors (2021 - Now)",
    ),
    util.TableConfig(
        title="Top 100 Hashtags by Edits",
        query_or_df=df[df["Hashtag"].isin(top_100_edits.index)],
        x_axis_col="year",
        y_axis_col="Hashtag",
        value_col="Edits",
        center_columns=["Rank", "Hashtag"],
        sum_col="Total Edits",
    ),
    util.TableConfig(
        title="Top 100 Hashtags by Edits 2021 - Now",
        query_or_df=df[(df["Hashtag"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
        x_axis_col="year",
        y_axis_col="Hashtag",
        value_col="Edits",
        center_columns=["Rank", "Hashtag"],
        sum_col="Total Edits (2021 - Now)",
    ),
]

util.show_tables(table_configs)
Rank Hashtag 2017 2018 2019 2020 2021 2022 2023 2024 2025 Total Contributors
1 #missingmaps 12,561 28,330 26,727 28,770 35,232 10,913 12,791 9,616 7,179 152,316
2 #redcross 7,849 8,209 7,332 19,818 25,801 3,043 688 1,071 290 67,885
3 #msf 724 5,128 8,456 4,899 5,714 4,988 6,094 6,319 5,824 42,947
4 #opencitieslac 0 0 0 0 9,244 16,658 9,565 125 36 32,844
5 #youthmappers 1,601 2,916 4,288 5,469 9,179 4,429 2,870 2,408 1,968 31,252
6 #indonesia 1 1,622 2,031 14,365 14,434 371 51 3 4 30,600
7 #covid19 0 0 0 21,990 6,691 283 154 72 1 28,072
8 #climatechange 0 0 4 8,012 19,957 462 0 160 9 27,037
9 #accenture 548 1,870 2,085 4,814 5,614 1,730 2,065 1,792 1,431 20,163
10 #bangladesh 3,191 3,011 161 675 7,919 3,644 186 521 50 18,626
11 #jpmc 141 1,018 1,256 2,815 4,305 4,828 2,222 2,532 23 17,264
12 #tanzaniadevelopmenttrust 1,383 4,409 3,894 3,425 1,157 1,173 1,232 429 15 15,881
13 #educategirls 0 0 0 12,563 2,757 177 147 127 0 15,543
14 #amm 781 3,299 2,378 4,597 2,630 1,824 1,790 1,175 267 15,028
15 #nigeria 1,097 3,496 2,555 2,177 2,608 878 141 26 33 12,370
16 #osmph 3 621 2,994 5,091 1,368 472 487 1,392 21 11,873
17 #aphub 0 0 0 0 16 0 1,575 5,630 5,102 11,388
18 #buildings 478 119 107 2,966 3,795 1,999 1,059 1,686 101 11,375
19 #salesforce 57 3,251 1,234 3,726 1,216 665 278 1,836 287 11,358
20 #liga-zambia 0 0 0 0 11,124 111 134 0 0 11,319
21 #mapgive 3,012 1,804 708 1,178 3,488 533 197 447 2 10,916
22 #maproulette 154 473 1,426 1,779 1,564 1,676 2,769 2,997 2,843 10,588
23 #msft 1,514 2,100 1,542 2,558 2,586 35 131 661 265 10,416
24 #myanmar 3,013 4,336 2,435 418 334 27 22 2 3 10,095
25 #covbots 0 0 0 7,617 1,853 100 71 4 0 9,346
26 #dronebird 1 659 2,321 1,257 1,656 385 523 361 2,456 9,256
27 #türkiyeeq060223 0 0 0 0 0 0 8,719 3 2 8,721
28 #tanzania 1,383 2,295 740 2,004 1,066 695 595 435 34 8,446
29 #upnoah 36 820 2,991 3,463 1,100 212 0 2 0 8,330
30 #osmgeoweek 170 297 162 460 3,370 1,873 1,321 1,215 2 8,314
31 #omguru 0 0 0 0 0 0 364 6,276 2,160 8,294
32 #furuhashilab 0 31 1,584 1,269 1,695 494 544 364 2,549 8,220
33 #galschool 0 154 1,257 6,856 7 5 0 0 0 8,123
34 #msftgive 1,251 1,561 1,581 2,496 2,013 44 39 5 13 8,114
35 #hotmicrogrants2020 0 0 0 7,676 148 122 77 84 0 8,057
36 #mapph 7 595 2,884 3,425 969 191 0 3 0 7,813
37 #foodsecurityph 0 595 2,879 3,424 965 192 0 2 0 7,797
38 #crisismappersjapan 52 20 2,049 1,167 1,201 258 329 329 2,298 7,505
39 #yercizenler 0 554 415 946 247 51 5,379 202 60 7,499
40 #rodekruis 2,278 1,787 1,435 1,050 633 224 75 439 67 7,470
41 #missingmaps_drk 0 0 254 1,343 3,154 1,966 397 455 217 7,373
42 #uniquemappersnetwork 0 0 262 560 3,404 1,801 653 306 692 7,108
43 #awsmapathon 0 159 38 675 675 1,500 3,338 1,285 44 7,092
44 #uniquemappersteam 20 232 29 465 3,381 1,862 616 304 690 7,049
45 #maplibya 0 0 0 41 2,937 1,817 816 1,865 1 7,002
46 #lnsdi 0 0 0 0 2,927 1,816 816 1,865 1 6,957
47 #tt_event 0 0 0 0 0 0 26 2,934 4,435 6,882
48 #mapimpacto 0 0 0 6,667 45 5 0 0 0 6,713
49 #ourimpact 0 0 0 1,389 891 1,325 1,835 2,073 293 6,620
50 #msgivesback 0 0 0 1,106 2,133 2,275 810 944 12 6,386
51 #maptripoli 0 0 0 0 2,881 1,719 702 1,507 1 6,364
52 #nigerstate 1,094 2,171 907 1,696 347 99 1 0 0 6,076
53 #lndti 0 0 0 0 1,940 1,816 815 1,865 1 6,013
54 #cartong 137 1,565 786 1,032 1,434 676 386 267 229 5,998
55 #covjam 0 0 0 5,670 137 0 0 0 0 5,787
56 #hurricanemaria 5,763 4 0 1 0 0 0 0 0 5,764
57 #kenya 174 869 3,805 676 220 172 7 24 6 5,743
58 #unmappers 0 0 324 471 1,939 1,335 778 343 996 5,702
59 #osm_in 0 2,820 2,592 458 5 1 0 0 0 5,648
60 #puertorico 5,636 2 0 5 0 0 0 0 0 5,641
61 #osmnigeria 0 29 281 557 3,318 1,627 227 1 0 5,610
62 #water 11 11 14 15 2,018 1,832 897 1,186 16 5,577
63 #disasterresponse 0 0 1 0 309 776 583 3,186 1,187 5,552
64 #moroccoearthquake2023 0 0 0 0 0 0 4,727 1,027 4 5,510
65 #infrastructure 0 4 0 2 1,943 1,814 872 1,171 4 5,404
66 #vegetation 0 0 0 0 1,936 1,813 878 1,171 2 5,397
67 #landuselandcover 0 0 0 0 1,937 1,813 871 1,170 1 5,390
68 #streetsroads 0 0 0 0 1,936 1,812 872 1,169 1 5,388
69 #soil 0 0 0 0 1,935 1,810 872 1,169 1 5,386
70 #air 0 1 1 1 1,930 1,801 870 1,169 5 5,376
71 #osm 39 63 97 456 2,958 541 209 1,087 81 5,311
72 #omhap 0 0 0 0 0 628 4,526 37 12 5,092
73 #uniquemappersteam2021 0 0 0 0 3,404 1,616 226 1 0 4,923
74 #hotph 0 0 1,758 2,894 388 12 9 31 0 4,875
75 #srilanka 3 2,474 5 12 19 46 2,308 1 4 4,829
76 #turkey 0 1 9 5 3 4 4,810 13 1 4,828
77 #turkiye 0 0 0 0 1 0 4,813 7 0 4,817
78 #osmindia 0 0 0 0 1 7 449 3,872 671 4,782
79 #withrefugees 628 3,803 413 24 15 3 0 0 0 4,781
80 #youthmappersagu 0 0 0 30 1,419 433 420 137 2,445 4,780
81 #syria 0 1 1 1 1 0 4,532 164 65 4,740
82 #philaware 0 0 1,758 2,852 274 11 8 31 0 4,729
83 #colombia 235 2,602 736 602 652 12 1 7 5 4,728
84 #victornsunday 0 0 260 556 2,623 1,346 225 1 0 4,725
85 #bloomberg 1 0 344 1,866 1,185 501 664 670 82 4,620
86 #madagascar 147 305 15 5 2,000 2,323 6 3 1 4,619
87 #chad 0 279 2,183 522 188 1,408 215 1 0 4,592
88 #drc 2 356 1,427 792 179 79 369 1,391 169 4,529
89 #rccolombia 232 2,598 523 565 650 12 0 0 0 4,480
90 #nigeriafloods2021 0 0 0 0 2,491 2,043 175 8 0 4,467
91 #osmafrica 0 0 1 0 4 2,796 894 846 124 4,407
92 #mapuganda 230 2,465 22 579 375 599 152 97 65 4,391
93 #womenconnect 0 0 3,157 1,275 6 3 0 0 0 4,294
94 #osmnepal 7 26 25 390 396 1,187 1,437 1,260 73 4,282
95 #ngdisasterriskreduction 0 0 262 527 2,510 1,004 208 1 0 4,273
96 #gomicron21twsg 0 0 0 0 4,248 0 0 0 0 4,248
97 #cycloneidai 0 0 4,207 9 3 1 1 0 0 4,212
98 #mapforpreparedness 307 104 1,298 1,801 821 3 0 0 0 4,211
99 #osmbd 117 82 26 6 48 3,463 270 259 48 4,100
100 #mozambique 5 18 2,920 218 87 871 4 6 0 4,064

Geographical Distribution of Top 10 Hashtags

code
# Create geographical maps for top 10 hashtags
df = duckdb.sql("""
WITH hashtag_expanded AS (
    SELECT 
        mid_pos_x,
        mid_pos_y,
        edit_count,
        unnest(hashtags) as hashtag
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE hashtags IS NOT NULL 
      AND mid_pos_x IS NOT NULL 
      AND mid_pos_y IS NOT NULL
),
top_hashtags AS (
    SELECT hashtag
    FROM (
        SELECT
            hashtag,
            SUM(edit_count) as total_edits
        FROM hashtag_expanded
        GROUP BY hashtag
        ORDER BY total_edits DESC
        LIMIT 10
    )
),
hashtag_geo_data AS (
    SELECT 
        he.hashtag,
        he.mid_pos_x as x,
        he.mid_pos_y as y,
        SUM(he.edit_count) as z
    FROM hashtag_expanded he
    WHERE he.hashtag IN (SELECT hashtag FROM top_hashtags)
    GROUP BY he.hashtag, he.mid_pos_x, he.mid_pos_y
)
SELECT * FROM hashtag_geo_data
ORDER BY hashtag, x, y
""").df()

# Create dropdown maps for each hashtag
configs = []
for hashtag in sorted(df["hashtag"].unique()):
    configs.append(
        util.FigureConfig(
            title=f"Edit Distribution - {hashtag}".replace("#", ""),
            x_col="x",
            y_col="y",
            z_col="z",
            query_or_df=df[df["hashtag"] == hashtag],
            plot_type="map",
        )
    )

util.show_figure(configs, type="dropdown")