Analysis of OpenStreetMap changesets that use hashtags from the hashtags column.
df = duckdb.sql("""
WITH monthly_with_hashtags AS (
SELECT
year,
month,
CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
COUNT(DISTINCT user_name) as contributors_with_hashtags,
SUM(edit_count) as edits_with_hashtags
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE hashtags IS NOT NULL
GROUP BY year, month
),
monthly_total AS (
SELECT
year,
month,
CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
COUNT(DISTINCT user_name) as total_contributors,
SUM(edit_count) as total_edits
FROM '../changeset_data/year=*/month=*/*.parquet'
GROUP BY year, month
)
SELECT
mt.months,
COALESCE(ROUND((mwh.contributors_with_hashtags * 100.0) / mt.total_contributors, 2), 0) as 'Percentage Contributors with Hashtags',
COALESCE(ROUND((mwh.edits_with_hashtags * 100.0) / mt.total_edits, 2), 0) as 'Percentage Edits with Hashtags'
FROM monthly_total mt
LEFT JOIN monthly_with_hashtags mwh ON mt.year = mwh.year AND mt.month = mwh.month
ORDER BY mt.year, mt.month
""").df()
util.show_figure(
[
util.FigureConfig(
title="Monthly Percentage of Contributors Using Hashtags",
label="Contributors",
x_col="months",
y_col="Percentage Contributors with Hashtags",
y_unit_hover_template="%",
query_or_df=df,
),
util.FigureConfig(
title="Monthly Percentage of Edits Using Hashtags",
label="Edits",
x_col="months",
y_col="Percentage Edits with Hashtags",
y_unit_hover_template="%",
query_or_df=df,
),
]
)
# Get top 10 hashtags by total edits
df = duckdb.sql("""
WITH hashtag_expanded AS (
SELECT
year,
month,
user_name,
edit_count,
unnest(hashtags) as hashtag
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE hashtags IS NOT NULL
),
top_hashtags AS (
SELECT hashtag
FROM (
SELECT
hashtag,
SUM(edit_count) as total_edits
FROM hashtag_expanded
GROUP BY hashtag
ORDER BY total_edits DESC
LIMIT 10
)
),
monthly_hashtag_data AS (
SELECT
he.year,
he.month,
CONCAT(he.year, '-', LPAD(CAST(he.month as VARCHAR), 2, '0')) as months,
he.hashtag,
COUNT(DISTINCT he.user_name) as "Contributors",
SUM(he.edit_count) as "Edits"
FROM hashtag_expanded he
WHERE he.hashtag IN (SELECT hashtag FROM top_hashtags)
GROUP BY he.year, he.month, he.hashtag
)
SELECT
months,
hashtag,
"Contributors",
"Edits",
SUM("Contributors") OVER (PARTITION BY hashtag ORDER BY year, month) as "Contributors Accumulated",
SUM("Edits") OVER (PARTITION BY hashtag ORDER BY year, month) as "Edits Accumulated"
FROM monthly_hashtag_data
ORDER BY year, month, hashtag
""").df()
util.show_figure(
[
util.FigureConfig(
title="Monthly Edits by Top 10 Hashtags",
label="Edits",
x_col="months",
y_col="Edits",
group_col="hashtag",
query_or_df=df,
),
util.FigureConfig(
title="Accumulated Edits by Top 10 Hashtags",
label="Edits Accumulated",
x_col="months",
y_col="Edits Accumulated",
group_col="hashtag",
query_or_df=df,
),
util.FigureConfig(
title="Monthly Contributors by Top 10 Hashtags",
label="Contributors",
x_col="months",
y_col="Contributors",
group_col="hashtag",
query_or_df=df,
),
util.FigureConfig(
title="Accumulated Contributors by Top 10 Hashtags",
label="Contributors Accumulated",
x_col="months",
y_col="Contributors Accumulated",
group_col="hashtag",
query_or_df=df,
),
]
)
query = """
WITH hashtag_expanded AS (
SELECT
year,
user_name,
edit_count,
unnest(hashtags) as hashtag
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE hashtags IS NOT NULL
),
user_first_year AS (
SELECT
user_name,
hashtag,
MIN(year) as first_year
FROM hashtag_expanded
GROUP BY user_name, hashtag
),
hashtag_totals AS (
SELECT
hashtag as "Hashtag",
CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
FROM hashtag_expanded
GROUP BY hashtag
),
yearly_metrics AS (
SELECT
he.year,
he.hashtag as "Hashtag",
CAST(SUM(he.edit_count) as BIGINT) as "Edits",
CAST(COUNT(DISTINCT he.user_name) as BIGINT) as "Contributors",
CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = he.year THEN he.user_name END) as BIGINT) as "New Contributors"
FROM hashtag_expanded he
LEFT JOIN user_first_year ufy
ON he.user_name = ufy.user_name AND he.hashtag = ufy.hashtag
GROUP BY he.year, he.hashtag
)
SELECT
ym.year,
ym."Hashtag",
ym."Edits",
ym."New Contributors",
ym."Contributors",
ht.total_edits_all_time as "Total Edits",
ht.total_edits_2021_now as "Total Edits (2021 - Now)",
ht.total_contributors_all_time as "Total Contributors",
ht.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN hashtag_totals ht
ON ym."Hashtag" = ht."Hashtag"
ORDER BY year DESC, "Edits" DESC
"""
df = duckdb.sql(query).df()
top_100_contributors = df.groupby("Hashtag")["Total Contributors"].first().nlargest(100)
top_100_contributors_2021_now = df.groupby("Hashtag")["Total Contributors (2021 - Now)"].first().nlargest(100)
top_100_edits = df.groupby("Hashtag")["Total Edits"].first().nlargest(100)
top_100_edits_2021_now = df.groupby("Hashtag")["Total Edits (2021 - Now)"].first().nlargest(100)
table_configs = [
util.TableConfig(
title="Top 100 Hashtags by Contributors",
query_or_df=df[df["Hashtag"].isin(top_100_contributors.index)],
x_axis_col="year",
y_axis_col="Hashtag",
value_col="Contributors",
center_columns=["Rank", "Hashtag"],
sum_col="Total Contributors",
),
util.TableConfig(
title="Top 100 Hashtags by Contributors 2021 - Now",
query_or_df=df[(df["Hashtag"].isin(top_100_contributors_2021_now.index)) & (df["year"] >= 2021)],
x_axis_col="year",
y_axis_col="Hashtag",
value_col="Contributors",
center_columns=["Rank", "Hashtag"],
sum_col="Total Contributors (2021 - Now)",
),
util.TableConfig(
title="Top 100 Hashtags by Edits",
query_or_df=df[df["Hashtag"].isin(top_100_edits.index)],
x_axis_col="year",
y_axis_col="Hashtag",
value_col="Edits",
center_columns=["Rank", "Hashtag"],
sum_col="Total Edits",
),
util.TableConfig(
title="Top 100 Hashtags by Edits 2021 - Now",
query_or_df=df[(df["Hashtag"].isin(top_100_edits_2021_now.index)) & (df["year"] >= 2021)],
x_axis_col="year",
y_axis_col="Hashtag",
value_col="Edits",
center_columns=["Rank", "Hashtag"],
sum_col="Total Edits (2021 - Now)",
),
]
util.show_tables(table_configs)
| Rank | Hashtag | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | 2025 | Total Contributors |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | #missingmaps | 12,561 | 28,330 | 26,727 | 28,770 | 35,232 | 10,913 | 12,791 | 9,616 | 5,948 | 151,269 |
| 2 | #redcross | 7,849 | 8,209 | 7,332 | 19,818 | 25,801 | 3,043 | 688 | 1,071 | 285 | 67,882 |
| 3 | #msf | 724 | 5,128 | 8,456 | 4,899 | 5,714 | 4,988 | 6,094 | 6,319 | 4,670 | 41,936 |
| 4 | #opencitieslac | 0 | 0 | 0 | 0 | 9,244 | 16,658 | 9,565 | 125 | 36 | 32,844 |
| 5 | #youthmappers | 1,601 | 2,916 | 4,288 | 5,469 | 9,179 | 4,429 | 2,870 | 2,408 | 1,818 | 31,117 |
| 6 | #indonesia | 1 | 1,622 | 2,031 | 14,365 | 14,434 | 371 | 51 | 3 | 3 | 30,599 |
| 7 | #covid19 | 0 | 0 | 0 | 21,990 | 6,691 | 283 | 154 | 72 | 1 | 28,072 |
| 8 | #climatechange | 0 | 0 | 4 | 8,012 | 19,957 | 462 | 0 | 160 | 8 | 27,036 |
| 9 | #accenture | 548 | 1,870 | 2,085 | 4,814 | 5,614 | 1,730 | 2,065 | 1,792 | 1,378 | 20,118 |
| 10 | #bangladesh | 3,191 | 3,011 | 161 | 675 | 7,919 | 3,644 | 186 | 521 | 48 | 18,624 |
| 11 | #jpmc | 141 | 1,018 | 1,256 | 2,815 | 4,305 | 4,828 | 2,222 | 2,532 | 21 | 17,262 |
| 12 | #tanzaniadevelopmenttrust | 1,383 | 4,409 | 3,894 | 3,425 | 1,157 | 1,173 | 1,232 | 429 | 15 | 15,881 |
| 13 | #educategirls | 0 | 0 | 0 | 12,563 | 2,757 | 177 | 147 | 127 | 0 | 15,543 |
| 14 | #amm | 781 | 3,299 | 2,378 | 4,597 | 2,630 | 1,824 | 1,790 | 1,175 | 261 | 15,027 |
| 15 | #nigeria | 1,097 | 3,496 | 2,555 | 2,177 | 2,608 | 878 | 141 | 26 | 31 | 12,369 |
| 16 | #osmph | 3 | 621 | 2,994 | 5,091 | 1,368 | 472 | 487 | 1,392 | 19 | 11,871 |
| 17 | #buildings | 478 | 119 | 107 | 2,966 | 3,795 | 1,999 | 1,059 | 1,686 | 95 | 11,369 |
| 18 | #salesforce | 57 | 3,251 | 1,234 | 3,726 | 1,216 | 665 | 278 | 1,836 | 287 | 11,358 |
| 19 | #aphub | 0 | 0 | 0 | 0 | 16 | 0 | 1,575 | 5,630 | 5,059 | 11,349 |
| 20 | #liga-zambia | 0 | 0 | 0 | 0 | 11,124 | 111 | 134 | 0 | 0 | 11,319 |
| 21 | #mapgive | 3,012 | 1,804 | 708 | 1,178 | 3,488 | 533 | 197 | 447 | 2 | 10,916 |
| 22 | #maproulette | 154 | 473 | 1,426 | 1,779 | 1,564 | 1,676 | 2,769 | 2,997 | 2,637 | 10,425 |
| 23 | #msft | 1,514 | 2,100 | 1,542 | 2,558 | 2,586 | 35 | 131 | 661 | 265 | 10,416 |
| 24 | #myanmar | 3,013 | 4,336 | 2,435 | 418 | 334 | 27 | 22 | 2 | 3 | 10,095 |
| 25 | #covbots | 0 | 0 | 0 | 7,617 | 1,853 | 100 | 71 | 4 | 0 | 9,346 |
| 26 | #dronebird | 1 | 659 | 2,321 | 1,257 | 1,656 | 385 | 523 | 361 | 1,937 | 8,744 |
| 27 | #türkiyeeq060223 | 0 | 0 | 0 | 0 | 0 | 0 | 8,719 | 3 | 2 | 8,721 |
| 28 | #tanzania | 1,383 | 2,295 | 740 | 2,004 | 1,066 | 695 | 595 | 435 | 33 | 8,445 |
| 29 | #upnoah | 36 | 820 | 2,991 | 3,463 | 1,100 | 212 | 0 | 2 | 0 | 8,330 |
| 30 | #osmgeoweek | 170 | 297 | 162 | 460 | 3,370 | 1,873 | 1,321 | 1,215 | 1 | 8,313 |
| 31 | #omguru | 0 | 0 | 0 | 0 | 0 | 0 | 364 | 6,276 | 2,118 | 8,257 |
| 32 | #galschool | 0 | 154 | 1,257 | 6,856 | 7 | 5 | 0 | 0 | 0 | 8,123 |
| 33 | #msftgive | 1,251 | 1,561 | 1,581 | 2,496 | 2,013 | 44 | 39 | 5 | 13 | 8,114 |
| 34 | #hotmicrogrants2020 | 0 | 0 | 0 | 7,676 | 148 | 122 | 77 | 84 | 0 | 8,057 |
| 35 | #mapph | 7 | 595 | 2,884 | 3,425 | 969 | 191 | 0 | 3 | 0 | 7,813 |
| 36 | #foodsecurityph | 0 | 595 | 2,879 | 3,424 | 965 | 192 | 0 | 2 | 0 | 7,797 |
| 37 | #furuhashilab | 0 | 31 | 1,584 | 1,269 | 1,695 | 494 | 544 | 364 | 2,022 | 7,699 |
| 38 | #yercizenler | 0 | 554 | 415 | 946 | 247 | 51 | 5,379 | 202 | 59 | 7,498 |
| 39 | #rodekruis | 2,278 | 1,787 | 1,435 | 1,050 | 633 | 224 | 75 | 439 | 64 | 7,468 |
| 40 | #missingmaps_drk | 0 | 0 | 254 | 1,343 | 3,154 | 1,966 | 397 | 455 | 217 | 7,373 |
| 41 | #awsmapathon | 0 | 159 | 38 | 675 | 675 | 1,500 | 3,338 | 1,285 | 44 | 7,092 |
| 42 | #maplibya | 0 | 0 | 0 | 41 | 2,937 | 1,817 | 816 | 1,865 | 1 | 7,002 |
| 43 | #uniquemappersnetwork | 0 | 0 | 262 | 560 | 3,404 | 1,801 | 653 | 306 | 580 | 6,997 |
| 44 | #crisismappersjapan | 52 | 20 | 2,049 | 1,167 | 1,201 | 258 | 329 | 329 | 1,778 | 6,992 |
| 45 | #lnsdi | 0 | 0 | 0 | 0 | 2,927 | 1,816 | 816 | 1,865 | 1 | 6,957 |
| 46 | #uniquemappersteam | 20 | 232 | 29 | 465 | 3,381 | 1,862 | 616 | 304 | 578 | 6,938 |
| 47 | #mapimpacto | 0 | 0 | 0 | 6,667 | 45 | 5 | 0 | 0 | 0 | 6,713 |
| 48 | #ourimpact | 0 | 0 | 0 | 1,389 | 891 | 1,325 | 1,835 | 2,073 | 292 | 6,620 |
| 49 | #tt_event | 0 | 0 | 0 | 0 | 0 | 0 | 26 | 2,934 | 4,116 | 6,570 |
| 50 | #msgivesback | 0 | 0 | 0 | 1,106 | 2,133 | 2,275 | 810 | 944 | 10 | 6,384 |
| 51 | #maptripoli | 0 | 0 | 0 | 0 | 2,881 | 1,719 | 702 | 1,507 | 1 | 6,364 |
| 52 | #nigerstate | 1,094 | 2,171 | 907 | 1,696 | 347 | 99 | 1 | 0 | 0 | 6,076 |
| 53 | #lndti | 0 | 0 | 0 | 0 | 1,940 | 1,816 | 815 | 1,865 | 1 | 6,013 |
| 54 | #cartong | 137 | 1,565 | 786 | 1,032 | 1,434 | 676 | 386 | 267 | 206 | 5,978 |
| 55 | #covjam | 0 | 0 | 0 | 5,670 | 137 | 0 | 0 | 0 | 0 | 5,787 |
| 56 | #hurricanemaria | 5,763 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 5,764 |
| 57 | #kenya | 174 | 869 | 3,805 | 676 | 220 | 172 | 7 | 24 | 6 | 5,743 |
| 58 | #osm_in | 0 | 2,820 | 2,592 | 458 | 5 | 1 | 0 | 0 | 0 | 5,648 |
| 59 | #puertorico | 5,636 | 2 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 5,641 |
| 60 | #osmnigeria | 0 | 29 | 281 | 557 | 3,318 | 1,627 | 227 | 1 | 0 | 5,610 |
| 61 | #unmappers | 0 | 0 | 324 | 471 | 1,939 | 1,335 | 778 | 343 | 879 | 5,592 |
| 62 | #water | 11 | 11 | 14 | 15 | 2,018 | 1,832 | 897 | 1,186 | 15 | 5,576 |
| 63 | #disasterresponse | 0 | 0 | 1 | 0 | 309 | 776 | 583 | 3,186 | 1,140 | 5,517 |
| 64 | #moroccoearthquake2023 | 0 | 0 | 0 | 0 | 0 | 0 | 4,727 | 1,027 | 4 | 5,510 |
| 65 | #infrastructure | 0 | 4 | 0 | 2 | 1,943 | 1,814 | 872 | 1,171 | 3 | 5,404 |
| 66 | #vegetation | 0 | 0 | 0 | 0 | 1,936 | 1,813 | 878 | 1,171 | 2 | 5,397 |
| 67 | #landuselandcover | 0 | 0 | 0 | 0 | 1,937 | 1,813 | 871 | 1,170 | 1 | 5,390 |
| 68 | #streetsroads | 0 | 0 | 0 | 0 | 1,936 | 1,812 | 872 | 1,169 | 1 | 5,388 |
| 69 | #soil | 0 | 0 | 0 | 0 | 1,935 | 1,810 | 872 | 1,169 | 1 | 5,386 |
| 70 | #air | 0 | 1 | 1 | 1 | 1,930 | 1,801 | 870 | 1,169 | 4 | 5,375 |
| 71 | #osm | 39 | 63 | 97 | 456 | 2,958 | 541 | 209 | 1,087 | 71 | 5,301 |
| 72 | #omhap | 0 | 0 | 0 | 0 | 0 | 628 | 4,526 | 37 | 12 | 5,092 |
| 73 | #uniquemappersteam2021 | 0 | 0 | 0 | 0 | 3,404 | 1,616 | 226 | 1 | 0 | 4,923 |
| 74 | #hotph | 0 | 0 | 1,758 | 2,894 | 388 | 12 | 9 | 31 | 0 | 4,875 |
| 75 | #turkey | 0 | 1 | 9 | 5 | 3 | 4 | 4,810 | 13 | 1 | 4,828 |
| 76 | #srilanka | 3 | 2,474 | 5 | 12 | 19 | 46 | 2,308 | 1 | 3 | 4,828 |
| 77 | #turkiye | 0 | 0 | 0 | 0 | 1 | 0 | 4,813 | 7 | 0 | 4,817 |
| 78 | #withrefugees | 628 | 3,803 | 413 | 24 | 15 | 3 | 0 | 0 | 0 | 4,781 |
| 79 | #osmindia | 0 | 0 | 0 | 0 | 1 | 7 | 449 | 3,872 | 635 | 4,750 |
| 80 | #syria | 0 | 1 | 1 | 1 | 1 | 0 | 4,532 | 164 | 63 | 4,738 |
| 81 | #philaware | 0 | 0 | 1,758 | 2,852 | 274 | 11 | 8 | 31 | 0 | 4,729 |
| 82 | #colombia | 235 | 2,602 | 736 | 602 | 652 | 12 | 1 | 7 | 5 | 4,728 |
| 83 | #victornsunday | 0 | 0 | 260 | 556 | 2,623 | 1,346 | 225 | 1 | 0 | 4,725 |
| 84 | #bloomberg | 1 | 0 | 344 | 1,866 | 1,185 | 501 | 664 | 670 | 82 | 4,620 |
| 85 | #madagascar | 147 | 305 | 15 | 5 | 2,000 | 2,323 | 6 | 3 | 1 | 4,619 |
| 86 | #chad | 0 | 279 | 2,183 | 522 | 188 | 1,408 | 215 | 1 | 0 | 4,592 |
| 87 | #drc | 2 | 356 | 1,427 | 792 | 179 | 79 | 369 | 1,391 | 169 | 4,529 |
| 88 | #rccolombia | 232 | 2,598 | 523 | 565 | 650 | 12 | 0 | 0 | 0 | 4,480 |
| 89 | #nigeriafloods2021 | 0 | 0 | 0 | 0 | 2,491 | 2,043 | 175 | 8 | 0 | 4,467 |
| 90 | #osmafrica | 0 | 0 | 1 | 0 | 4 | 2,796 | 894 | 846 | 120 | 4,403 |
| 91 | #mapuganda | 230 | 2,465 | 22 | 579 | 375 | 599 | 152 | 97 | 64 | 4,390 |
| 92 | #womenconnect | 0 | 0 | 3,157 | 1,275 | 6 | 3 | 0 | 0 | 0 | 4,294 |
| 93 | #osmnepal | 7 | 26 | 25 | 390 | 396 | 1,187 | 1,437 | 1,260 | 72 | 4,281 |
| 94 | #ngdisasterriskreduction | 0 | 0 | 262 | 527 | 2,510 | 1,004 | 208 | 1 | 0 | 4,273 |
| 95 | #youthmappersagu | 0 | 0 | 0 | 30 | 1,419 | 433 | 420 | 137 | 1,920 | 4,256 |
| 96 | #gomicron21twsg | 0 | 0 | 0 | 0 | 4,248 | 0 | 0 | 0 | 0 | 4,248 |
| 97 | #cycloneidai | 0 | 0 | 4,207 | 9 | 3 | 1 | 1 | 0 | 0 | 4,212 |
| 98 | #mapforpreparedness | 307 | 104 | 1,298 | 1,801 | 821 | 3 | 0 | 0 | 0 | 4,211 |
| 99 | #osmbd | 117 | 82 | 26 | 6 | 48 | 3,463 | 270 | 259 | 47 | 4,099 |
| 100 | #mozambique | 5 | 18 | 2,920 | 218 | 87 | 871 | 4 | 6 | 0 | 4,064 |
# Create geographical maps for top 10 hashtags
df = duckdb.sql("""
WITH hashtag_expanded AS (
SELECT
mid_pos_x,
mid_pos_y,
edit_count,
unnest(hashtags) as hashtag
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE hashtags IS NOT NULL
AND mid_pos_x IS NOT NULL
AND mid_pos_y IS NOT NULL
),
top_hashtags AS (
SELECT hashtag
FROM (
SELECT
hashtag,
SUM(edit_count) as total_edits
FROM hashtag_expanded
GROUP BY hashtag
ORDER BY total_edits DESC
LIMIT 10
)
),
hashtag_geo_data AS (
SELECT
he.hashtag,
he.mid_pos_x as x,
he.mid_pos_y as y,
SUM(he.edit_count) as z
FROM hashtag_expanded he
WHERE he.hashtag IN (SELECT hashtag FROM top_hashtags)
GROUP BY he.hashtag, he.mid_pos_x, he.mid_pos_y
)
SELECT * FROM hashtag_geo_data
ORDER BY hashtag, x, y
""").df()
# Create dropdown maps for each hashtag
configs = []
for hashtag in sorted(df["hashtag"].unique()):
configs.append(
util.FigureConfig(
title=f"Edit Distribution - {hashtag}".replace("#", ""),
x_col="x",
y_col="y",
z_col="z",
query_or_df=df[df["hashtag"] == hashtag],
plot_type="map",
)
)
util.show_figure(configs, type="dropdown")