This website contains monthly updated OpenStreetMap statistics. The code is Open Source and can be found on GitHub. If you know SQL, you can also create your own statistics using the preprocessed data. The code for creating the plots can be explored by toggling the code cell.
df = duckdb.sql("""
WITH user_first_appearance AS (
SELECT
user_name,
year,
month,
ROW_NUMBER() OVER (PARTITION BY user_name ORDER BY year, month) as rn
FROM (
SELECT DISTINCT user_name, year, month
FROM '../changeset_data/year=*/month=*/*.parquet'
)
),
first_appearances AS (
SELECT user_name, year, month
FROM user_first_appearance
WHERE rn = 1
),
monthly_metrics AS (
SELECT
year,
month,
CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
COUNT(DISTINCT user_name) as Contributors,
CAST(SUM(edit_count) as BIGINT) as Edits,
CAST(COUNT(*) AS INTEGER) as Changesets
FROM '../changeset_data/year=*/month=*/*.parquet'
GROUP BY year, month
),
monthly_new_contributors AS (
SELECT
year,
month,
COUNT(DISTINCT user_name) as "New Contributors"
FROM first_appearances
GROUP BY year, month
),
combined_metrics AS (
SELECT
m.year,
m.month,
m.months,
m.Contributors,
COALESCE(n."New Contributors", 0) as "New Contributors",
m.Edits,
m.Changesets
FROM monthly_metrics m
LEFT JOIN monthly_new_contributors n ON m.year = n.year AND m.month = n.month
)
SELECT
months,
Contributors,
"New Contributors",
Edits,
Changesets,
SUM("New Contributors") OVER (ORDER BY year, month) as "Accumulated Contributors",
SUM(Edits) OVER (ORDER BY year, month) as "Accumulated Edits",
SUM(Changesets) OVER (ORDER BY year, month) as "Accumulated Changesets"
FROM combined_metrics
ORDER BY year, month
""").df()
util.show_figure(
[
util.FigureConfig(
title="Monthly Contributors",
label="Contributors",
x_col="months",
y_col="Contributors",
query_or_df=df,
),
util.FigureConfig(
title="Monthly New Contributors",
label="New Contributors",
x_col="months",
y_col="New Contributors",
query_or_df=df,
),
util.FigureConfig(
title="Monthly Edits",
label="Edits",
x_col="months",
y_col="Edits",
query_or_df=df,
),
util.FigureConfig(
title="Monthly Changesets",
label="Changesets",
x_col="months",
y_col="Changesets",
query_or_df=df,
),
]
)
util.show_figure(
[
util.FigureConfig(
title="Accumulated Contributors",
label="Accumulated Contributors",
x_col="months",
y_col="Accumulated Contributors",
query_or_df=df,
),
util.FigureConfig(
title="Accumulated Edits",
label="Accumulated Edits",
x_col="months",
y_col="Accumulated Edits",
query_or_df=df,
),
util.FigureConfig(
title="Accumulated Changesets",
label="Accumulated Changesets",
x_col="months",
y_col="Accumulated Changesets",
query_or_df=df,
),
]
)
df = duckdb.sql("""
SELECT
mid_pos_x as x,
mid_pos_y as y,
SUM(edit_count) as z
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE mid_pos_x IS NOT NULL AND mid_pos_y IS NOT NULL
GROUP BY mid_pos_x, mid_pos_y
""").df()
util.show_figure(
[
util.FigureConfig(
title="Edit Count",
x_col="x",
y_col="y",
z_col="z",
query_or_df=df,
plot_type="map",
),
]
)
This shows the contribution patterns of users grouped by their first edit period (2-year periods), displaying the percentage of total yearly edits from each cohort.
sql_query = """
WITH user_first_edit AS (
-- Find the first edit year for each user
SELECT
user_name,
MIN(year) as first_edit_year
FROM
read_parquet('../changeset_data/year=*/month=*/*.parquet')
GROUP BY
user_name
),
year_user_edits AS (
-- Sum edits by year and user
SELECT
year,
user_name,
SUM(edit_count) as total_edits
FROM
read_parquet('../changeset_data/year=*/month=*/*.parquet')
GROUP BY
year, user_name
),
merged_data AS (
-- Join to get first edit year for each user's yearly edits
SELECT
y.year,
y.user_name,
y.total_edits,
u.first_edit_year,
CASE
-- Create 2-year periods based on odd/even years
WHEN u.first_edit_year % 2 = 1 THEN
CONCAT('First edit in: ', CAST(u.first_edit_year AS VARCHAR), '-', CAST(u.first_edit_year + 1 AS VARCHAR))
ELSE
CONCAT('First edit in: ', CAST(u.first_edit_year - 1 AS VARCHAR), '-', CAST(u.first_edit_year AS VARCHAR))
END as first_edit_period
FROM
year_user_edits y
JOIN
user_first_edit u ON y.user_name = u.user_name
),
grouped_data AS (
SELECT
CAST(year AS VARCHAR) as years,
first_edit_period,
SUM(total_edits) as total_edits
FROM
merged_data
GROUP BY
year, first_edit_period
)
SELECT
years,
first_edit_period,
total_edits,
ROUND(100.0 * total_edits / SUM(total_edits) OVER (PARTITION BY years), 2) as percentage
FROM
grouped_data
ORDER BY
years, first_edit_period
"""
df = duckdb.sql(sql_query).df()
util.show_figure(
[
util.FigureConfig(
title="Contributor Attrition Rate by First Edit Period",
label="Absolute",
x_col="years",
y_col="total_edits",
query_or_df=df,
group_col="first_edit_period",
plot_type="bar",
trace_names=util.get_trace_names(df, "first_edit_period", "years", "total_edits", "unique"),
),
util.FigureConfig(
title="Percentage of Edits by First Edit Period",
label="Percentage",
x_col="years",
y_col="percentage",
query_or_df=df,
group_col="first_edit_period",
plot_type="bar",
),
]
)
This shows the number of contributors who have accumulated more than various edit count thresholds (10, 100, 1000, 10000, 100000 edits) throughout their entire OSM history up to each month.
df = duckdb.sql("""
WITH user_cumulative_edits AS (
SELECT
year,
month,
user_name,
SUM(edit_count) OVER (PARTITION BY user_name ORDER BY year, month) as cumulative_edits
FROM (
SELECT
year,
month,
user_name,
SUM(edit_count) as edit_count
FROM '../changeset_data/year=*/month=*/*.parquet'
GROUP BY year, month, user_name
)
),
monthly_thresholds AS (
SELECT
year,
month,
CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
COUNT(DISTINCT CASE WHEN cumulative_edits > 10 THEN user_name END) as "more then 10 edits",
COUNT(DISTINCT CASE WHEN cumulative_edits > 100 THEN user_name END) as "more then 100 edits",
COUNT(DISTINCT CASE WHEN cumulative_edits > 1000 THEN user_name END) as "more then 1000 edits",
COUNT(DISTINCT CASE WHEN cumulative_edits > 10000 THEN user_name END) as "more then 10000 edits",
COUNT(DISTINCT CASE WHEN cumulative_edits > 100000 THEN user_name END) as "more then 100000 edits"
FROM user_cumulative_edits
GROUP BY year, month
)
SELECT
months,
"more then 10 edits",
"more then 100 edits",
"more then 1000 edits",
"more then 10000 edits",
"more then 100000 edits"
FROM monthly_thresholds
ORDER BY year, month
""").df()
# Reshape for plotting
df_melted = df.melt(id_vars=["months"], var_name="threshold", value_name="contributors")
util.show_figure(
[
util.FigureConfig(
title="Contributors with More than K Edits Total",
label="contributors",
x_col="months",
y_col="contributors",
group_col="threshold",
query_or_df=df_melted,
trace_names=[
"more then 10 edits",
"more then 100 edits",
"more then 1000 edits",
"more then 10000 edits",
"more then 100000 edits",
],
)
]
)
There is a big spike of new contributors in April 2016. This is because a lot of users using MAPS.ME were creating their first edits.
sql_query = """
WITH user_first_edit AS (
-- Find the first edit month for each user and determine if they used MAPS.ME
SELECT
user_name,
MIN(CONCAT(CAST(year AS VARCHAR), '-', LPAD(CAST(month AS VARCHAR), 2, '0'))) as first_edit_month,
-- Check if user ever used MAPS.ME
BOOL_OR(CASE WHEN created_by LIKE '%MAPS.ME%' THEN true ELSE false END) as used_maps_me
FROM
read_parquet('../changeset_data/year=*/month=*/*.parquet')
GROUP BY
user_name
)
SELECT
first_edit_month as Months,
COUNT(*) as 'New Contributors',
CASE
WHEN used_maps_me THEN 'Only MAPS.ME'
ELSE 'Without MAPS.ME'
END as group_column
FROM
user_first_edit
GROUP BY
first_edit_month, used_maps_me
ORDER BY
first_edit_month, group_column
"""
util.show_figure(
[
util.FigureConfig(
title="Monthly New Contributors with and without MAPS.ME",
x_col="Months",
y_col="New Contributors",
query_or_df=sql_query,
group_col="group_column",
)
]
)
This shows the median number of edits made by contributors each year, providing insight into typical contributor activity levels.
df_median_edits_yearly = duckdb.sql("""
WITH yearly_contributor_edits AS (
SELECT
year,
user_name,
CAST(SUM(edit_count) as INTEGER) as user_edits
FROM '../changeset_data/year=*/month=*/*.parquet'
GROUP BY year, user_name
)
SELECT
CAST(year AS VARCHAR) as Years,
CAST(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY user_edits) as INTEGER) as "Median edits per contributor"
FROM yearly_contributor_edits
GROUP BY year
ORDER BY year
""").df()
util.show_figure(
[
util.FigureConfig(
title="Median Number of Edits per Contributor",
x_col="Years",
y_col="Median edits per contributor",
query_or_df=df_median_edits_yearly,
),
],
)
df = duckdb.sql("""
SELECT
year,
mid_pos_x as x,
mid_pos_y as y,
SUM(edit_count) as z
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE mid_pos_x IS NOT NULL AND mid_pos_y IS NOT NULL
GROUP BY year, mid_pos_x, mid_pos_y
""").df()
configs = []
for year in sorted(df["year"].unique()):
configs.append(
util.FigureConfig(
title=f"Edit Count {year}",
x_col="x",
y_col="y",
z_col="z",
query_or_df=df[df["year"] == year],
plot_type="map",
)
)
util.show_figure(configs, type="dropdown")