feat(maps): Add Italy regions code to the map generator notebook (#27542)

This commit is contained in:
İSKENDER ÜLGEN OĞUL 2024-03-19 18:37:37 +03:00 committed by GitHub
parent 36290ce72f
commit 86aa8bde8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 151 additions and 231 deletions

View File

@ -40,6 +40,27 @@
"Download datasets (_Admin 0 - Countries_ in [1:10](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/), and _Admin 1 States, Provinces_ in 1:10 and [1:50](https://www.naturalearthdata.com/downloads/50m-cultural-vectors/)) from Natural Earth Data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dependencies\n",
"\n",
"import os\n",
"import requests\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import shapely\n",
"import pandas as pd\n",
"import shapely.geometry\n",
"import shapely.ops\n",
"import shapely.affinity\n",
"from shapely.geometry import Polygon, MultiPolygon\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -52,9 +73,6 @@
},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"\n",
"data_dir = os.path.expanduser(\"~/Downloads\")\n",
"if not os.path.exists(data_dir):\n",
" os.mkdir(data_dir)\n",
@ -106,15 +124,14 @@
},
"outputs": [],
"source": [
"import os\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import shapely\n",
"\n",
"# Read Natural Earth data files into GeoDataFrames\n",
"df_admin0_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_0_countries.zip\")\n",
"df_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_1_states_provinces.zip\")\n",
"df_50m = gpd.read_file(f\"{data_dir}/ne_50m_admin_1_states_provinces.zip\")\n",
"\n",
"# Convert column names to lowercase\n",
"df_admin0_10m.columns = df_admin0_10m.columns.str.lower()\n",
"\n",
"# Download and load the GeoJSON file for India\n",
"india_geojson_url = \"https://github.com/geohacker/india/raw/bcb920c7d3c686f01d085f7661c9ba89bf9bf65e/state/india_state_kashmir_ladakh.geojson\"\n",
"\n",
@ -126,31 +143,6 @@
" print(\"Please download the file from the URL and try again.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_PKEg8s1t1LT",
"outputId": "9937eacd-7a05-491f-f356-790ad75a819f"
},
"outputs": [],
"source": [
"df_50m.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "w8OD4nq2t1LT",
"outputId": "b7164440-d6e0-4074-b357-47fd8a8d2884"
},
"outputs": [],
"source": [
"df_admin0_10m.columns = df_admin0_10m.columns.str.lower()\n",
"df_admin0_10m.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -178,18 +170,10 @@
"source": [
"df_50m[df_50m.adm0_a3 == 'USA'].plot(figsize=(20,10))\n",
"plt.show()\n",
"\n",
"india_gdf.plot(figsize=(20, 10))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VvpqxOjMt1LU"
},
"source": [
"Use 1:50m geometry for some large countries:"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -199,7 +183,9 @@
},
"outputs": [],
"source": [
"df_50m['admin'].unique()"
"# Use 1:50m geometry for some large countries:\n",
"\n",
"print(*df_50m['admin'].unique(), sep='\\n')"
]
},
{
@ -210,8 +196,6 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.concat([df_10m[~df_10m['admin'].isin(df_50m['admin'].unique())], df_50m])"
]
},
@ -273,7 +257,7 @@
" 'indonesia',\n",
" 'iran',\n",
" 'italy',\n",
" 'italy_regions', # this one is custom - see below\n",
" 'italy_regions', # this one is derived from italy - see below\n",
" 'japan',\n",
" 'jordan',\n",
" 'kazakhstan',\n",
@ -345,7 +329,7 @@
"\n",
"# CSV files that exist specifically on the repo, rather than in the dataset\n",
"custom_countries = [\n",
" 'italy_regions',\n",
" \n",
"]\n",
"\n",
"# Make sure all country names are covered:\n",
@ -373,8 +357,6 @@
},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"\n",
"alt_maps = dict()\n",
"\n",
"def get_gdf(country):\n",
@ -393,9 +375,13 @@
" for i, country in enumerate(countries):\n",
" # create subplot axes in a grid of len(countries) // 5 rows by 6 columns\n",
" ax = plt.subplot(len(countries) // 5, 6, i + 1) # nrows, ncols, axes position\n",
" get_gdf(country).plot(ax=ax)\n",
" gdf = get_gdf(country)\n",
" if not gdf.empty: # check if GeoDataFrame is not empty\n",
" gdf.plot(ax=ax)\n",
" ax.set_aspect('equal', adjustable='datalim')\n",
" else: # if GeoDataFrame is empty\n",
" ax.text(0.5, 0.5, country, ha='center', va='center') # add country name to the center of the subplot\n",
" ax.set_title(country)\n",
" ax.set_aspect('equal', adjustable='datalim')\n",
"\n",
" plt.tight_layout()\n",
" plt.show()\n",
@ -411,7 +397,7 @@
"source": [
"### Handle countries with flying islands\n",
"\n",
"#### USA"
"- For countries with flying islands, we need to move the islands closer to the mainland."
]
},
{
@ -420,7 +406,7 @@
"id": "Z4y46Zuot1LU"
},
"source": [
"For countries with flying islands, we need to move the islands closer to the mainland."
"#### USA"
]
},
{
@ -513,14 +499,15 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Stack the china_sars rows underneath the rows of df whose admin is China.\n",
"china = df[df.admin == \"China\"]\n",
"china_copy = (\n",
"    pd.concat([china, china_sars], ignore_index=True)\n",
"    # Fill missing 'name_zh' values from 'name_zh_y'\n",
"    .assign(name_zh=lambda merged: merged[\"name_zh\"].combine_first(merged[\"name_zh_y\"]))\n",
"    # Discard the suffixed helper columns; both must be present or drop() raises KeyError\n",
"    .drop(columns=[\"name_zh_x\", \"name_zh_y\"])\n",
")\n",
"\n",
"# Plot the combined frame\n",
"china_copy.plot(figsize=(12, 12))"
]
@ -542,7 +529,7 @@
"source": [
"#### Finland\n",
"\n",
"The Åland Islands (ISO country code AX) is an autonomous region of Finland, and carries the ISO-3166 code FI-01."
"- The Åland Islands (ISO country code AX) is an autonomous region of Finland, and carries the ISO-3166 code FI-01."
]
},
{
@ -575,17 +562,19 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"finland = df[df.admin == \"Finland\"]\n",
"\n",
"# Append the finland_aland rows to the rest of Finland's rows\n",
"finland_copy = pd.concat([finland, finland_aland], ignore_index=True)\n",
"\n",
"# Remove 'name_fi' from the combined frame. No combine_first() step is needed first:\n",
"# filling a column from itself changes nothing, and the column is dropped here anyway.\n",
"# NOTE(review): confirm that discarding 'name_fi' entirely is intended; if the two inputs\n",
"# carry differently named columns (e.g. 'name_fi_x' / 'name_fi_y'), combine and drop those instead.\n",
"finland_copy = finland_copy.drop(columns=[\"name_fi\"])\n",
"\n",
"# Plot the combined frame\n",
"finland_copy.plot(figsize=(12, 12))\n"
]
@ -598,7 +587,7 @@
"source": [
"#### Norway\n",
"\n",
"Remove NO-X01~ (The uninhabited Bouvet Island) and move Svalbard closer to mainland"
"- Remove NO-X01~ (The uninhabited Bouvet Island) and move Svalbard closer to mainland"
]
},
{
@ -624,6 +613,7 @@
"outputs": [],
"source": [
"norway_copy = norway.copy()\n",
"\n",
"norway_copy = norway_copy[norway_copy[\"iso_3166_2\"] != \"NO-X01~\"]\n",
"reposition(norway_copy, norway.name == 'Svalbard', -12, -8, 0.5, 0.5)\n",
"#reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n",
@ -663,8 +653,10 @@
"outputs": [],
"source": [
"portugal_copy = portugal.copy()\n",
"\n",
"reposition(portugal_copy, portugal.name == 'Azores', 11, 0)\n",
"reposition(portugal_copy, portugal.name == 'Madeira', 6, 2, simplify=0.015)\n",
"\n",
"portugal_copy.plot(figsize=(8, 8))"
]
},
@ -700,7 +692,9 @@
"outputs": [],
"source": [
"spain_copy = spain.copy()\n",
"\n",
"reposition(spain_copy, spain.name.isin(['Las Palmas', 'Santa Cruz de Tenerife']), 3, 7, 1, 1)\n",
"\n",
"spain_copy.plot(figsize=(8, 8))"
]
},
@ -732,7 +726,7 @@
"id": "IOuQ_OzMt1LW"
},
"source": [
"Russia looks off because Chukchi runs across E180. We need to move the parts on the other side of the map to the right."
"- Russia looks off because Chukchi runs across E180. We need to move the parts on the other side of the map to the right."
]
},
{
@ -745,10 +739,6 @@
},
"outputs": [],
"source": [
"import shapely.geometry\n",
"import shapely.ops\n",
"import shapely.affinity\n",
"\n",
"def shift_geom(geom, cutoff=0):\n",
" border = shapely.geometry.LineString([(cutoff, -90), (cutoff, 90)])\n",
" splitted_geom = shapely.ops.split(geom, border)\n",
@ -815,8 +805,6 @@
"metadata": {},
"outputs": [],
"source": [
"from shapely.geometry import Polygon, MultiPolygon\n",
"\n",
"# NUTS-1 codes for Turkey and the corresponding region - city names\n",
"\n",
"region_dict = {\n",
@ -896,7 +884,7 @@
"id": "aYFQYe8-t1LW"
},
"source": [
"#### France"
"### France"
]
},
{
@ -978,123 +966,63 @@
"metadata": {},
"outputs": [],
"source": [
"france_regions = france_copy.copy()\n",
"# Define the mapping from the departments to their parent division\n",
"# i.e. from departments to regions\n",
"# Source: https://www.iso.org/obp/ui/#iso:code:3166:FR\n",
"dep_to_reg = {\n",
" \"FR-01\": \"FR-ARA\",\n",
" \"FR-02\": \"FR-HDF\",\n",
" \"FR-03\": \"FR-ARA\",\n",
" \"FR-04\": \"FR-PAC\",\n",
" \"FR-05\": \"FR-PAC\",\n",
" \"FR-06\": \"FR-PAC\",\n",
" \"FR-07\": \"FR-ARA\",\n",
" \"FR-08\": \"FR-GES\",\n",
" \"FR-09\": \"FR-OCC\",\n",
" \"FR-10\": \"FR-GES\",\n",
" \"FR-11\": \"FR-OCC\",\n",
" \"FR-12\": \"FR-OCC\",\n",
" \"FR-13\": \"FR-PAC\",\n",
" \"FR-14\": \"FR-NOR\",\n",
" \"FR-15\": \"FR-ARA\",\n",
" \"FR-16\": \"FR-NAQ\",\n",
" \"FR-17\": \"FR-NAQ\",\n",
" \"FR-18\": \"FR-CVL\",\n",
" \"FR-19\": \"FR-NAQ\",\n",
" \"FR-21\": \"FR-BFC\",\n",
" \"FR-22\": \"FR-BRE\",\n",
" \"FR-23\": \"FR-NAQ\",\n",
" \"FR-24\": \"FR-NAQ\",\n",
" \"FR-25\": \"FR-BFC\",\n",
" \"FR-26\": \"FR-ARA\",\n",
" \"FR-27\": \"FR-NOR\",\n",
" \"FR-28\": \"FR-CVL\",\n",
" \"FR-29\": \"FR-BRE\",\n",
" \"FR-2A\": \"FR-20R\",\n",
" \"FR-2B\": \"FR-20R\",\n",
" \"FR-30\": \"FR-OCC\",\n",
" \"FR-31\": \"FR-OCC\",\n",
" \"FR-32\": \"FR-OCC\",\n",
" \"FR-33\": \"FR-NAQ\",\n",
" \"FR-34\": \"FR-OCC\",\n",
" \"FR-35\": \"FR-BRE\",\n",
" \"FR-36\": \"FR-CVL\",\n",
" \"FR-37\": \"FR-CVL\",\n",
" \"FR-38\": \"FR-ARA\",\n",
" \"FR-39\": \"FR-BFC\",\n",
" \"FR-40\": \"FR-NAQ\",\n",
" \"FR-41\": \"FR-CVL\",\n",
" \"FR-42\": \"FR-ARA\",\n",
" \"FR-43\": \"FR-ARA\",\n",
" \"FR-44\": \"FR-PDL\",\n",
" \"FR-45\": \"FR-CVL\",\n",
" \"FR-46\": \"FR-OCC\",\n",
" \"FR-47\": \"FR-NAQ\",\n",
" \"FR-48\": \"FR-OCC\",\n",
" \"FR-49\": \"FR-PDL\",\n",
" \"FR-50\": \"FR-NOR\",\n",
" \"FR-51\": \"FR-GES\",\n",
" \"FR-52\": \"FR-GES\",\n",
" \"FR-53\": \"FR-PDL\",\n",
" \"FR-54\": \"FR-GES\",\n",
" \"FR-55\": \"FR-GES\",\n",
" \"FR-56\": \"FR-BRE\",\n",
" \"FR-57\": \"FR-GES\",\n",
" \"FR-58\": \"FR-BFC\",\n",
" \"FR-59\": \"FR-HDF\",\n",
" \"FR-60\": \"FR-HDF\",\n",
" \"FR-61\": \"FR-NOR\",\n",
" \"FR-62\": \"FR-HDF\",\n",
" \"FR-63\": \"FR-ARA\",\n",
" \"FR-64\": \"FR-NAQ\",\n",
" \"FR-65\": \"FR-OCC\",\n",
" \"FR-66\": \"FR-OCC\",\n",
" \"FR-67\": \"FR-GES\",\n",
" \"FR-68\": \"FR-GES\",\n",
" \"FR-69\": \"FR-ARA\",\n",
" \"FR-70\": \"FR-BFC\",\n",
" \"FR-71\": \"FR-BFC\",\n",
" \"FR-72\": \"FR-PDL\",\n",
" \"FR-73\": \"FR-ARA\",\n",
" \"FR-74\": \"FR-ARA\",\n",
" \"FR-75\": \"FR-IDF\",\n",
" \"FR-76\": \"FR-NOR\",\n",
" \"FR-77\": \"FR-IDF\",\n",
" \"FR-78\": \"FR-IDF\",\n",
" \"FR-79\": \"FR-NAQ\",\n",
" \"FR-80\": \"FR-HDF\",\n",
" \"FR-81\": \"FR-OCC\",\n",
" \"FR-82\": \"FR-OCC\",\n",
" \"FR-83\": \"FR-PAC\",\n",
" \"FR-84\": \"FR-PAC\",\n",
" \"FR-85\": \"FR-PDL\",\n",
" \"FR-86\": \"FR-NAQ\",\n",
" \"FR-87\": \"FR-NAQ\",\n",
" \"FR-88\": \"FR-GES\",\n",
" \"FR-89\": \"FR-BFC\",\n",
" \"FR-90\": \"FR-BFC\",\n",
" \"FR-91\": \"FR-IDF\",\n",
" \"FR-92\": \"FR-IDF\",\n",
" \"FR-93\": \"FR-IDF\",\n",
" \"FR-94\": \"FR-IDF\",\n",
" \"FR-95\": \"FR-IDF\",\n",
" \"FR-GF\": \"FR-GF\",\n",
" \"FR-MQ\": \"FR-MQ\",\n",
" \"FR-GP\": \"FR-GP\",\n",
" \"FR-RE\": \"FR-RE\",\n",
" \"FR-YT\": \"FR-YT\",\n",
"}\n",
"# Apply the mapping departments => regions\n",
"france_regions[\"iso_3166_2_region\"] = france_regions['iso_3166_2'].apply(lambda x: dep_to_reg[x])\n",
"# Merge the departments together \n",
"# https://geopandas.org/en/stable/docs/user_guide/aggregation_with_dissolve.html\n",
"france_regions = france_regions.dissolve(by='iso_3166_2_region')\n",
"# Fix the label\n",
"france_regions['iso_3166_2'] = france_regions['iso_3166_2'].index\n",
"# Fix the name\n",
"france_regions['name'] = france_regions['region']\n",
"france_regions.plot()"
"france_regions = france_copy[['geometry','region_cod','region']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Merge the department geometries into one row per region.\n",
"# Built from france_copy rather than from france_regions itself so the cell can be re-run:\n",
"# once 'region_cod' has been renamed below, dissolving the result again would raise KeyError.\n",
"france_regions = (\n",
"    france_copy[['geometry', 'region_cod', 'region']]\n",
"    .dissolve(by=['region_cod', 'region'])\n",
"    .reset_index()\n",
"    # Expose the region code and name under 'iso_3166_2' and 'name'\n",
"    .rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"france_regions.plot(figsize=(10, 7), edgecolor='black', column='iso_3166_2', legend=False, cmap='tab20')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Italy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Italy Regions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build one row per Italian region from the rows of df whose admin is Italy:\n",
"# keep the needed columns, merge geometries that share a region, then expose the\n",
"# region code and name under 'iso_3166_2' and 'name'.\n",
"italy_regions = (\n",
"    df[df.admin == 'Italy'][['geometry', 'region_cod', 'region']]\n",
"    .dissolve(by=['region_cod', 'region'])\n",
"    .reset_index()\n",
"    .rename(columns={'region_cod': 'iso_3166_2', 'region': 'name'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"italy_regions.plot(figsize=(10, 7), edgecolor='black', column='iso_3166_2', legend=False, cmap='tab20')"
]
},
{
@ -1205,6 +1133,7 @@
" \"france\": france_copy,\n",
" \"france_regions\": france_regions,\n",
" \"turkey_regions\": turkey_regions,\n",
" \"italy_regions\": italy_regions,\n",
" \"netherlands\": netherlands_copy,\n",
" \"norway\": norway_copy,\n",
" \"uk\": uk_copy,\n",
@ -1236,8 +1165,6 @@
},
"outputs": [],
"source": [
"import shutil\n",
"\n",
"simplify_factors = {\n",
" \"uk\": 0.005,\n",
"}\n",
@ -1368,13 +1295,6 @@
"\n",
"print(\"TypeScript code written to src/countries.ts\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {