feat(maps): Add Italy regions code to the map generator notebook (#27542)

2024-03-19 18:37:37 +03:00 · 2024-03-19 18:37:37 +03:00 · 86aa8bde8b
parent 36290ce72f
commit 86aa8bde8b
3 changed files with 151 additions and 231 deletions
--- a/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country
+++ b/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country
@ -40,6 +40,27 @@
    "Download datasets (_Admin 0 - Countries_ in [1:10](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/), and _Admin 1 – States, Provinces_ in 1:10 and [1:50](https://www.naturalearthdata.com/downloads/50m-cultural-vectors/)) from Natural Earth Data:"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dependencies\n",
+    "\n",
+    "import os\n",
+    "import requests\n",
+    "import geopandas as gpd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import shapely\n",
+    "import pandas as pd\n",
+    "import shapely.geometry\n",
+    "import shapely.ops\n",
+    "import shapely.affinity\n",
+    "from shapely.geometry import Polygon, MultiPolygon\n",
+    "import shutil"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -52,9 +73,6 @@
   },
   "outputs": [],
   "source": [
-    "import os\n",
-    "import requests\n",
-    "\n",
    "data_dir = os.path.expanduser(\"~/Downloads\")\n",
    "if not os.path.exists(data_dir):\n",
    "    os.mkdir(data_dir)\n",
@ -106,15 +124,14 @@
   },
   "outputs": [],
   "source": [
-    "import os\n",
-    "import geopandas as gpd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import shapely\n",
-    "\n",
+    "# Read Natural Earth data files into GeoDataFrames\n",
    "df_admin0_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_0_countries.zip\")\n",
    "df_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_1_states_provinces.zip\")\n",
    "df_50m = gpd.read_file(f\"{data_dir}/ne_50m_admin_1_states_provinces.zip\")\n",
    "\n",
+    "# Convert column names to lowercase\n",
+    "df_admin0_10m.columns = df_admin0_10m.columns.str.lower()\n",
+    "\n",
    "# Download and load the GeoJSON file for India\n",
    "india_geojson_url = \"https://github.com/geohacker/india/raw/bcb920c7d3c686f01d085f7661c9ba89bf9bf65e/state/india_state_kashmir_ladakh.geojson\"\n",
    "\n",
@ -126,31 +143,6 @@
    "    print(\"Please download the file from the URL and try again.\")"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "_PKEg8s1t1LT",
-    "outputId": "9937eacd-7a05-491f-f356-790ad75a819f"
-   },
-   "outputs": [],
-   "source": [
-    "df_50m.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "w8OD4nq2t1LT",
-    "outputId": "b7164440-d6e0-4074-b357-47fd8a8d2884"
-   },
-   "outputs": [],
-   "source": [
-    "df_admin0_10m.columns = df_admin0_10m.columns.str.lower()\n",
-    "df_admin0_10m.columns"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -178,18 +170,10 @@
   "source": [
    "df_50m[df_50m.adm0_a3 == 'USA'].plot(figsize=(20,10))\n",
    "plt.show()\n",
+    "\n",
    "india_gdf.plot(figsize=(20, 10))"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "VvpqxOjMt1LU"
-   },
-   "source": [
-    "Use 1:50m geometry for some large countries:"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -199,7 +183,9 @@
   },
   "outputs": [],
   "source": [
-    "df_50m['admin'].unique()"
+    "# Use 1:50m geometry for some large countries:\n",
+    "\n",
+    "print(*df_50m['admin'].unique(), sep='\\n')"
   ]
  },
  {
@ -210,8 +196,6 @@
   },
   "outputs": [],
   "source": [
-    "import pandas as pd\n",
-    "\n",
    "df = pd.concat([df_10m[~df_10m['admin'].isin(df_50m['admin'].unique())], df_50m])"
   ]
  },
@ -273,7 +257,7 @@
    "  'indonesia',\n",
    "  'iran',\n",
    "  'italy',\n",
-    "  'italy_regions', # this one is custom - see below\n",
+    "  'italy_regions', # this one is derived from italy - see below\n",
    "  'japan',\n",
    "  'jordan',\n",
    "  'kazakhstan',\n",
@ -345,7 +329,7 @@
    "\n",
    "# CSV files that exist specifically on the repo, rather than in the dataset\n",
    "custom_countries = [\n",
-    "    'italy_regions',\n",
+    "    \n",
    "]\n",
    "\n",
    "# Make sure all country names are covered:\n",
@ -373,8 +357,6 @@
   },
   "outputs": [],
   "source": [
-    "import geopandas as gpd\n",
-    "\n",
    "alt_maps = dict()\n",
    "\n",
    "def get_gdf(country):\n",
@ -393,9 +375,13 @@
    "    for i, country in enumerate(countries):\n",
    "        # create subplot axes in a grid with len(countries) // 5 rows and 6 columns\n",
    "        ax = plt.subplot(len(countries) // 5, 6, i + 1) # nrows, ncols, axes position\n",
-    "        get_gdf(country).plot(ax=ax)\n",
+    "        gdf = get_gdf(country)\n",
+    "        if not gdf.empty:  # check if GeoDataFrame is not empty\n",
+    "            gdf.plot(ax=ax)\n",
+    "            ax.set_aspect('equal', adjustable='datalim')\n",
+    "        else:  # if GeoDataFrame is empty\n",
+    "            ax.text(0.5, 0.5, country, ha='center', va='center')  # add country name to the center of the subplot\n",
    "        ax.set_title(country)\n",
-    "        ax.set_aspect('equal', adjustable='datalim')\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
@ -411,7 +397,7 @@
   "source": [
    "### Handle countries with flying islands\n",
    "\n",
-    "#### USA"
+    "- For countries with flying islands, we need to move the islands closer to the mainland."
   ]
  },
  {
@ -420,7 +406,7 @@
    "id": "Z4y46Zuot1LU"
   },
   "source": [
-    "For countries with flying islands, we need to move the islands closer to the mainland."
+    "#### USA"
   ]
  },
  {
@ -513,14 +499,15 @@
   },
   "outputs": [],
   "source": [
-    "import pandas as pd\n",
-    "\n",
    "china = df[df.admin == \"China\"]\n",
    "china_copy = pd.concat([china, china_sars], ignore_index=True)\n",
+    "\n",
    "# Combine the 'name_zh' columns\n",
    "china_copy[\"name_zh\"] = china_copy[\"name_zh\"].combine_first(china_copy[\"name_zh_y\"])\n",
+    "\n",
    "# Drop the extra 'name_zh_x' and 'name_zh_y' columns, if they exist\n",
    "china_copy = china_copy.drop([\"name_zh_x\", \"name_zh_y\"], axis=1)\n",
+    "\n",
    "# Plotting the DataFrame\n",
    "china_copy.plot(figsize=(12, 12))"
   ]
@ -542,7 +529,7 @@
   "source": [
    "#### Finland\n",
    "\n",
-    "The Åland Islands (ISO country code AX) is an autonomous region of Finland, and carries the ISO-3166 code FI-01."
+    "- The Åland Islands (ISO country code AX) is an autonomous region of Finland, and carries the ISO-3166 code FI-01."
   ]
  },
  {
@ -575,17 +562,19 @@
   },
   "outputs": [],
   "source": [
-    "import pandas as pd\n",
-    "\n",
    "finland = df[df.admin == \"Finland\"]\n",
+    "\n",
    "# Concatenate the 'finland' DataFrame with 'finland_aland' DataFrame\n",
    "finland_copy = pd.concat([finland, finland_aland], ignore_index=True)\n",
+    "\n",
    "# Combine 'name_fi' columns. However, since both columns are named 'name_fi', this might be redundant\n",
    "# If you have two different columns for 'name_fi' values in each DataFrame, specify them as 'name_fi_x' and 'name_fi_y'\n",
    "finland_copy[\"name_fi\"] = finland_copy[\"name_fi\"].combine_first(finland_copy[\"name_fi\"])\n",
+    "\n",
    "# Drop the 'name_fi' column, if that's intended. This will remove the 'name_fi' data entirely.\n",
    "# If you meant to drop other columns (like 'name_fi_x' and 'name_fi_y'), update the column names accordingly\n",
    "finland_copy = finland_copy.drop([\"name_fi\"], axis=1)\n",
+    "\n",
    "# Plotting the DataFrame\n",
    "finland_copy.plot(figsize=(12, 12))\n"
   ]
@ -598,7 +587,7 @@
   "source": [
    "#### Norway\n",
    "\n",
-    "Remove NO-X01~ (The uninhabited Bouvet Island) and move Svalbard closer to mainland"
+    "- Remove NO-X01~ (The uninhabited Bouvet Island) and move Svalbard closer to mainland"
   ]
  },
  {
@ -624,6 +613,7 @@
   "outputs": [],
   "source": [
    "norway_copy = norway.copy()\n",
+    "\n",
    "norway_copy = norway_copy[norway_copy[\"iso_3166_2\"] != \"NO-X01~\"]\n",
    "reposition(norway_copy, norway.name == 'Svalbard', -12, -8, 0.5, 0.5)\n",
    "#reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n",
@ -663,8 +653,10 @@
   "outputs": [],
   "source": [
    "portugal_copy = portugal.copy()\n",
+    "\n",
    "reposition(portugal_copy, portugal.name == 'Azores', 11, 0)\n",
    "reposition(portugal_copy, portugal.name == 'Madeira', 6, 2, simplify=0.015)\n",
+    "\n",
    "portugal_copy.plot(figsize=(8, 8))"
   ]
  },
@ -700,7 +692,9 @@
   "outputs": [],
   "source": [
    "spain_copy = spain.copy()\n",
+    "\n",
    "reposition(spain_copy, spain.name.isin(['Las Palmas', 'Santa Cruz de Tenerife']), 3, 7, 1, 1)\n",
+    "\n",
    "spain_copy.plot(figsize=(8, 8))"
   ]
  },
@ -732,7 +726,7 @@
    "id": "IOuQ_OzMt1LW"
   },
   "source": [
-    "Russia looks off because of Chukchi runs across E180. We need to move the parts on the other side of the map to the right."
+    "- Russia looks off because Chukchi runs across E180. We need to move the parts on the other side of the map to the right."
   ]
  },
  {
@ -745,10 +739,6 @@
   },
   "outputs": [],
   "source": [
-    "import shapely.geometry\n",
-    "import shapely.ops\n",
-    "import shapely.affinity\n",
-    "\n",
    "def shift_geom(geom, cutoff=0):\n",
    "    border = shapely.geometry.LineString([(cutoff, -90), (cutoff, 90)])\n",
    "    splitted_geom = shapely.ops.split(geom, border)\n",
@ -815,8 +805,6 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from shapely.geometry import Polygon, MultiPolygon\n",
-    "\n",
    "# NUTS - 1 Codes for Turkey and corresponding region - city names\n",
    "\n",
    "region_dict = {\n",
@ -896,7 +884,7 @@
    "id": "aYFQYe8-t1LW"
   },
   "source": [
-    "#### France"
+    "### France"
   ]
  },
  {
@ -978,123 +966,63 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "france_regions = france_copy.copy()\n",
-    "# Define the mapping from the departments to their parent division\n",
-    "# i.e. from departments to regions\n",
-    "# Source: https://www.iso.org/obp/ui/#iso:code:3166:FR\n",
-    "dep_to_reg = {\n",
-    "    \"FR-01\": \"FR-ARA\",\n",
-    "    \"FR-02\": \"FR-HDF\",\n",
-    "    \"FR-03\": \"FR-ARA\",\n",
-    "    \"FR-04\": \"FR-PAC\",\n",
-    "    \"FR-05\": \"FR-PAC\",\n",
-    "    \"FR-06\": \"FR-PAC\",\n",
-    "    \"FR-07\": \"FR-ARA\",\n",
-    "    \"FR-08\": \"FR-GES\",\n",
-    "    \"FR-09\": \"FR-OCC\",\n",
-    "    \"FR-10\": \"FR-GES\",\n",
-    "    \"FR-11\": \"FR-OCC\",\n",
-    "    \"FR-12\": \"FR-OCC\",\n",
-    "    \"FR-13\": \"FR-PAC\",\n",
-    "    \"FR-14\": \"FR-NOR\",\n",
-    "    \"FR-15\": \"FR-ARA\",\n",
-    "    \"FR-16\": \"FR-NAQ\",\n",
-    "    \"FR-17\": \"FR-NAQ\",\n",
-    "    \"FR-18\": \"FR-CVL\",\n",
-    "    \"FR-19\": \"FR-NAQ\",\n",
-    "    \"FR-21\": \"FR-BFC\",\n",
-    "    \"FR-22\": \"FR-BRE\",\n",
-    "    \"FR-23\": \"FR-NAQ\",\n",
-    "    \"FR-24\": \"FR-NAQ\",\n",
-    "    \"FR-25\": \"FR-BFC\",\n",
-    "    \"FR-26\": \"FR-ARA\",\n",
-    "    \"FR-27\": \"FR-NOR\",\n",
-    "    \"FR-28\": \"FR-CVL\",\n",
-    "    \"FR-29\": \"FR-BRE\",\n",
-    "    \"FR-2A\": \"FR-20R\",\n",
-    "    \"FR-2B\": \"FR-20R\",\n",
-    "    \"FR-30\": \"FR-OCC\",\n",
-    "    \"FR-31\": \"FR-OCC\",\n",
-    "    \"FR-32\": \"FR-OCC\",\n",
-    "    \"FR-33\": \"FR-NAQ\",\n",
-    "    \"FR-34\": \"FR-OCC\",\n",
-    "    \"FR-35\": \"FR-BRE\",\n",
-    "    \"FR-36\": \"FR-CVL\",\n",
-    "    \"FR-37\": \"FR-CVL\",\n",
-    "    \"FR-38\": \"FR-ARA\",\n",
-    "    \"FR-39\": \"FR-BFC\",\n",
-    "    \"FR-40\": \"FR-NAQ\",\n",
-    "    \"FR-41\": \"FR-CVL\",\n",
-    "    \"FR-42\": \"FR-ARA\",\n",
-    "    \"FR-43\": \"FR-ARA\",\n",
-    "    \"FR-44\": \"FR-PDL\",\n",
-    "    \"FR-45\": \"FR-CVL\",\n",
-    "    \"FR-46\": \"FR-OCC\",\n",
-    "    \"FR-47\": \"FR-NAQ\",\n",
-    "    \"FR-48\": \"FR-OCC\",\n",
-    "    \"FR-49\": \"FR-PDL\",\n",
-    "    \"FR-50\": \"FR-NOR\",\n",
-    "    \"FR-51\": \"FR-GES\",\n",
-    "    \"FR-52\": \"FR-GES\",\n",
-    "    \"FR-53\": \"FR-PDL\",\n",
-    "    \"FR-54\": \"FR-GES\",\n",
-    "    \"FR-55\": \"FR-GES\",\n",
-    "    \"FR-56\": \"FR-BRE\",\n",
-    "    \"FR-57\": \"FR-GES\",\n",
-    "    \"FR-58\": \"FR-BFC\",\n",
-    "    \"FR-59\": \"FR-HDF\",\n",
-    "    \"FR-60\": \"FR-HDF\",\n",
-    "    \"FR-61\": \"FR-NOR\",\n",
-    "    \"FR-62\": \"FR-HDF\",\n",
-    "    \"FR-63\": \"FR-ARA\",\n",
-    "    \"FR-64\": \"FR-NAQ\",\n",
-    "    \"FR-65\": \"FR-OCC\",\n",
-    "    \"FR-66\": \"FR-OCC\",\n",
-    "    \"FR-67\": \"FR-GES\",\n",
-    "    \"FR-68\": \"FR-GES\",\n",
-    "    \"FR-69\": \"FR-ARA\",\n",
-    "    \"FR-70\": \"FR-BFC\",\n",
-    "    \"FR-71\": \"FR-BFC\",\n",
-    "    \"FR-72\": \"FR-PDL\",\n",
-    "    \"FR-73\": \"FR-ARA\",\n",
-    "    \"FR-74\": \"FR-ARA\",\n",
-    "    \"FR-75\": \"FR-IDF\",\n",
-    "    \"FR-76\": \"FR-NOR\",\n",
-    "    \"FR-77\": \"FR-IDF\",\n",
-    "    \"FR-78\": \"FR-IDF\",\n",
-    "    \"FR-79\": \"FR-NAQ\",\n",
-    "    \"FR-80\": \"FR-HDF\",\n",
-    "    \"FR-81\": \"FR-OCC\",\n",
-    "    \"FR-82\": \"FR-OCC\",\n",
-    "    \"FR-83\": \"FR-PAC\",\n",
-    "    \"FR-84\": \"FR-PAC\",\n",
-    "    \"FR-85\": \"FR-PDL\",\n",
-    "    \"FR-86\": \"FR-NAQ\",\n",
-    "    \"FR-87\": \"FR-NAQ\",\n",
-    "    \"FR-88\": \"FR-GES\",\n",
-    "    \"FR-89\": \"FR-BFC\",\n",
-    "    \"FR-90\": \"FR-BFC\",\n",
-    "    \"FR-91\": \"FR-IDF\",\n",
-    "    \"FR-92\": \"FR-IDF\",\n",
-    "    \"FR-93\": \"FR-IDF\",\n",
-    "    \"FR-94\": \"FR-IDF\",\n",
-    "    \"FR-95\": \"FR-IDF\",\n",
-    "    \"FR-GF\": \"FR-GF\",\n",
-    "    \"FR-MQ\": \"FR-MQ\",\n",
-    "    \"FR-GP\": \"FR-GP\",\n",
-    "    \"FR-RE\": \"FR-RE\",\n",
-    "    \"FR-YT\": \"FR-YT\",\n",
-    "}\n",
-    "# Apply the mapping departments => regions\n",
-    "france_regions[\"iso_3166_2_region\"] = france_regions['iso_3166_2'].apply(lambda x: dep_to_reg[x])\n",
-    "# Merge the departments together \n",
-    "# https://geopandas.org/en/stable/docs/user_guide/aggregation_with_dissolve.html\n",
-    "france_regions = france_regions.dissolve(by='iso_3166_2_region')\n",
-    "# Fix the label\n",
-    "france_regions['iso_3166_2'] = france_regions['iso_3166_2'].index\n",
-    "# Fix the name\n",
-    "france_regions['name'] = france_regions['region']\n",
-    "france_regions.plot()"
+    "france_regions = france_copy[['geometry','region_cod','region']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "france_regions = france_regions.dissolve(by=['region_cod', 'region']).reset_index()\n",
+    "\n",
+    "france_regions = france_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "france_regions.plot(figsize=(10, 7), edgecolor='black', column='iso_3166_2', legend=False, cmap='tab20')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Italy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Italy Regions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "italy_regions = df[df.admin == 'Italy'][['geometry','region_cod','region']]\n",
+    "\n",
+    "italy_regions = italy_regions.dissolve(by=['region_cod', 'region']).reset_index()\n",
+    "\n",
+    "italy_regions = italy_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "italy_regions.plot(figsize=(10, 7), edgecolor='black', column='iso_3166_2', legend=False, cmap='tab20')"
   ]
  },
  {
@ -1205,6 +1133,7 @@
    "    \"france\": france_copy,\n",
    "    \"france_regions\": france_regions,\n",
    "    \"turkey_regions\": turkey_regions,\n",
+    "    \"italy_regions\": italy_regions,\n",
    "    \"netherlands\": netherlands_copy,\n",
    "    \"norway\": norway_copy,\n",
    "    \"uk\": uk_copy,\n",
@ -1236,8 +1165,6 @@
   },
   "outputs": [],
   "source": [
-    "import shutil\n",
-    "\n",
    "simplify_factors = {\n",
    "    \"uk\": 0.005,\n",
    "}\n",
@ -1368,13 +1295,6 @@
    "\n",
    "print(\"TypeScript code written to src/countries.ts\")"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {
--- a/superset-frontend/plugins/legacy-plugin-chart-country-map/src/countries/france_regions.geojson
+++ b/superset-frontend/plugins/legacy-plugin-chart-country-map/src/countries/france_regions.geojson
--- a/superset-frontend/plugins/legacy-plugin-chart-country-map/src/countries/italy_regions.geojson
+++ b/superset-frontend/plugins/legacy-plugin-chart-country-map/src/countries/italy_regions.geojson