mirror of
https://github.com/apache/superset.git
synced 2024-09-12 16:49:40 -04:00
1330 lines
38 KiB
Plaintext
1330 lines
38 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "65aIalqEt1LR"
|
||
},
|
||
"source": [
|
||
"# Generate GeoJSON from Natural Earth Data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "L4PY3Z15t1LS"
|
||
},
|
||
"source": [
|
||
"## Install Dependencies"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "6_H7qbzIt1LS"
|
||
},
|
||
"source": [
|
||
"```\n",
|
||
"pip install geopandas shapely matplotlib\n",
|
||
"```"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "hvA0SEXVt1LS"
|
||
},
|
||
"source": [
|
||
"## Download Data\n",
|
||
"\n",
|
||
"Download datasets (_Admin 0 - Countries_ in [1:10](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/), and _Admin 1 – States, Provinces_ in 1:10 and [1:50](https://www.naturalearthdata.com/downloads/50m-cultural-vectors/)) from Natural Earth Data:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Dependencies\n",
|
||
"\n",
|
||
"import os\n",
|
||
"import requests\n",
|
||
"import geopandas as gpd\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import shapely\n",
|
||
"import pandas as pd\n",
|
||
"import shapely.geometry\n",
|
||
"import shapely.ops\n",
|
||
"import shapely.affinity\n",
|
||
"from shapely.geometry import Polygon, MultiPolygon\n",
|
||
"import shutil"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "VjGrqW4Kt1LS",
|
||
"outputId": "2e2accda-5ee4-4270-872e-ecb78d0d02a2"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_dir = os.path.expanduser(\"~/Downloads\")\n",
|
||
"if not os.path.exists(data_dir):\n",
|
||
" os.mkdir(data_dir)\n",
|
||
"\n",
|
||
"def download_files(skip_existing=True):\n",
"    \"\"\"Download the Natural Earth archives listed below into ``data_dir``.\n",
"\n",
"    Args:\n",
"        skip_existing: when true, a file that is already on disk and whose size\n",
"            equals the ``content-length`` reported by the server is not\n",
"            downloaded again.\n",
"\n",
"    The loop stops at the first response whose status code is not 200.\n",
"    \"\"\"\n",
"    urls = [\n",
"        \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip\",\n",
"        \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip\",\n",
"        \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip\"\n",
"    ]\n",
"    request_headers = {\n",
"        \"accept-encoding\": \"gzip, deflate, br\",\n",
"        \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36\"\n",
"    }\n",
"    for url in urls:\n",
"        file_name = url.split('/')[-1]\n",
"        full_file_name = f'{data_dir}/{file_name}'\n",
"        with requests.get(url, headers=request_headers, stream=True) as res:\n",
"            # Check the status first: an error response may carry no content-length at all\n",
"            if res.status_code != 200:\n",
"                print(\"Error downloading files. Please open the URL to download them from browser manually.\")\n",
"                break\n",
"            # The header is optional; without it the size check below simply never matches\n",
"            content_length = res.headers.get('content-length')\n",
"            file_size = int(content_length) if content_length is not None else None\n",
"            if (\n",
"                skip_existing and\n",
"                os.path.exists(full_file_name) and\n",
"                file_size == os.path.getsize(full_file_name)\n",
"            ):\n",
"                print(f\"Skip {file_name} because it already exists\")\n",
"                continue\n",
"            print(f\"Downloading {file_name}... \\r\", end=\"\")\n",
"            with open(full_file_name, \"wb\") as fh:\n",
"                # stream=True was requested, so write chunk by chunk instead of buffering res.content\n",
"                for chunk in res.iter_content(chunk_size=1024 * 1024):\n",
"                    fh.write(chunk)\n",
"            print(\"Done. \")\n",
|
||
"\n",
|
||
"download_files(skip_existing=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "EL0e9DEVt1LT",
|
||
"outputId": "16cd6450-d4a3-457a-b205-9797bbce33fc"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Read Natural Earth data files into GeoDataFrames\n",
|
||
"df_admin0_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_0_countries.zip\")\n",
|
||
"df_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_1_states_provinces.zip\")\n",
|
||
"df_50m = gpd.read_file(f\"{data_dir}/ne_50m_admin_1_states_provinces.zip\")\n",
|
||
"\n",
|
||
"# Convert column names to lowercase\n",
|
||
"df_admin0_10m.columns = df_admin0_10m.columns.str.lower()\n",
|
||
"\n",
|
||
"# Download and load the GeoJSON file for India\n",
|
||
"india_geojson_url = \"https://github.com/geohacker/india/raw/bcb920c7d3c686f01d085f7661c9ba89bf9bf65e/state/india_state_kashmir_ladakh.geojson\"\n",
|
||
"\n",
|
||
"try:\n",
"    india_gdf = gpd.read_file(india_geojson_url)\n",
"except Exception as e:\n",
"    print(f\"Unable to download or load the GeoJSON file for India. Error: {str(e)}\")\n",
"    print(\"Please download the file from the URL and try again.\")\n",
"    # india_gdf is used by a later cell; stop here instead of failing there with a NameError\n",
"    raise\n",
"else:\n",
"    print(\"GeoJSON file for India downloaded and loaded successfully.\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "DUrz04nYt1LT",
|
||
"outputId": "18d7cdb0-8ab6-4238-e50c-925c5dc117b0"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_50m.groupby('admin').count()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 1000
|
||
},
|
||
"id": "eUlJjdRkt1LT",
|
||
"outputId": "60df2dc3-800e-40ac-f151-696a7f91cff4"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_50m[df_50m.adm0_a3 == 'USA'].plot(figsize=(20,10))\n",
|
||
"plt.show()\n",
|
||
"\n",
|
||
"india_gdf.plot(figsize=(20, 10))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "pr1jqM3kt1LU",
|
||
"outputId": "7211a182-b64a-469b-fadb-af2148ec6852"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Use 1:50m geometry for some large countries:\n",
|
||
"\n",
|
||
"print(*df_50m['admin'].unique(), sep='\\n')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "k-KuZ8L4t1LU"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df = pd.concat([df_10m[~df_10m['admin'].isin(df_50m['admin'].unique())], df_50m])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "06nhCSvTt1LU"
|
||
},
|
||
"source": [
|
||
"## Adjust the Maps\n",
|
||
"\n",
|
||
"<span style=\"color: red; font-size: 1.5em\">TO SUPPORT NEW COUNTRIES, ADD COUNTRY NAME BELOW</span>"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "-4uH5XaEt1LU"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Country names used in file names\n",
|
||
"countries = [\n",
|
||
" 'afghanistan',\n",
|
||
" 'albania',\n",
|
||
" 'algeria',\n",
|
||
" 'argentina',\n",
|
||
" 'australia',\n",
|
||
" 'austria',\n",
|
||
" 'belgium',\n",
|
||
" 'bolivia',\n",
|
||
" 'brazil',\n",
|
||
" 'bulgaria',\n",
|
||
" 'burundi',\n",
|
||
" 'canada',\n",
|
||
" 'chile',\n",
|
||
" 'china',\n",
|
||
" 'colombia',\n",
|
||
" 'costa rica',\n",
|
||
" 'cuba',\n",
|
||
" 'cyprus',\n",
|
||
" 'denmark',\n",
|
||
" 'dominican republic',\n",
|
||
" 'ecuador',\n",
|
||
" 'egypt',\n",
|
||
" 'el salvador',\n",
|
||
" 'estonia',\n",
|
||
" 'ethiopia',\n",
|
||
" 'france',\n",
|
||
" 'france_regions', # this is one derived from france - see below\n",
|
||
" 'finland',\n",
|
||
" 'germany',\n",
|
||
" 'guatemala',\n",
|
||
" 'haiti',\n",
|
||
" 'honduras',\n",
|
||
" 'iceland',\n",
|
||
" 'india',\n",
|
||
" 'indonesia',\n",
|
||
" 'iran',\n",
|
||
" 'italy',\n",
|
||
" 'italy_regions', # this one is derived from italy - see below\n",
|
||
" 'japan',\n",
|
||
" 'jordan',\n",
|
||
" 'kazakhstan',\n",
|
||
" 'kenya',\n",
|
||
" 'korea',\n",
|
||
" 'kuwait',\n",
|
||
" 'kyrgyzstan',\n",
|
||
" 'latvia',\n",
|
||
" 'liechtenstein',\n",
|
||
" 'lithuania',\n",
|
||
" 'malaysia',\n",
|
||
" 'mexico',\n",
|
||
" 'morocco',\n",
|
||
" 'myanmar',\n",
|
||
" 'netherlands',\n",
|
||
" 'nicaragua',\n",
|
||
" 'nigeria',\n",
|
||
" 'norway',\n",
|
||
" 'oman',\n",
|
||
" 'pakistan',\n",
|
||
" 'panama',\n",
|
||
" 'papua new guinea',\n",
|
||
" 'paraguay',\n",
|
||
" 'peru',\n",
|
||
" 'philippines',\n",
|
||
" 'portugal',\n",
|
||
" 'poland',\n",
|
||
" 'puerto rico',\n",
|
||
" 'qatar',\n",
|
||
" 'russia',\n",
|
||
" 'rwanda',\n",
|
||
" 'saint barthelemy',\n",
|
||
" 'saint martin',\n",
|
||
" 'saudi arabia',\n",
|
||
" 'singapore',\n",
|
||
" 'slovenia',\n",
|
||
" 'spain',\n",
|
||
" 'sri lanka',\n",
|
||
" 'sweden',\n",
|
||
" 'switzerland',\n",
|
||
" 'syria',\n",
|
||
" 'tajikistan',\n",
|
||
" 'tanzania',\n",
|
||
" 'thailand',\n",
|
||
" 'timorleste',\n",
|
||
" 'turkey',\n",
|
||
" 'turkey_regions', # this one derived from turkey - see below\n",
|
||
" 'turkmenistan',\n",
|
||
" 'uganda',\n",
|
||
" 'uk',\n",
|
||
" 'ukraine',\n",
|
||
" 'united arab emirates',\n",
|
||
" 'uruguay',\n",
|
||
" 'usa',\n",
|
||
" 'uzbekistan',\n",
|
||
" 'venezuela',\n",
|
||
" 'vietnam',\n",
|
||
" 'zambia',\n",
|
||
"]\n",
|
||
"\n",
|
||
"# country name used in dataset\n",
|
||
"country_name_aliases = {\n",
|
||
" \"uk\": \"united kingdom\",\n",
|
||
" \"usa\": \"united states of america\",\n",
|
||
" \"korea\": \"south korea\",\n",
|
||
" \"timorleste\": \"east timor\",\n",
|
||
" \"tanzania\": \"united republic of tanzania\",\n",
|
||
"}\n",
|
||
"\n",
|
||
"# CSV files that exist specifically on the repo, rather than in the dataset\n",
|
||
"custom_countries = [\n",
|
||
" \n",
|
||
"]\n",
|
||
"\n",
|
||
"# Make sure all country names are covered.\n",
"# The lower-cased dataset names are computed once instead of once per country.\n",
"known_admin_names = set(df[\"admin\"].str.lower().unique())\n",
"invalid_countries = [\n",
"    x for x in countries\n",
"    if country_name_aliases.get(x, x) not in known_admin_names\n",
"    and x not in custom_countries\n",
"]\n",
"\n",
"if invalid_countries:\n",
"    print(f\"Following country names are not valid: {invalid_countries}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "7z--iQz4t1LU"
|
||
},
|
||
"source": [
|
||
"Preview all countries:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "tJ_WNQl8t1LU",
|
||
"outputId": "4f601ce0-26e4-4a40-c36c-8449420e9406"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"alt_maps = dict()\n",
|
||
"\n",
|
||
"def get_gdf(country):\n",
"    \"\"\"Return a copy of the GeoDataFrame to use for ``country``.\n",
"\n",
"    Lookup order: an entry in ``alt_maps``, then a GeoJSON file under\n",
"    ``../src/countries_custom/`` for names listed in ``custom_countries``,\n",
"    then the rows of ``df`` whose lower-cased ``admin`` equals the country\n",
"    name (after applying ``country_name_aliases``).\n",
"    \"\"\"\n",
"    if country in alt_maps:\n",
"        return alt_maps[country].copy()\n",
"    if country in custom_countries:\n",
"        return gpd.read_file(f'../src/countries_custom/{country}.geojson').copy()\n",
"    dataset_name = country_name_aliases.get(country, country)\n",
"    return df[df[\"admin\"].str.lower() == dataset_name].copy()\n",
|
||
"\n",
|
||
"def plot_all_countries():\n",
"    \"\"\"Preview every entry of ``countries`` as one subplot of a single figure.\n",
"\n",
"    Entries whose GeoDataFrame is empty get a placeholder subplot showing the\n",
"    country name instead of a map.\n",
"    \"\"\"\n",
"    ncols = 6\n",
"    # Ceiling division: the grid always has at least one slot per country\n",
"    nrows = -(-len(countries) // ncols)\n",
"\n",
"    plt.figure(figsize=(20, 20))\n",
"\n",
"    for i, country in enumerate(countries):\n",
"        ax = plt.subplot(nrows, ncols, i + 1)  # nrows, ncols, axes position\n",
"        gdf = get_gdf(country)\n",
"        if not gdf.empty:\n",
"            gdf.plot(ax=ax)\n",
"            ax.set_aspect('equal', adjustable='datalim')\n",
"        else:\n",
"            # Nothing to draw: put the country name in the center of the subplot\n",
"            ax.text(0.5, 0.5, country, ha='center', va='center')\n",
"        ax.set_title(country)\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
|
||
"\n",
|
||
"plot_all_countries()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "7Ab0_rHVt1LU"
|
||
},
|
||
"source": [
|
||
"### Handle countries with flying islands\n",
|
||
"\n",
|
||
"- For countries with flying islands, we need to move the islands closer to the mainland."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Z4y46Zuot1LU"
|
||
},
|
||
"source": [
|
||
"#### USA"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "xx8IbBKtt1LU",
|
||
"outputId": "025139d2-ba0b-43a9-e2ec-f4608e6ecad2"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa = df[df['adm0_a3'] == 'USA']\n",
|
||
"usa.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "ixC6KENXt1LU",
|
||
"outputId": "8e63cb2d-d733-4a9f-caf6-ccc843f15b5d"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def reposition(df, idx, xoff=None, yoff=None, xscale=None, yscale=None, simplify=None):\n",
"    \"\"\"Translate, scale and optionally simplify selected geometries of ``df`` in place.\n",
"\n",
"    Args:\n",
"        df: frame whose ``geometry`` column is updated.\n",
"        idx: row selector passed to ``df.loc``.\n",
"        xoff, yoff: translation offsets; a missing one counts as 0.\n",
"        xscale, yscale: scale factors; a missing one counts as 1.\n",
"        simplify: tolerance passed to ``simplify(..., preserve_topology=False)``;\n",
"            skipped when falsy.\n",
"\n",
"    The steps run in the order translate, scale, simplify.\n",
"    \"\"\"\n",
"    wants_shift = bool(xoff or yoff)\n",
"    wants_scale = bool(xscale or yscale)\n",
"\n",
"    def transform(geom):\n",
"        shape = geom\n",
"        if wants_shift:\n",
"            shape = shapely.affinity.translate(shape, xoff or 0, yoff or 0)\n",
"        if wants_scale:\n",
"            shape = shapely.affinity.scale(shape, xscale or 1, yscale or 1)\n",
"        if simplify:\n",
"            shape = shape.simplify(simplify, preserve_topology=False)\n",
"        return shape\n",
"\n",
"    selected = df.loc[idx, 'geometry']\n",
"    df.loc[idx, 'geometry'] = selected.apply(transform)\n",
|
||
"\n",
|
||
"\n",
|
||
"usa_copy = usa.copy()\n",
|
||
"reposition(usa_copy, usa.name == 'Hawaii', 51, 5.5)\n",
|
||
"reposition(usa_copy, usa.name == 'Alaska', 35, -34, 0.35, 0.35)\n",
|
||
"\n",
|
||
"usa_copy.plot(figsize=(8,8))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "d1p9cWNxt1LU"
|
||
},
|
||
"source": [
|
||
"#### China\n",
|
||
"\n",
|
||
"China claims sovereignty over Taiwan. For disputed territories, we respect each country and give them what they want.\n",
|
||
"\n",
|
||
"In addition, Hong Kong and Macau should also be included in a China map."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "vN4Ngpe7t1LU",
|
||
"outputId": "3bcdc612-cc01-49be-fe19-f6e08e833fca"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Chinese Special Administrative Regions\n",
|
||
"china_sars = df_admin0_10m.loc[\n",
|
||
" df_admin0_10m.name_en.isin(['Taiwan', 'Hong Kong', 'Macau']),\n",
|
||
" [x for x in df_admin0_10m.columns if x in df.columns]\n",
|
||
"]\n",
|
||
"china_sars = china_sars.merge(pd.DataFrame(\n",
|
||
" data={\n",
|
||
" \"name_en\": [\"Taiwan\", \"Hong Kong\", \"Macau\"],\n",
|
||
" \"name_zh\": [\"中国台湾\", \"香港特别行政区\", \"澳门特别行政区\"],\n",
|
||
" \"iso_3166_2\": [\"CN-71\", \"CN-91\", \"CN-92\"],\n",
|
||
" },\n",
|
||
"), on=\"name_en\", how=\"left\")\n",
|
||
"china_sars"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "PP6E24eEt1LV",
|
||
"outputId": "2621d5f1-1edc-42fc-e8df-8afd6a525cc6",
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"china = df[df.admin == \"China\"]\n",
|
||
"china_copy = pd.concat([china, china_sars], ignore_index=True)\n",
|
||
"\n",
|
||
"# Combine the 'name_zh' columns\n",
|
||
"china_copy[\"name_zh\"] = china_copy[\"name_zh\"].combine_first(china_copy[\"name_zh_y\"])\n",
|
||
"\n",
|
||
"# Drop the extra 'name_zh_x' and 'name_zh_y' columns, if they exist\n",
|
||
"china_copy = china_copy.drop([\"name_zh_x\", \"name_zh_y\"], axis=1)\n",
|
||
"\n",
|
||
"# Plotting the DataFrame\n",
|
||
"china_copy.plot(figsize=(12, 12))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "nqn5qsR-t1LV"
|
||
},
|
||
"source": [
|
||
"Note [ISO-3166-2:CN](https://en.wikipedia.org/wiki/ISO_3166-2:CN) has updated subdivisions to use letters instead of numbers (e.g. `CN-91` -> `CN-HK`). We kept the numeric code for backward compatibility."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "iNnVk5dut1LV"
|
||
},
|
||
"source": [
|
||
"#### Finland\n",
|
||
"\n",
|
||
"- The Åland Islands (ISO country code AX) is an autonomous region of Finland, and carries the ISO-3166 code FI-01."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "LuNGgwiQt1LV"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"finland_aland = df_admin0_10m.loc[\n",
|
||
" df_admin0_10m.name_en.isin(['Åland']),\n",
|
||
" [x for x in df_admin0_10m.columns if x in df.columns]\n",
|
||
"]\n",
|
||
"finland_aland = finland_aland.merge(pd.DataFrame(\n",
|
||
" data={\n",
|
||
" \"name_en\": [\"Åland\"],\n",
|
||
" \"name_fi\": [\"Ahvenanmaan maakunta\"],\n",
|
||
" \"iso_3166_2\": [\"FI-01\"],\n",
|
||
" },\n",
|
||
"), on=\"name_en\", how=\"left\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "v8ig_jQDt1LV",
|
||
"outputId": "3f10b14d-dde2-46d9-f4f6-6f4311fb3e73"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"finland = df[df.admin == \"Finland\"]\n",
"\n",
"# Append the Åland row (taken from the Admin 0 dataset) to Finland's subdivisions\n",
"finland_copy = pd.concat([finland, finland_aland], ignore_index=True)\n",
"\n",
"# 'name_fi' is not kept in the final frame.\n",
"# (The previous combine_first of 'name_fi' with itself was a no-op and has been removed.)\n",
"finland_copy = finland_copy.drop([\"name_fi\"], axis=1)\n",
"\n",
"# Plotting the DataFrame\n",
"finland_copy.plot(figsize=(12, 12))\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "UP0QB9BZt1LV"
|
||
},
|
||
"source": [
|
||
"#### Norway\n",
|
||
"\n",
|
||
"- Remove NO-X01~ (The uninhabited Bouvet Island) and move Svalbard closer to mainland"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "8zBzSIqQt1LV",
|
||
"outputId": "cc8b6fbf-accb-44ba-b80a-a837df398c96"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"norway = df[df['adm0_a3'] == 'NOR']\n",
|
||
"norway.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "-LXcKKOjt1LV",
|
||
"outputId": "546a286e-9682-4f9a-c57e-b19250d88a34"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"norway_copy = norway.copy()\n",
|
||
"\n",
|
||
"norway_copy = norway_copy[norway_copy[\"iso_3166_2\"] != \"NO-X01~\"]\n",
|
||
"reposition(norway_copy, norway.name == 'Svalbard', -12, -8, 0.5, 0.5)\n",
|
||
"#reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n",
|
||
"\n",
|
||
"norway_copy.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "NqdSwt2ct1LV"
|
||
},
|
||
"source": [
|
||
"#### Portugal"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "mznw0XOgt1LV",
|
||
"outputId": "7e8085bc-abd9-4592-f047-62fa1a45eb01"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"portugal = df[df.admin == 'Portugal']\n",
|
||
"portugal.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "yfAO1qFrt1LV",
|
||
"outputId": "9151ce8f-2412-415b-da73-eeec613276d8"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"portugal_copy = portugal.copy()\n",
|
||
"\n",
|
||
"reposition(portugal_copy, portugal.name == 'Azores', 11, 0)\n",
|
||
"reposition(portugal_copy, portugal.name == 'Madeira', 6, 2, simplify=0.015)\n",
|
||
"\n",
|
||
"portugal_copy.plot(figsize=(8, 8))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "jJyypJbJt1LV"
|
||
},
|
||
"source": [
|
||
"#### Spain"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "bbyDCO0Qt1LV",
|
||
"outputId": "f2a0594d-999b-4573-d008-5158f898a1c6"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"spain = df[df.admin == 'Spain']\n",
|
||
"spain.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "yJ_Ueh7Rt1LV",
|
||
"outputId": "16fe59db-4be4-4e02-d37b-3098bdfa945a"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"spain_copy = spain.copy()\n",
|
||
"\n",
|
||
"reposition(spain_copy, spain.name.isin(['Las Palmas', 'Santa Cruz de Tenerife']), 3, 7, 1, 1)\n",
|
||
"\n",
|
||
"spain_copy.plot(figsize=(8, 8))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "-SNb1b-Et1LV"
|
||
},
|
||
"source": [
|
||
"#### Russia"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "60UpJMNwt1LV",
|
||
"outputId": "1c9ff3fa-83e6-411e-9dc3-0c718ee97d39"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"russia = df[df.admin == 'Russia']\n",
|
||
"russia.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "IOuQ_OzMt1LW"
|
||
},
|
||
"source": [
|
||
"- Russia looks off because Chukchi runs across the 180° meridian (E180). We need to move the parts on the other side of the map to the right."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "rfBkQf78t1LW",
|
||
"outputId": "8342e4b8-2483-4aac-8a79-e88d455297e2",
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def shift_geom(geom, cutoff=0):\n",
"    \"\"\"Move the parts of ``geom`` that lie west of a meridian one full turn east.\n",
"\n",
"    The geometry is split along the vertical line ``x == cutoff``; every piece\n",
"    whose minimum x is below ``cutoff`` is translated by 360 degrees, and all\n",
"    pieces are merged back into a single geometry.\n",
"\n",
"    Args:\n",
"        geom: shapely geometry to process.\n",
"        cutoff: x coordinate (longitude) of the splitting line.\n",
"\n",
"    Returns:\n",
"        The union of the moved and unmoved pieces.\n",
"    \"\"\"\n",
"    border = shapely.geometry.LineString([(cutoff, -90), (cutoff, 90)])\n",
"    splitted_geom = shapely.ops.split(geom, border)\n",
"\n",
"    # Anything other than a GeometryCollection is treated as \"no split occurred\"\n",
"    if not isinstance(splitted_geom, shapely.geometry.GeometryCollection):\n",
"        return shapely.ops.unary_union([geom])\n",
"\n",
"    moved_geom = []\n",
"    for item in splitted_geom.geoms:\n",
"        minx = item.bounds[0]\n",
"        if minx < cutoff:\n",
"            # Longitude wraps every 360 degrees wherever the cut is made\n",
"            # (the former `360 - cutoff` offset was only right for cutoff == 0)\n",
"            moved_geom.append(shapely.affinity.translate(item, xoff=360))\n",
"        else:\n",
"            moved_geom.append(item)\n",
"\n",
"    # Combine all pieces into a single geometry\n",
"    return shapely.ops.unary_union(moved_geom)\n",
|
||
"\n",
|
||
"# Applying the function to the DataFrame\n",
|
||
"russia_copy = russia.copy()\n",
|
||
"russia_copy.loc[\n",
|
||
" russia.name == 'Chukchi Autonomous Okrug', 'geometry'\n",
|
||
"] = russia_copy.loc[\n",
|
||
" russia.name == 'Chukchi Autonomous Okrug', 'geometry'\n",
|
||
"].apply(shift_geom)\n",
|
||
"\n",
|
||
"# Plotting\n",
|
||
"russia_copy.plot(figsize=(20, 20))\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Turkey"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Turkey Regions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"turkey = df[df.admin == 'Turkey'][['iso_3166_2','geometry']]\n",
|
||
"turkey.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# NUTS-1 codes for Turkey and the corresponding city (province) codes of each region\n",
|
||
"\n",
|
||
"region_dict = {\n",
|
||
" 'TR1': ['TR-34'],\n",
|
||
" 'TR2': ['TR-59', 'TR-22', 'TR-39', 'TR-10', 'TR-17'],\n",
|
||
" 'TR3': ['TR-35', 'TR-09', 'TR-20', 'TR-48', 'TR-45', 'TR-03', 'TR-43', 'TR-64'],\n",
|
||
" 'TR4': ['TR-16', 'TR-26', 'TR-11', 'TR-41', 'TR-54', 'TR-81', 'TR-14', 'TR-77'],\n",
|
||
" 'TR5': ['TR-06', 'TR-42', 'TR-70'],\n",
|
||
" 'TR6': ['TR-07', 'TR-32', 'TR-15', 'TR-01', 'TR-33', 'TR-31', 'TR-46', 'TR-80'],\n",
|
||
" 'TR7': ['TR-71', 'TR-68', 'TR-51', 'TR-50', 'TR-40', 'TR-38', 'TR-58', 'TR-66'],\n",
|
||
" 'TR8': ['TR-67', 'TR-78', 'TR-74', 'TR-37', 'TR-18', 'TR-57', 'TR-55', 'TR-60', 'TR-19', 'TR-05'],\n",
|
||
" 'TR9': ['TR-61', 'TR-52', 'TR-28', 'TR-53', 'TR-08', 'TR-29'],\n",
|
||
" 'TRA': ['TR-25', 'TR-24', 'TR-69', 'TR-04', 'TR-36', 'TR-76', 'TR-75'],\n",
|
||
" 'TRB': ['TR-44', 'TR-23', 'TR-12', 'TR-62', 'TR-65', 'TR-49', 'TR-13', 'TR-30'],\n",
|
||
" 'TRC': ['TR-27', 'TR-02', 'TR-79', 'TR-63', 'TR-21', 'TR-47', 'TR-72', 'TR-73', 'TR-56']}\n",
|
||
"\n",
|
||
"# Region names corresponding to NUTS-1\n",
|
||
"\n",
|
||
"region_name_dict = {'TR1':'İstanbul',\n",
|
||
" 'TR2':'Batı Marmara',\n",
|
||
" 'TR3':'Ege',\n",
|
||
" 'TR4':'Doğu Marmara',\n",
|
||
" 'TR5':'Batı Anadolu',\n",
|
||
" 'TR6':'Akdeniz',\n",
|
||
" 'TR7':'Orta Anadolu',\n",
|
||
" 'TR8':'Batı Karadeniz',\n",
|
||
" 'TR9':'Doğu Karadeniz',\n",
|
||
" 'TRA':'Kuzeydoğu Anadolu',\n",
|
||
" 'TRC':'Güneydoğu Anadolu',\n",
|
||
" 'TRB':'Ortadoğu Anadolu'\n",
|
||
" }\n",
|
||
"\n",
|
||
"\n",
|
||
"def create_region_polygons(region_dict, turkey_gdf):\n",
"    \"\"\"Merge city polygons into one polygon per region.\n",
"\n",
"    Args:\n",
"        region_dict: maps a region code to the list of city codes it contains.\n",
"        turkey_gdf: frame with ``iso_3166_2`` and ``geometry`` columns. A\n",
"            ``REGION`` column is added to it in place.\n",
"\n",
"    Returns:\n",
"        A frame with one row per ``REGION`` and without the ``iso_3166_2`` column.\n",
"    \"\"\"\n",
"    # Invert region -> [city codes] into city code -> region\n",
"    city_to_region = {}\n",
"    for region_code, city_codes in region_dict.items():\n",
"        for city_code in city_codes:\n",
"            city_to_region[city_code] = region_code\n",
"\n",
"    # Tag every city with its region\n",
"    turkey_gdf['REGION'] = turkey_gdf['iso_3166_2'].map(city_to_region)\n",
"\n",
"    # Dissolve on 'REGION' to combine the city polygons, then turn the index back into a column\n",
"    merged = turkey_gdf.dissolve(by='REGION').reset_index()\n",
"\n",
"    return merged.drop(columns=['iso_3166_2'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"turkey_regions = create_region_polygons(region_dict, turkey)\n",
|
||
"\n",
|
||
"# Rename the 'REGION' column to 'iso_3166_2'\n",
|
||
"turkey_regions = turkey_regions.rename(columns={'REGION': 'iso_3166_2'})\n",
|
||
"\n",
|
||
"# Map each region code to its region name in a new 'name' column\n",
|
||
"turkey_regions['name'] = turkey_regions['iso_3166_2'].map(region_name_dict)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"turkey_regions.plot(figsize=(10, 7), edgecolor='black', column='name', legend=False, cmap='tab20')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "aYFQYe8-t1LW"
|
||
},
|
||
"source": [
|
||
"### France"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "AcT31Diyt1LW",
|
||
"outputId": "cd6cc6ef-43ba-478e-b183-84eb7e003e17"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"france = df[df.admin == 'France']\n",
|
||
"france.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "p7Y4Vf6pt1LW"
|
||
},
|
||
"source": [
|
||
"Move the [Overseas departments and regions of France](https://en.wikipedia.org/wiki/Overseas_departments_and_regions_of_France) closer to mainland."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Fix some department names\n",
|
||
"\n",
|
||
"- Seien-et-Marne => Seine-et-Marne\n",
|
||
"- Haute-Rhin => Haut-Rhin"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def replace_name(df, old, new):\n",
"    \"\"\"Rename, in place, every row of ``df`` whose ``name`` equals ``old``.\n",
"\n",
"    Args:\n",
"        df: frame with a ``name`` column; modified in place.\n",
"        old: the name to look for.\n",
"        new: the replacement name.\n",
"\n",
"    Does nothing when ``old`` is not present.\n",
"    \"\"\"\n",
"    # A boolean mask touches exactly the matching rows, even when index labels\n",
"    # repeat (label-based `.at` would overwrite every row sharing the label)\n",
"    df.loc[df['name'] == old, 'name'] = new\n",
|
||
" \n",
|
||
"replace_name(france, 'Seien-et-Marne', 'Seine-et-Marne')\n",
|
||
"replace_name(france, 'Haute-Rhin', 'Haut-Rhin')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "yjKX9Pbbt1LW",
|
||
"outputId": "14caae01-b1b0-4775-a00e-a9e4f30fdf73"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"france_copy = france.copy()\n",
|
||
"reposition(france_copy, france.name=='Guadeloupe', 57.4, 25.4, 1.5, 1.5)\n",
|
||
"reposition(france_copy, france.name=='Martinique', 58.4, 27.1, 1.5, 1.5)\n",
|
||
"reposition(france_copy, france.name=='Guyane française', 52, 37.7, 0.35, 0.35)\n",
|
||
"reposition(france_copy, france.name=='La Réunion', -55, 62.8, 1.5, 1.5)\n",
|
||
"reposition(france_copy, france.name=='Mayotte', -43, 54.3, 1.5, 1.5)\n",
|
||
"\n",
|
||
"france_copy.plot(figsize=(8, 8))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### France Regions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"france_regions = france_copy[['geometry','region_cod','region']]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"france_regions = france_regions.dissolve(by=['region_cod', 'region']).reset_index()\n",
|
||
"\n",
|
||
"france_regions = france_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"france_regions.plot(figsize=(10, 7), edgecolor='black', column='iso_3166_2', legend=False, cmap='tab20')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Italy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Italy Regions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"italy_regions = df[df.admin == 'Italy'][['geometry','region_cod','region']]\n",
|
||
"\n",
|
||
"italy_regions = italy_regions.dissolve(by=['region_cod', 'region']).reset_index()\n",
|
||
"\n",
|
||
"italy_regions = italy_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"italy_regions.plot(figsize=(10, 7), edgecolor='black', column='iso_3166_2', legend=False, cmap='tab20')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "d1T6jfJPt1LW"
|
||
},
|
||
"source": [
|
||
"#### Netherlands"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def apply_bounds(df, northwest, southeast):\n",
"    \"\"\"Keep only the rows whose geometry is contained in a rectangle.\n",
"\n",
"    Args:\n",
"        df: frame with a ``geometry`` column; it is not modified.\n",
"        northwest: ``(x, y)`` of one corner of the rectangle.\n",
"        southeast: ``(x, y)`` of the opposite corner.\n",
"\n",
"    Returns:\n",
"        The rows of a copy of ``df`` whose geometry the rectangle contains.\n",
"    \"\"\"\n",
"    west, north = northwest\n",
"    east, south = southeast\n",
"    bounding_box = shapely.geometry.Polygon(\n",
"        [(west, north), (west, south), (east, south), (east, north)]\n",
"    )\n",
"    frame = df.copy()\n",
"    inside = frame.geometry.apply(lambda shape: bounding_box.contains(shape))\n",
"    return frame[inside]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "IS5Gcxgct1LW",
|
||
"outputId": "b8dbb05f-4ca9-4884-83ac-a7c169a9830a"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"netherlands = df[df.admin == 'Netherlands']\n",
|
||
"netherlands.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "nwUGucQ1t1LW",
|
||
"outputId": "26c5aede-c587-4d88-cfe0-30ecaec9ede3"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"netherlands_copy = apply_bounds(netherlands, (-20, 60), (20, 20))\n",
|
||
"netherlands_copy.plot(figsize=(8, 8))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "tTpJe28jt1LW"
|
||
},
|
||
"source": [
|
||
"#### UK"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "xfMx6gJmt1LW",
|
||
"outputId": "5278dfc3-3f51-4c21-84cc-922251b1d0cb"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"uk = df[df.admin == 'United Kingdom']\n",
|
||
"uk.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "28VU40f9t1LW",
|
||
"outputId": "45585067-de13-4e02-8147-053ef0115d2d"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"uk_copy = apply_bounds(uk, (-10, 60), (20, 20))\n",
|
||
"uk_copy.plot(figsize=(8, 8))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Fb58eGlIt1LW"
|
||
},
|
||
"source": [
|
||
"## Output GeoJSON"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "5xOVyzXCt1LW"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"alt_maps = {\n",
|
||
" \"finland\": finland_copy,\n",
|
||
" \"china\": china_copy,\n",
|
||
" \"usa\": usa_copy,\n",
|
||
" \"france\": france_copy,\n",
|
||
" \"france_regions\": france_regions,\n",
|
||
" \"turkey_regions\": turkey_regions,\n",
|
||
" \"italy_regions\": italy_regions,\n",
|
||
" \"netherlands\": netherlands_copy,\n",
|
||
" \"norway\": norway_copy,\n",
|
||
" \"uk\": uk_copy,\n",
|
||
" \"russia\": russia_copy,\n",
|
||
" \"spain\": spain_copy,\n",
|
||
" \"portugal\": portugal_copy,\n",
|
||
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "tM1F5d0Vt1LW",
|
||
"outputId": "75abad9b-9442-4279-d66d-a0cd5fb97198"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plot_all_countries()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "8U3S1PUbt1LW",
|
||
"outputId": "cfb8d229-ffdf-473f-d516-6aa136e41a60",
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"simplify_factors = {\n",
|
||
" \"uk\": 0.005,\n",
|
||
"}\n",
|
||
"useful_columns = [\"ISO\", \"NAME_1\", \"geometry\"]\n",
|
||
"\n",
|
||
"def get_simplify_factor_by_size(gdf):\n",
|
||
" xmin, ymin, xmax, ymax = shapely.ops.unary_union(gdf[\"geometry\"]).bounds\n",
|
||
" size = (xmax - xmin) * (ymax - ymin)\n",
|
||
" print(\"Size\", round(size, 3), end=\"\\t\")\n",
|
||
" if size > 1000: return 0.03\n",
|
||
" if size > 300: return 0.02\n",
|
||
" if size > 100: return 0.01\n",
|
||
" return 0\n",
|
||
"\n",
|
||
"def simplify_if_needed(country, gdf):\n",
|
||
" \"\"\"Simplify the maps based on country size\"\"\"\n",
|
||
" country_alias = country_name_aliases.get(country, country)\n",
|
||
" if country_alias in df_50m[\"admin\"].str.lower().unique():\n",
|
||
" return\n",
|
||
"\n",
|
||
" factor = simplify_factors.get(country) or get_simplify_factor_by_size(gdf)\n",
|
||
"\n",
|
||
" if factor:\n",
|
||
" gdf[\"geometry\"] = gdf.simplify(factor)\n",
|
||
"\n",
|
||
"def save_geojson(country):\n",
|
||
" if country in custom_countries:\n",
|
||
" shutil.copy(f\"../src/countries_custom/{country}.geojson\", f\"../src/countries/{country}.geojson\")\n",
|
||
" else:\n",
|
||
" gdf = get_gdf(country)\n",
|
||
" print(country, end=\"\\t\")\n",
|
||
" \n",
|
||
" # For backward compatibility\n",
|
||
" gdf[\"ISO\"] = gdf[\"iso_3166_2\"]\n",
|
||
" gdf[\"NAME_1\"] = gdf[\"name\"]\n",
|
||
" \n",
|
||
" simplify_if_needed(country, gdf)\n",
|
||
" \n",
|
||
" print(f'Saving geojson for {country}...')\n",
|
||
" filename_country = country.replace(' ', '_')\n",
|
||
" gdf[useful_columns].to_file(f\"../src/countries/{filename_country}.geojson\", driver=\"GeoJSON\")\n",
|
||
"\n",
|
||
"for country in countries:\n",
|
||
" save_geojson(country)\n",
|
||
"\n",
|
||
"print(\"Done. \")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Fb58eGlIt1LW"
|
||
},
|
||
"source": [
|
||
"## Output TypeScript"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Function to convert country name to a valid JavaScript identifier\n",
|
||
"def to_js_identifier(name):\n",
|
||
" return name.replace(' ', '_').replace('-', '_')\n",
|
||
"\n",
|
||
"# License boilerplate\n",
|
||
"license_boilerplate = \"\"\"/*\n",
|
||
" * Licensed to the Apache Software Foundation (ASF) under one\n",
|
||
" * or more contributor license agreements. See the NOTICE file\n",
|
||
" * distributed with this work for additional information\n",
|
||
" * regarding copyright ownership. The ASF licenses this file\n",
|
||
" * to you under the Apache License, Version 2.0 (the\n",
|
||
" * \"License\"); you may not use this file except in compliance\n",
|
||
" * with the License. You may obtain a copy of the License at\n",
|
||
" *\n",
|
||
" * http://www.apache.org/licenses/LICENSE-2.0\n",
|
||
" *\n",
|
||
" * Unless required by applicable law or agreed to in writing,\n",
|
||
" * software distributed under the License is distributed on an\n",
|
||
" * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
|
||
" * KIND, either express or implied. See the License for the\n",
|
||
" * specific language governing permissions and limitations\n",
|
||
" * under the License.\n",
|
||
" */\n",
|
||
"\"\"\"\n",
|
||
"\n",
|
||
"# Generate TypeScript import statements\n",
|
||
"imports = \"\\n\".join([f\"import {to_js_identifier(country)} from './countries/{to_js_identifier(country)}.geojson';\" for country in countries])\n",
|
||
"\n",
|
||
"# Generate the export object\n",
|
||
"exports = \"export const countries = {\\n \" + \",\\n \".join([to_js_identifier(country) for country in countries]) + \",\\n};\"\n",
|
||
"\n",
|
||
"# Additional exports\n",
|
||
"additional_exports = \"\"\"\n",
|
||
"export const countryOptions = Object.keys(countries).map(x => {\n",
|
||
" if (x === 'uk' || x === 'usa') {\n",
|
||
" return [x, x.toUpperCase()];\n",
|
||
" }\n",
|
||
" if (x === 'italy_regions') {\n",
|
||
" return [x, 'Italy (regions)'];\n",
|
||
" }\n",
|
||
" if (x === 'france_regions') {\n",
|
||
" return [x, 'France (regions)'];\n",
|
||
" }\n",
|
||
" if (x === 'turkey_regions') {\n",
|
||
" return [x, 'Turkey (regions)'];\n",
|
||
" }\n",
|
||
" return [\n",
|
||
" x,\n",
|
||
" x\n",
|
||
" .split('_')\n",
|
||
" .map(e => e[0].toUpperCase() + e.slice(1))\n",
|
||
" .join(' '),\n",
|
||
" ];\n",
|
||
"});\n",
|
||
"\n",
|
||
"export default countries;\n",
|
||
"\"\"\"\n",
|
||
"\n",
|
||
"# Combine license, imports, exports, and additional exports\n",
|
||
"typescript_code = f\"{license_boilerplate}\\n{imports}\\n\\n{exports}\\n{additional_exports}\"\n",
|
||
"\n",
|
||
"# Write to a file\n",
|
||
"with open(\"../src/countries.ts\", \"w\") as file:\n",
|
||
" file.write(typescript_code)\n",
|
||
"\n",
|
||
"print(\"TypeScript code written to src/countries.ts\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"provenance": []
|
||
},
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.8"
|
||
},
|
||
"vscode": {
|
||
"interpreter": {
|
||
"hash": "bd385fe162c5ca0c84973b7dd5c518456272446b2b64e67c2a69f949ca7a1754"
|
||
}
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|