{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate GeoJSON from Natural Earth Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Install Dependencies" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "pip install geopandas shapely matplotlib requests\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download Data\n", "\n", "Download datasets (_Admin 0 – Countries_ in [1:10](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/), and _Admin 1 – States, Provinces_ in 1:10 and [1:50](https://www.naturalearthdata.com/downloads/50m-cultural-vectors/)) from Natural Earth Data:" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Skip ne_10m_admin_0_countries.zip because it already exists\n", "Skip ne_10m_admin_1_states_provinces.zip because it already exists\n", "Skip ne_50m_admin_1_states_provinces.zip because it already exists\n", "Done. 
\n" ] } ], "source": [ "import os\n", "import requests\n", "\n", "data_dir = os.path.expanduser(\"~/Downloads\")\n", "if not os.path.exists(data_dir):\n", " os.mkdir(data_dir)\n", "\n", "def download_files(skip_existing=True):\n", " for url in [\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip\",\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip\",\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip\"\n", " ]:\n", " file_name = url.split('/')[-1]\n", " full_file_name = f'{data_dir}/{file_name}'\n", " with requests.get(\n", " url,\n", " headers={\n", " \"accept-encoding\": \"gzip, deflate, br\",\n", " \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36\"\n", " },\n", " stream=True,\n", " ) as res:\n", " file_size = int(res.headers['content-length'])\n", " if res.status_code != 200:\n", " print(\"Error downloading files. Please open the URL to download them from browser manually.\")\n", " break\n", " if (\n", " skip_existing and\n", " os.path.exists(full_file_name) and\n", " file_size == os.path.getsize(full_file_name)\n", " ):\n", " print(f\"Skip {file_name} because it already exists\")\n", " continue\n", " print(f\"Downloading {file_name}... \\r\", end=\"\")\n", " with open(full_file_name, \"wb\") as fh:\n", " fh.write(res.content)\n", " print(\"Done. 
\")\n", "\n", "download_files(skip_existing=True)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "import os\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "import shapely\n", "\n", "df_admin0_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_0_countries.zip\")\n", "df_10m = gpd.read_file(f\"{data_dir}/ne_10m_admin_1_states_provinces.zip\")\n", "df_50m = gpd.read_file(f\"{data_dir}/ne_50m_admin_1_states_provinces.zip\")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['featurecla', 'scalerank', 'adm1_code', 'diss_me', 'iso_3166_2',\n", " 'wikipedia', 'iso_a2', 'adm0_sr', 'name', 'name_alt',\n", " ...\n", " 'FCLASS_ID', 'FCLASS_PL', 'FCLASS_GR', 'FCLASS_IT', 'FCLASS_NL',\n", " 'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'FCLASS_TLC', 'geometry'],\n", " dtype='object', length=122)" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_50m.columns" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['featurecla', 'scalerank', 'labelrank', 'sovereignt', 'sov_a3',\n", " 'adm0_dif', 'level', 'type', 'tlc', 'admin',\n", " ...\n", " 'fclass_tr', 'fclass_id', 'fclass_pl', 'fclass_gr', 'fclass_it',\n", " 'fclass_nl', 'fclass_se', 'fclass_bd', 'fclass_ua', 'geometry'],\n", " dtype='object', length=169)" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_admin0_10m.columns = df_admin0_10m.columns.str.lower()\n", "df_admin0_10m.columns" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | featurecla | \n", "scalerank | \n", "adm1_code | \n", "diss_me | \n", "iso_3166_2 | \n", "wikipedia | \n", "iso_a2 | \n", "adm0_sr | \n", "name | \n", "name_alt | \n", "... | \n", "FCLASS_ID | \n", "FCLASS_PL | \n", "FCLASS_GR | \n", "FCLASS_IT | \n", "FCLASS_NL | \n", "FCLASS_SE | \n", "FCLASS_BD | \n", "FCLASS_UA | \n", "FCLASS_TLC | \n", "geometry | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
admin | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
Australia | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "0 | \n", "9 | \n", "9 | \n", "9 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "9 | \n", "
Brazil | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "0 | \n", "27 | \n", "27 | \n", "27 | \n", "13 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "27 | \n", "
Canada | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "9 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "13 | \n", "
China | \n", "31 | \n", "31 | \n", "31 | \n", "31 | \n", "31 | \n", "0 | \n", "31 | \n", "31 | \n", "31 | \n", "30 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "31 | \n", "
India | \n", "36 | \n", "36 | \n", "36 | \n", "36 | \n", "36 | \n", "0 | \n", "36 | \n", "36 | \n", "36 | \n", "13 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "36 | \n", "
Indonesia | \n", "33 | \n", "33 | \n", "33 | \n", "33 | \n", "33 | \n", "0 | \n", "33 | \n", "33 | \n", "33 | \n", "30 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "33 | \n", "
Russia | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "1 | \n", "85 | \n", "85 | \n", "85 | \n", "84 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "85 | \n", "
South Africa | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "0 | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "9 | \n", "
United States of America | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "51 | \n", "
9 rows × 121 columns
\n", "\n", " | featurecla | \n", "scalerank | \n", "labelrank | \n", "sov_a3 | \n", "type | \n", "admin | \n", "adm0_a3 | \n", "gu_a3 | \n", "name | \n", "abbrev | \n", "... | \n", "name_sv | \n", "name_tr | \n", "name_uk | \n", "name_ur | \n", "name_vi | \n", "name_zh_x | \n", "name_zht | \n", "geometry | \n", "name_zh_y | \n", "iso_3166_2 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Admin-0 country | \n", "0 | \n", "4 | \n", "CH1 | \n", "Country | \n", "Hong Kong S.A.R. | \n", "HKG | \n", "HKG | \n", "Hong Kong | \n", "H.K. | \n", "... | \n", "Hongkong | \n", "Hong Kong | \n", "Гонконг | \n", "ہانگ کانگ | \n", "Hồng Kông | \n", "香港 | \n", "香港 | \n", "MULTIPOLYGON (((114.22983 22.55581, 114.23471 ... | \n", "香港特别行政区 | \n", "CN-91 | \n", "
1 | \n", "Admin-0 country | \n", "0 | \n", "3 | \n", "TWN | \n", "Sovereign country | \n", "Taiwan | \n", "TWN | \n", "TWN | \n", "Taiwan | \n", "Taiwan | \n", "... | \n", "Taiwan | \n", "Çin Cumhuriyeti | \n", "Республіка Китай | \n", "تائیوان | \n", "Đài Loan | \n", "中华民国 | \n", "中華民國 | \n", "MULTIPOLYGON (((121.90577 24.95010, 121.83473 ... | \n", "中国台湾 | \n", "CN-71 | \n", "
2 | \n", "Admin-0 country | \n", "0 | \n", "4 | \n", "CH1 | \n", "Country | \n", "Macao S.A.R | \n", "MAC | \n", "MAC | \n", "Macao | \n", "Mac. | \n", "... | \n", "Macao | \n", "Makao | \n", "Аоминь | \n", "مکاؤ | \n", "Ma Cao | \n", "澳门 | \n", "澳門 | \n", "MULTIPOLYGON (((113.55860 22.16303, 113.56943 ... | \n", "澳门特别行政区 | \n", "CN-92 | \n", "
3 rows × 51 columns
\n", "