import boto3
import configparser
import os
import urllib3
import folium
import geopandas as gpd
import tempfile
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import boto3
2 import configparser
3 import os
ModuleNotFoundError: No module named 'boto3'
# Silence urllib3 warnings for the whole session. The S3 client created below
# uses verify=False, which presumably would otherwise trigger a warning on
# every HTTPS request - confirm before removing this call.
urllib3.disable_warnings()
Connection with S3 Bucket#
All NUTS datasets are available in an S3 bucket. The configuration below allows you to list and download selected datasets from it.
def s3_connection(credentials: dict) -> "botocore.client.BaseClient":
    """Create a boto3 S3 client from the given credentials.

    Args:
        credentials (dict): A dictionary containing S3 credentials with keys
            'host_base' (passed to boto3 as ``endpoint_url``), 'access_key'
            and 'secret_key'.

    Returns:
        botocore.client.BaseClient: The S3 client object returned by
            ``boto3.client('s3', ...)``. This is a service client, not a
            ``boto3.session.Session``.

    Raises:
        KeyError: If one or more of the required keys is missing from
            ``credentials``. The message lists every missing key.
    """
    # Validate up front so a misconfigured credentials file is reported with
    # the full list of missing keys instead of failing on the first lookup.
    required_keys = ('host_base', 'access_key', 'secret_key')
    missing = [key for key in required_keys if key not in credentials]
    if missing:
        raise KeyError(f"Missing S3 credential key(s): {', '.join(missing)}")

    # NOTE(review): verify=False disables TLS certificate verification even
    # though use_ssl=True. Kept as in the original; confirm whether the
    # endpoint's certificate can be verified so this can be removed.
    s3 = boto3.client(
        's3',
        endpoint_url=credentials['host_base'],
        aws_access_key_id=credentials['access_key'],
        aws_secret_access_key=credentials['secret_key'],
        use_ssl=True,
        verify=False,
    )
    return s3
# Load S3 credentials from the local configuration file
s3cfg_path = '/home/eouser/.s3cfg'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the file is missing or
# unreadable. Fail here with a clear message rather than with a later
# KeyError on the 'default' section.
if not config.read(s3cfg_path):
    raise FileNotFoundError(
        f"S3 configuration file not found or unreadable: {s3cfg_path}"
    )
credentials = dict(config['default'].items())

# Open the S3 connection used by the rest of the notebook
s3 = s3_connection(credentials)
Browsing S3 bucket content#
# A single list_objects_v2 call returns at most 1,000 keys, so walk every
# result page with a paginator instead of reading only the first response.
paginator = s3.get_paginator('list_objects_v2')
object_keys = [
    obj['Key']
    for page in paginator.paginate(Bucket='ESTAT', Prefix='NUTS')
    # 'Contents' is absent from a page when no object matches the prefix
    for obj in page.get('Contents', [])
]

if object_keys:
    print("Objects in bucket:")
    for key in object_keys:
        print(key)
else:
    print("No objects found in the bucket.")
Objects in bucket:
NUTS/2021/NUTS_RG_01M_2021_3035.shp.zip
NUTS/2021/NUTS_RG_01M_2021_3857.shp.zip
NUTS/2021/NUTS_RG_01M_2021_4326.shp.zip
NUTS/2024/NUTS_RG_01M_2024_3035.shp.zip
NUTS/2024/NUTS_RG_01M_2024_3857.shp.zip
NUTS/2024/NUTS_RG_01M_2024_4326.shp.zip
Reading single vector file to GeoDataFrame#
object_path = 'NUTS/2021/NUTS_RG_01M_2021_3035.shp.zip'

# The zipped shapefile only has to exist on disk while geopandas parses it,
# so it is downloaded into a scratch directory that is removed afterwards.
with tempfile.TemporaryDirectory() as scratch_dir:
    # Keep the object's own file name for the local copy
    archive_name = os.path.basename(object_path)
    local_shapefile_path = os.path.join(scratch_dir, archive_name)

    # Fetch the archive from the bucket, then load it into a GeoDataFrame
    s3.download_file('ESTAT', object_path, local_shapefile_path)
    gdf = gpd.read_file(local_shapefile_path)

# Summary of the loaded layer: column overview and coordinate reference system
print(gdf.info())
print('----')
print(f'Coordinate system: {gdf.crs}')
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2010 entries, 0 to 2009
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 NUTS_ID 2010 non-null object
1 LEVL_CODE 2010 non-null int32
2 CNTR_CODE 2010 non-null object
3 NAME_LATN 2010 non-null object
4 NUTS_NAME 2010 non-null object
5 MOUNT_TYPE 2009 non-null float64
6 URBN_TYPE 2010 non-null int32
7 COAST_TYPE 2010 non-null int32
8 geometry 2010 non-null geometry
dtypes: float64(1), geometry(1), int32(3), object(4)
memory usage: 117.9+ KB
None
----
Coordinate system: EPSG:3035
# Preview the first rows of the GeoDataFrame
gdf.head()
NUTS_ID | LEVL_CODE | CNTR_CODE | NAME_LATN | NUTS_NAME | MOUNT_TYPE | URBN_TYPE | COAST_TYPE | geometry | |
---|---|---|---|---|---|---|---|---|---|
0 | AL | 0 | AL | Shqipëria | Shqipëria | 0.0 | 0 | 0 | MULTIPOLYGON (((5121233.536 2221719.441, 51208... |
1 | CZ | 0 | CZ | Česko | Česko | 0.0 | 0 | 0 | POLYGON ((4624843.654 3112209.741, 4625546.618... |
2 | DE | 0 | DE | Deutschland | Deutschland | 0.0 | 0 | 0 | MULTIPOLYGON (((4355225.365 2715902.993, 43541... |
3 | DK | 0 | DK | Danmark | Danmark | 0.0 | 0 | 0 | MULTIPOLYGON (((4650502.736 3591342.844, 46503... |
4 | CY | 0 | CY | Kýpros | Κύπρος | 0.0 | 0 | 0 | MULTIPOLYGON (((6527040.718 1762367.593, 65267... |
GeoDataFrame explanation#
GeoDataFrame inherits most pandas DataFrame methods, so you can work with a GeoDataFrame in the same way as with a regular DataFrame.
# List the distinct values stored in the LEVL_CODE attribute
gdf['LEVL_CODE'].unique()
array([0, 1, 3, 2], dtype=int32)
# Keep only the records of the chosen NUTS level (here: level 3) and preview them
is_level_3 = gdf['LEVL_CODE'] == 3
gdf.loc[is_level_3].head()
NUTS_ID | LEVL_CODE | CNTR_CODE | NAME_LATN | NUTS_NAME | MOUNT_TYPE | URBN_TYPE | COAST_TYPE | geometry | |
---|---|---|---|---|---|---|---|---|---|
93 | HR064 | 3 | HR | Krapinsko-zagorska županija | Krapinsko-zagorska županija | 4.0 | 3 | 3 | POLYGON ((4774720.63 2587061.468, 4774957.107 ... |
342 | DE933 | 3 | DE | Harburg | Harburg | 4.0 | 2 | 3 | MULTIPOLYGON (((4306636.876 3376936.96, 430661... |
343 | DE934 | 3 | DE | Lüchow-Dannenberg | Lüchow-Dannenberg | 4.0 | 3 | 3 | POLYGON ((4385936.052 3343753.673, 4388637.334... |
344 | BG314 | 3 | BG | Pleven | Плевен | 4.0 | 2 | 3 | POLYGON ((5499886.087 2403686.082, 5500149.527... |
345 | BG331 | 3 | BG | Varna | Варна | 4.0 | 2 | 1 | POLYGON ((5717444.811 2440210.249, 5718627.744... |
Displaying geometries on basemap#
To display vector geometries on a map we recommend folium. Folium can display different geometry types such as polygons, lines and points.
IMPORTANT: Every geometry displayed on the map must first be transformed to the EPSG:4326 coordinate system.
# Reproject to EPSG:4326 and pick the geometry stored under row label 1
gdf_wgs84 = gdf.to_crs(4326)
single_polygon = gdf_wgs84.loc[1, 'geometry']

# Centre the map on the polygon's centroid (folium expects [lat, lon] order)
centre = single_polygon.centroid
m = folium.Map(location=[centre.y, centre.x], zoom_start=7)

# Draw the polygon as a GeoJSON layer
folium.GeoJson(single_polygon).add_to(m)

# Display the map as the cell output
m
Make this Notebook Trusted to load map: File -> Trust Notebook
# Select every region whose NUTS_ID begins with 'HU'
has_hu_prefix = gdf['NUTS_ID'].str.startswith('HU')
gdf_filter = gdf[has_hu_prefix]


def _orange_fill(feature):
    """Return the folium style used for every region: an orange fill."""
    return {"fillColor": "orange"}


# Base map the region layers are added to
m1 = folium.Map(location=[50.118453, 6.420942], zoom_start=5)

# Add one GeoJSON layer per region so each can carry its own popup
gdf_filter_wgs84 = gdf_filter.to_crs(4326)
for geometry, nuts_name in zip(gdf_filter_wgs84["geometry"], gdf_filter_wgs84["NUTS_NAME"]):
    # Simplify the outline before serialising it to GeoJSON
    simplified = gpd.GeoSeries(geometry).simplify(tolerance=0.001)
    layer = folium.GeoJson(data=simplified.to_json(), style_function=_orange_fill)
    folium.Popup(nuts_name).add_to(layer)
    layer.add_to(m1)

# Display the map as the cell output
m1
Make this Notebook Trusted to load map: File -> Trust Notebook