import boto3
import configparser
import os
import urllib3

import folium
import geopandas as gpd

import tempfile
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 1
----> 1 import boto3
      2 import configparser
      3 import os

ModuleNotFoundError: No module named 'boto3'
# Silence urllib3's HTTP warnings; the S3 client in this notebook is created
# with verify=False, which otherwise triggers a warning on every request.
urllib3.disable_warnings()

Connecting to the S3 bucket

All NUTS datasets are available in an S3 bucket. The configuration below makes it possible to list and download selected datasets from it.

def s3_connection(credentials: dict, verify: "bool | str" = False) -> "botocore.client.BaseClient":
    """Create a boto3 S3 client for the endpoint described by ``credentials``.

    Args:
        credentials (dict): A dictionary containing S3 credentials with keys
                            'host_base' (used as the endpoint URL),
                            'access_key', and 'secret_key'.
        verify (bool | str): Forwarded to ``boto3.client`` as ``verify``.
                             ``False`` (the default, kept for backward
                             compatibility) disables TLS certificate
                             verification; pass ``True`` or the path to a CA
                             bundle to enable it.

    Returns:
        botocore.client.BaseClient: A low-level S3 client (the object returned
                                    by ``boto3.client('s3', ...)``, not a
                                    ``boto3.session.Session``).

    Raises:
        KeyError: If any of the required keys is missing from ``credentials``.
    """
    # Validate up front so that a misconfigured file is reported with every
    # missing key at once instead of failing on the first lookup.
    required_keys = ('host_base', 'access_key', 'secret_key')
    missing = [key for key in required_keys if key not in credentials]
    if missing:
        raise KeyError(f"Missing S3 credential key(s): {', '.join(missing)}")

    return boto3.client(
        's3',
        endpoint_url=credentials['host_base'],
        aws_access_key_id=credentials['access_key'],
        aws_secret_access_key=credentials['secret_key'],
        use_ssl=True,
        verify=verify,
    )

# Load S3 credentials from the .s3cfg configuration file
S3_CONFIG_PATH = '/home/eouser/.s3cfg'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so check it to fail with a clear message
# instead of a later, unrelated-looking KeyError on the 'default' section.
if not config.read(S3_CONFIG_PATH):
    raise FileNotFoundError(
        f"S3 configuration file not found or unreadable: {S3_CONFIG_PATH}"
    )
credentials = dict(config['default'].items())

# Connection with S3 eodata
s3 = s3_connection(credentials)

Browsing the S3 bucket content

# A single list_objects_v2 call returns at most 1000 keys, so walk every page
# of the listing to avoid silently truncating large prefixes.
paginator = s3.get_paginator('list_objects_v2')
object_keys = [
    obj['Key']
    for page in paginator.paginate(Bucket='ESTAT', Prefix='NUTS')
    # 'Contents' is absent from a page when nothing matches the prefix
    for obj in page.get('Contents', [])
]

if object_keys:
    print("Objects in bucket:")
    for key in object_keys:
        print(key)
else:
    print("No objects found in the bucket.")
Objects in bucket:
NUTS/2021/NUTS_RG_01M_2021_3035.shp.zip
NUTS/2021/NUTS_RG_01M_2021_3857.shp.zip
NUTS/2021/NUTS_RG_01M_2021_4326.shp.zip
NUTS/2024/NUTS_RG_01M_2024_3035.shp.zip
NUTS/2024/NUTS_RG_01M_2024_3857.shp.zip
NUTS/2024/NUTS_RG_01M_2024_4326.shp.zip

Reading a single vector file into a GeoDataFrame

object_path = 'NUTS/2021/NUTS_RG_01M_2021_3035.shp.zip'

# The archive only has to exist on disk while it is being parsed, so it is
# downloaded into a scratch directory that is removed when the block exits.
with tempfile.TemporaryDirectory() as scratch_dir:
    # Keep the object's own file name (everything after the last '/')
    archive_name = object_path.rsplit('/', 1)[-1]
    local_shapefile_path = os.path.join(scratch_dir, archive_name)

    # Fetch the zipped shapefile from the 'ESTAT' bucket
    s3.download_file('ESTAT', object_path, local_shapefile_path)

    # Load it into a GeoDataFrame while the file is still present
    gdf = gpd.read_file(local_shapefile_path)
# Geodata parameters
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
gdf.info()
print('----')
print(f'Coordinate system: {gdf.crs}')
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2010 entries, 0 to 2009
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   NUTS_ID     2010 non-null   object  
 1   LEVL_CODE   2010 non-null   int32   
 2   CNTR_CODE   2010 non-null   object  
 3   NAME_LATN   2010 non-null   object  
 4   NUTS_NAME   2010 non-null   object  
 5   MOUNT_TYPE  2009 non-null   float64 
 6   URBN_TYPE   2010 non-null   int32   
 7   COAST_TYPE  2010 non-null   int32   
 8   geometry    2010 non-null   geometry
dtypes: float64(1), geometry(1), int32(3), object(4)
memory usage: 117.9+ KB
None
----
Coordinate system: EPSG:3035
# Preview the first rows of the GeoDataFrame
gdf.head()
NUTS_ID LEVL_CODE CNTR_CODE NAME_LATN NUTS_NAME MOUNT_TYPE URBN_TYPE COAST_TYPE geometry
0 AL 0 AL Shqipëria Shqipëria 0.0 0 0 MULTIPOLYGON (((5121233.536 2221719.441, 51208...
1 CZ 0 CZ Česko Česko 0.0 0 0 POLYGON ((4624843.654 3112209.741, 4625546.618...
2 DE 0 DE Deutschland Deutschland 0.0 0 0 MULTIPOLYGON (((4355225.365 2715902.993, 43541...
3 DK 0 DK Danmark Danmark 0.0 0 0 MULTIPOLYGON (((4650502.736 3591342.844, 46503...
4 CY 0 CY Kýpros Κύπρος 0.0 0 0 MULTIPOLYGON (((6527040.718 1762367.593, 65267...

GeoDataFrame explanation

GeoDataFrame inherits most of the pandas DataFrame methods, so a GeoDataFrame can be worked with in the same way as a DataFrame.

# List the distinct values of the LEVL_CODE attribute
gdf['LEVL_CODE'].unique()
array([0, 1, 3, 2], dtype=int32)
# Select the records of a given NUTS level (here LEVL_CODE == 3) and preview them
gdf[gdf['LEVL_CODE']==3].head()
NUTS_ID LEVL_CODE CNTR_CODE NAME_LATN NUTS_NAME MOUNT_TYPE URBN_TYPE COAST_TYPE geometry
93 HR064 3 HR Krapinsko-zagorska županija Krapinsko-zagorska županija 4.0 3 3 POLYGON ((4774720.63 2587061.468, 4774957.107 ...
342 DE933 3 DE Harburg Harburg 4.0 2 3 MULTIPOLYGON (((4306636.876 3376936.96, 430661...
343 DE934 3 DE Lüchow-Dannenberg Lüchow-Dannenberg 4.0 3 3 POLYGON ((4385936.052 3343753.673, 4388637.334...
344 BG314 3 BG Pleven Плевен 4.0 2 3 POLYGON ((5499886.087 2403686.082, 5500149.527...
345 BG331 3 BG Varna Варна 4.0 2 1 POLYGON ((5717444.811 2440210.249, 5718627.744...

Displaying geometries on a basemap

To display vector geometries on a map we recommend folium. Folium can display different geometry types such as polygons, lines and points.
IMPORTANT: Every geometry shown on the map must first be transformed to the EPSG:4326 coordinate system.

# Reproject to EPSG:4326 and take the geometry of the row labelled 1
single_polygon = gdf.to_crs(4326).loc[1, 'geometry']

# Centre the map on the polygon's centroid (location is given as [y, x])
centre = single_polygon.centroid
m = folium.Map(location=[centre.y, centre.x], zoom_start=7)

# Overlay the polygon as a GeoJSON layer
polygon_layer = folium.GeoJson(single_polygon)
polygon_layer.add_to(m)

# Display the map
m
Make this Notebook Trusted to load map: File -> Trust Notebook
# Select every record whose NUTS_ID begins with 'HU'
is_hu = gdf['NUTS_ID'].str.startswith('HU')
gdf_filter = gdf[is_hu]

# Base map the polygons are drawn on
m1 = folium.Map(location=[50.118453, 6.420942], zoom_start=5)


def _orange_fill(_feature):
    """Return the style applied to every drawn feature."""
    return {"fillColor": "orange"}


# Reproject once, then add one simplified GeoJSON layer (with a name popup)
# per record.
regions_4326 = gdf_filter.to_crs(4326)
for geometry, nuts_name in zip(regions_4326["geometry"], regions_4326["NUTS_NAME"]):
    simplified = gpd.GeoSeries(geometry).simplify(tolerance=0.001)
    layer = folium.GeoJson(data=simplified.to_json(), style_function=_orange_fill)
    folium.Popup(nuts_name).add_to(layer)
    layer.add_to(m1)

m1
Make this Notebook Trusted to load map: File -> Trust Notebook