import boto3
import configparser
import os
import urllib3

import folium
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.plot import show
import numpy as np
from matplotlib import pyplot

import tempfile
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 1
----> 1 import boto3
      2 import configparser
      3 import os

ModuleNotFoundError: No module named 'boto3'
# Silence urllib3 warnings for the whole session. The S3 client created in
# s3_connection() uses verify=False, so this presumably targets the
# insecure-request warning urllib3 would otherwise emit -- TODO confirm.
urllib3.disable_warnings()

Connection with S3 Bucket#

All GISCO Reference Grid datasets are available in an S3 bucket. The configuration below allows you to list and download the defined datasets from there.

def s3_connection(credentials: dict) -> "botocore.client.BaseClient":
    """Create a boto3 S3 client for a custom S3 endpoint.

    Args:
        credentials (dict): A dictionary containing S3 credentials with keys
                            'host_base' (used as the endpoint URL),
                            'access_key', and 'secret_key'.

    Returns:
        botocore.client.BaseClient: A low-level boto3 S3 client (not a
                                    ``boto3.session.Session``) configured with
                                    the provided endpoint and credentials.

    Raises:
        KeyError: If one of the required keys is missing from ``credentials``.
    """
    # NOTE(review): verify=False disables TLS certificate verification while
    # still using SSL. Acceptable only for trusted endpoints; prefer passing a
    # CA bundle path if the endpoint certificate can be validated.
    return boto3.client(
        's3',
        endpoint_url=credentials['host_base'],
        aws_access_key_id=credentials['access_key'],
        aws_secret_access_key=credentials['secret_key'],
        use_ssl=True,
        verify=False,
    )

# Load s3 credentials
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail early with a clear message instead of a
# confusing KeyError on the missing 'default' section below.
if not config.read('/home/eouser/.s3cfg'):
    raise FileNotFoundError(
        "Could not read S3 configuration file '/home/eouser/.s3cfg'"
    )
credentials = dict(config['default'].items())

# Connection with S3 eodata
s3 = s3_connection(credentials)

Browsing S3 bucket content#

# List what is stored under the Natura 2000 (2022) prefix of the ESTAT bucket.
# NOTE(review): this is a single list_objects_v2 call with no pagination, so
# very large prefixes may be listed only partially -- confirm if that matters.
response = s3.list_objects_v2(Bucket='ESTAT', Prefix='Natura_2000/2022/')
if 'Contents' not in response:
    # The key is absent when nothing matched the prefix
    print("No objects found in the bucket.")
else:
    print("Objects in bucket:")
    # Print one object key per line
    for entry in response['Contents']:
        print(entry['Key'])
Objects in bucket:
Natura_2000/2022/Natura2000_end2022_rev1.gpkg

Reading vector file to GeoDataFrame#

As Natura2000_end2022_rev1.gpkg contains more than one layer, it is recommended to list all layer names and use the specific name to load data from the desired layer. By default, GeoPandas will load the layer containing the geometry attribute.

def list_geopackage_layers(object_path, bucket='ESTAT'):
    """Download a GeoPackage from S3 and print the layers it contains.

    Uses the module-level ``s3`` client for the download.

    Args:
        object_path (str): Key of the GeoPackage object inside the bucket.
        bucket (str): Name of the S3 bucket to download from.
                      Defaults to 'ESTAT'.

    Returns:
        None: The table produced by ``gpd.list_layers`` is printed.
    """
    # The file is only needed while it is being inspected, so it is kept in a
    # temporary directory that is removed when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Keep the object's own file name (last component of the S3 key)
        local_geopackage_path = os.path.join(tmpdirname, object_path.split('/')[-1])

        # Download the GeoPackage from S3
        s3.download_file(bucket, object_path, local_geopackage_path)

        # Print all available layers
        print(gpd.list_layers(local_geopackage_path))
        

def read_geopackage_layer(object_path, layer_name=None, bucket='ESTAT'):
    """Download a GeoPackage from S3 and load one of its layers.

    Uses the module-level ``s3`` client for the download.

    Args:
        object_path (str): Key of the GeoPackage object inside the bucket.
        layer_name (str, optional): Name of the layer to read. When None,
                                    ``gpd.read_file`` chooses the layer.
        bucket (str): Name of the S3 bucket to download from.
                      Defaults to 'ESTAT'.

    Returns:
        The object returned by ``gpd.read_file`` for the requested layer.
    """
    # The data is read while the temporary directory still exists; the
    # downloaded file is deleted when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Keep the object's own file name (last component of the S3 key)
        local_geopackage_path = os.path.join(tmpdirname, object_path.split('/')[-1])

        # Download the GeoPackage from S3
        s3.download_file(bucket, object_path, local_geopackage_path)

        # Read the requested layer of the GeoPackage
        return gpd.read_file(local_geopackage_path, layer=layer_name)
# S3 key of the Natura 2000 GeoPackage inside the ESTAT bucket
object_path = 'Natura_2000/2022/Natura2000_end2022_rev1.gpkg'

# Listing all available layers (downloads the file and prints the layer table)
list_geopackage_layers(object_path)
                  name geometry_type
0   NaturaSite_polygon       Unknown
1            BIOREGION          None
2    DESIGNATIONSTATUS          None
3             HABITATS          None
4         HABITATCLASS          None
5      NATURA2000SITES          None
6         OTHERSPECIES          None
7             METADATA          None
8               IMPACT          None
9           MANAGEMENT          None
10             SPECIES          None
# Reading layer with geometry
gdf = read_geopackage_layer(object_path,layer_name='NaturaSite_polygon')
# Geodata parameters
# info() prints its summary itself and returns None, so it must not be wrapped
# in print() -- that would add a stray "None" line to the output.
gdf.info()
print('----')
print(f'Coordinate system: {gdf.crs}')
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 27193 entries, 0 to 27192
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   SITECODE    27193 non-null  object  
 1   SITENAME    27193 non-null  object  
 2   MS          27193 non-null  object  
 3   SITETYPE    27193 non-null  object  
 4   INSPIRE_ID  24408 non-null  object  
 5   geometry    27193 non-null  geometry
dtypes: geometry(1), object(5)
memory usage: 1.2+ MB
None
----
Coordinate system: EPSG:3035
# Preview the first rows of the polygon layer
gdf.head()
SITECODE SITENAME MS SITETYPE INSPIRE_ID geometry
0 DE7016401 Kälberklamm und Hasenklamm DE A POLYGON ((4207310.759 2871325.563, 4207347.631...
1 DE7017341 Pfinzgau Ost DE B MULTIPOLYGON (((4218181.292 2865435.762, 42181...
2 DE7017342 Pfinzgau West DE B MULTIPOLYGON (((4211759.368 2879842.953, 42117...
3 DE7018341 Stromberg DE B MULTIPOLYGON (((4229153.06 2876063.581, 422913...
4 DE7018342 Enztal bei Mühlacker DE B MULTIPOLYGON (((4228619.767 2869820.254, 42286...

GeoDataFrame explanation#

GeoDataFrame inherits most of the pandas DataFrame methods, so you can work with a GeoDataFrame in the same way as with a regular DataFrame.

# Filtering records based on attribute value:
# a boolean mask keeps the rows whose 'MS' column equals 'PL',
# then head() previews the first matching rows
gdf[gdf['MS']=='PL'].head()
SITECODE SITENAME MS SITETYPE INSPIRE_ID geometry
685 PLH100034 Wola Cyrusowa PL B PL.ZIPOP.1393.N2K.PLH100034 POLYGON ((4989522.379 3240766.369, 4989495.378...
1444 PLH120001 Ostoja Babiogórska PL B PL.ZIPOP.1393.N2K.PLH120001 POLYGON ((5008593.156 2989585.39, 5008554.061 ...
1445 PLH120002 Czarna Orawa PL B PL.ZIPOP.1393.N2K.PLH120002 MULTIPOLYGON (((5018457.3 2970753.064, 5018457...
1446 PLH120004 Dolina Prądnika PL B PL.ZIPOP.1393.N2K.PLH120004 MULTIPOLYGON (((5022821.97 3053173.79, 5022795...
1447 PLH120005 Dolinki Jurajskie PL B PL.ZIPOP.1393.N2K.PLH120005 MULTIPOLYGON (((5007291.249 3052406.174, 50072...

Displaying geometries on basemap#

To display vector geometries on a map, we recommend folium. Folium can display different types of geometries, such as polygons, lines and points.
IMPORTANT: Each geometry displayed on the map must first be transformed to the EPSG:4326 coordinate system.

# Filtering 10 biggest polygons for Poland
# Take an explicit copy of the filtered rows: adding a column to a slice of
# `gdf` raises pandas' SettingWithCopyWarning.
gdf_filter = gdf[gdf['MS']=='PL'].copy()
# Area is expressed in the units of the layer's CRS (the layer reports
# EPSG:3035 -- presumably square metres; confirm before converting units).
gdf_filter['area'] = gdf_filter.geometry.area
# Largest first, keep the top ten
gdf_filter = gdf_filter.sort_values(by='area', ascending=False).head(10)
/opt/jupyterhub/lib/python3.10/site-packages/geopandas/geodataframe.py:1819: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
# Display the ten selected sites, sorted by descending area
gdf_filter
SITECODE SITENAME MS SITETYPE INSPIRE_ID geometry area
12501 PLB220009 Bory Tucholskie PL A PL.ZIPOP.1393.N2K.PLB220009 POLYGON ((4815713.67 3426933.566, 4815733.24 3... 3.229529e+09
12545 PLB990003 Zatoka Pomorska PL A PL.ZIPOP.1393.N2K.PLB990003 POLYGON ((4720807.256 3491939.352, 4720659.49 ... 3.090660e+09
13321 PLH990002 Ostoja na Zatoce Pomorskiej PL B PL.ZIPOP.1393.N2K.PLH990002 POLYGON ((4634753.162 3494270.27, 4634553.584 ... 2.429501e+09
12544 PLB990002 Przybrzeżne wody Bałtyku PL A PL.ZIPOP.1393.N2K.PLB990002 POLYGON ((4804471.794 3542906.122, 4804505.422... 1.948361e+09
12540 PLB320016 Lasy Puszczy nad Drawą PL A PL.ZIPOP.1393.N2K.PLB320016 POLYGON ((4706793.874 3371722.24, 4706746.606 ... 1.903382e+09
12526 PLB300015 Puszcza Notecka PL A PL.ZIPOP.1393.N2K.PLB300015 MULTIPOLYGON (((4739320.429 3326280.584, 47393... 1.783370e+09
12510 PLB280008 Puszcza Piska PL A PL.ZIPOP.1393.N2K.PLB280008 MULTIPOLYGON (((5061933.013 3431955.325, 50617... 1.729284e+09
12450 PLB020005 Bory Dolnośląskie PL A PL.ZIPOP.1393.N2K.PLB020005 POLYGON ((4688184.261 3176847.838, 4688169.168... 1.720691e+09
12543 PLB320019 Ostoja Drawska PL A PL.ZIPOP.1393.N2K.PLB320019 MULTIPOLYGON (((4706793.874 3371722.24, 470678... 1.539845e+09
3998 PLB180002 Beskid Niski PL A PL.ZIPOP.1393.N2K.PLB180002 POLYGON ((5136875.541 3010865.401, 5136865.021... 1.520561e+09
# Build a map and overlay every selected site as its own GeoJSON layer
m1 = folium.Map(location=[52.182275, 19.356636], zoom_start=6)

# The geometries are reprojected to EPSG:4326 before being handed to folium
sites_wgs84 = gdf_filter.to_crs(4326)
for site_geometry, site_name in zip(sites_wgs84["geometry"], sites_wgs84["SITENAME"]):
    # Serialise the single geometry to GeoJSON and style it with an orange fill
    site_layer = folium.GeoJson(
        data=gpd.GeoSeries(site_geometry).to_json(),
        style_function=lambda x: {"fillColor": "orange"},
    )
    # Clicking the polygon shows the site name
    folium.Popup(site_name).add_to(site_layer)
    site_layer.add_to(m1)

# Last expression of the cell: renders the map in the notebook
m1
Make this Notebook Trusted to load map: File -> Trust Notebook

Reading non-vector data from GeoPackage file#

# Reading layers without geometry
# NOTE: this rebinds `gdf`, replacing the polygon layer loaded earlier with
# the 'HABITATS' attribute table.
gdf = read_geopackage_layer(object_path,layer_name='HABITATS')
# Printing DataFrame (first rows only)

gdf.head()
COUNTRY_CODE SITECODE HABITATCODE DESCRIPTION HABITAT_PRIORITY PRIORITY_FORM_HABITAT_TYPE NON_PRESENCE_IN_SITE COVER_HA CAVES REPRESENTATIVITY RELSURFACE CONSERVATION GLOBAL DATAQUALITY PERCENTAGE_COVER INTRODUCTION_CANDIDATE
0 HU HUKN30002 1530 Pannonic salt steppes and salt marshes * False NaN NaN NaN B C B None G NaN False
1 LT LTPRI0014 2330 Inland dunes with open Corynephorus and Agrost... None False 0.0 NaN 0.0 C C C None - NaN False
2 LT LTPRI0014 9010 Western Taïga * False 0.0 NaN 0.0 C C C None - NaN False
3 LT LTPRI0014 9050 Fennoscandian herb-rich forests with Picea abies None False 0.0 NaN 0.0 D - - None - NaN False
4 LT LTPRI0014 9160 Sub-Atlantic and medio-European oak or oak-hor... None False 0.0 NaN 0.0 C C C None - NaN False
# Keep only the habitat records whose country code is 'PL' (Poland)
gdf[gdf['COUNTRY_CODE'] == 'PL']
COUNTRY_CODE SITECODE HABITATCODE DESCRIPTION HABITAT_PRIORITY PRIORITY_FORM_HABITAT_TYPE NON_PRESENCE_IN_SITE COVER_HA CAVES REPRESENTATIVITY RELSURFACE CONSERVATION GLOBAL DATAQUALITY PERCENTAGE_COVER INTRODUCTION_CANDIDATE
6 PL PLH020055 3150 Natural eutrophic lakes with Magnopotamion or ... None False 0.0 NaN NaN A C A None G NaN False
7 PL PLH020055 3260 Water courses of plain to montane levels with ... None False 0.0 NaN NaN B C C None M NaN False
8 PL PLH020055 6410 Molinia meadows on calcareous, peaty or clayey... None False 0.0 NaN NaN B C B None G NaN False
9 PL PLH020055 6430 Hydrophilous tall herb fringe communities of p... None False 0.0 NaN NaN A C A None G NaN False
10 PL PLH020055 6510 Lowland hay meadows (Alopecurus pratensis, San... None False 0.0 NaN NaN A C A None G NaN False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
75873 PL PLH320023 3140 Hard oligo-mesotrophic waters with benthic veg... None False 0.0 NaN NaN B B B None M NaN False
75874 PL PLH200008 6440 Alluvial meadows of river valleys of the Cnidi... None False 0.0 NaN NaN C C A None G NaN False
75875 PL PLH180030 91E0 Alluvial forests with Alnus glutinosa and Frax... * False 0.0 NaN NaN B C B None M NaN False
75876 PL PLH220098 9190 Old acidophilous oak woods with Quercus robur ... None False 0.0 NaN NaN C C C None M NaN False
75877 PL PLH320023 3150 Natural eutrophic lakes with Magnopotamion or ... None False 0.0 NaN NaN A C C None M NaN False

5807 rows × 16 columns