import boto3
import configparser
import os
import urllib3
import folium
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.plot import show
import numpy as np
from matplotlib import pyplot
import tempfile
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import boto3
2 import configparser
3 import os
ModuleNotFoundError: No module named 'boto3'
# Silence urllib3 warnings. The S3 client created below is configured with
# verify=False, which presumably would otherwise emit an insecure-request
# warning on every call -- TODO confirm this is the only reason.
urllib3.disable_warnings()
Connection with S3 Bucket#
All GISCO Reference Grid datasets are available in an S3 bucket. The configuration below allows you to list and download selected datasets from it.
def s3_connection(credentials: dict) -> "botocore.client.BaseClient":
    """Create a boto3 S3 client for the endpoint described by ``credentials``.

    Args:
        credentials (dict): S3 credentials with the keys 'host_base'
            (passed to boto3 as the endpoint URL), 'access_key' and
            'secret_key'.

    Returns:
        botocore.client.BaseClient: The low-level S3 client returned by
        ``boto3.client('s3', ...)``. Note that this is a client object,
        not a ``boto3.session.Session``.

    Raises:
        KeyError: If any of the required keys is missing from ``credentials``.
    """
    required_keys = ('host_base', 'access_key', 'secret_key')
    missing = [key for key in required_keys if key not in credentials]
    if missing:
        # Report every missing key at once instead of failing on the first
        # lookup while the client is being constructed.
        raise KeyError(f"Missing S3 credential key(s): {', '.join(missing)}")

    # NOTE(review): verify=False disables TLS certificate verification, so the
    # endpoint identity is not checked. Kept as-is to preserve behaviour;
    # confirm whether the endpoint really requires it.
    return boto3.client(
        's3',
        endpoint_url=credentials['host_base'],
        aws_access_key_id=credentials['access_key'],
        aws_secret_access_key=credentials['secret_key'],
        use_ssl=True,
        verify=False,
    )
# Load s3 credentials from the configuration file
s3cfg_path = '/home/eouser/.s3cfg'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means nothing was loaded.
if not config.read(s3cfg_path):
    raise FileNotFoundError(
        f"S3 configuration file not found or unreadable: {s3cfg_path}"
    )
credentials = dict(config['default'].items())

# Connection with S3 eodata
s3 = s3_connection(credentials)
Browsing S3 bucket content#
# A single list_objects_v2 call returns only one page of keys, so walk all
# pages with a paginator to make sure no object is left out of the listing.
paginator = s3.get_paginator('list_objects_v2')
found_any = False
for page in paginator.paginate(Bucket='ESTAT', Prefix='Natura_2000/2022/'):
    # Pages without matching objects carry no 'Contents' key.
    for obj in page.get('Contents', []):
        if not found_any:
            print("Objects in bucket:")
            found_any = True
        print(obj['Key'])

if not found_any:
    print("No objects found in the bucket.")
Objects in bucket:
Natura_2000/2022/Natura2000_end2022_rev1.gpkg
Reading vector file to GeoDataFrame#
As Natura2000_end2022_rev1.gpkg contains more than one layer, it is recommended to list all layer names and use the specific name to load data from the desired layer. By default, GeoPandas will load the layer containing the geometry attribute.
def list_geopackage_layers(object_path, bucket='ESTAT'):
    """Download a GeoPackage from S3 and print the layers it contains.

    The download uses the module-level ``s3`` client.

    Args:
        object_path (str): Key of the GeoPackage object inside the bucket.
        bucket (str): Name of the bucket to download from. Defaults to
            'ESTAT'.

    Returns:
        None. The layer listing is printed, not returned.
    """
    # The local copy is only needed while the layers are listed, so it lives
    # in a temporary directory that is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Keep only the last path component of the key as the local file name
        local_geopackage_path = os.path.join(tmpdirname, object_path.split('/')[-1])
        # Download the GeoPackage from S3
        s3.download_file(bucket, object_path, local_geopackage_path)
        # Print all available layers
        print(gpd.list_layers(local_geopackage_path))
def read_geopackage_layer(object_path, layer_name=None, bucket='ESTAT'):
    """Download a GeoPackage from S3 and read one of its layers.

    The download uses the module-level ``s3`` client.

    Args:
        object_path (str): Key of the GeoPackage object inside the bucket.
        layer_name (str | None): Name of the layer to read. When None, the
            choice of layer is left to ``gpd.read_file``.
        bucket (str): Name of the bucket to download from. Defaults to
            'ESTAT'.

    Returns:
        The object returned by ``gpd.read_file`` for the requested layer.
    """
    # The layer is read inside the ``with`` block, i.e. before the temporary
    # directory holding the downloaded file is removed.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Keep only the last path component of the key as the local file name
        local_geopackage_path = os.path.join(tmpdirname, object_path.split('/')[-1])
        # Download the GeoPackage from S3
        s3.download_file(bucket, object_path, local_geopackage_path)
        # Read the requested layer of the GeoPackage
        return gpd.read_file(local_geopackage_path, layer=layer_name)
# S3 key of the GeoPackage used throughout the following cells
object_path = "Natura_2000/2022/Natura2000_end2022_rev1.gpkg"

# Print the names of the layers stored in the GeoPackage
list_geopackage_layers(object_path)
name geometry_type
0 NaturaSite_polygon Unknown
1 BIOREGION None
2 DESIGNATIONSTATUS None
3 HABITATS None
4 HABITATCLASS None
5 NATURA2000SITES None
6 OTHERSPECIES None
7 METADATA None
8 IMPACT None
9 MANAGEMENT None
10 SPECIES None
# Reading layer with geometry
gdf = read_geopackage_layer(object_path, layer_name='NaturaSite_polygon')

# Geodata parameters
# info() prints its report itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line after the report.
gdf.info()
print('----')
print(f'Coordinate system: {gdf.crs}')
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 27193 entries, 0 to 27192
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 SITECODE 27193 non-null object
1 SITENAME 27193 non-null object
2 MS 27193 non-null object
3 SITETYPE 27193 non-null object
4 INSPIRE_ID 24408 non-null object
5 geometry 27193 non-null geometry
dtypes: geometry(1), object(5)
memory usage: 1.2+ MB
None
----
Coordinate system: EPSG:3035
# Preview the first rows of the loaded layer
gdf.head()
SITECODE | SITENAME | MS | SITETYPE | INSPIRE_ID | geometry | |
---|---|---|---|---|---|---|
0 | DE7016401 | Kälberklamm und Hasenklamm | DE | A | POLYGON ((4207310.759 2871325.563, 4207347.631... | |
1 | DE7017341 | Pfinzgau Ost | DE | B | MULTIPOLYGON (((4218181.292 2865435.762, 42181... | |
2 | DE7017342 | Pfinzgau West | DE | B | MULTIPOLYGON (((4211759.368 2879842.953, 42117... | |
3 | DE7018341 | Stromberg | DE | B | MULTIPOLYGON (((4229153.06 2876063.581, 422913... | |
4 | DE7018342 | Enztal bei Mühlacker | DE | B | MULTIPOLYGON (((4228619.767 2869820.254, 42286... |
GeoDataFrame explanation#
A GeoDataFrame inherits most of the pandas DataFrame methods, which allows you to work with a GeoDataFrame in the same way as with a regular DataFrame.
# Select the rows whose 'MS' attribute equals 'PL' and preview the first ones
gdf.loc[gdf['MS'] == 'PL'].head()
SITECODE | SITENAME | MS | SITETYPE | INSPIRE_ID | geometry | |
---|---|---|---|---|---|---|
685 | PLH100034 | Wola Cyrusowa | PL | B | PL.ZIPOP.1393.N2K.PLH100034 | POLYGON ((4989522.379 3240766.369, 4989495.378... |
1444 | PLH120001 | Ostoja Babiogórska | PL | B | PL.ZIPOP.1393.N2K.PLH120001 | POLYGON ((5008593.156 2989585.39, 5008554.061 ... |
1445 | PLH120002 | Czarna Orawa | PL | B | PL.ZIPOP.1393.N2K.PLH120002 | MULTIPOLYGON (((5018457.3 2970753.064, 5018457... |
1446 | PLH120004 | Dolina Prądnika | PL | B | PL.ZIPOP.1393.N2K.PLH120004 | MULTIPOLYGON (((5022821.97 3053173.79, 5022795... |
1447 | PLH120005 | Dolinki Jurajskie | PL | B | PL.ZIPOP.1393.N2K.PLH120005 | MULTIPOLYGON (((5007291.249 3052406.174, 50072... |
Displaying geometries on basemap#
To display vector geometries on a map, we recommend folium. Folium can display different types of geometries, such as polygons, lines and points.
IMPORTANT: Every geometry displayed on the map must first be transformed to the EPSG:4326 coordinate system.
# Filtering 10 biggest polygons for Poland
# .copy() turns the filtered subset into an independent GeoDataFrame, so that
# adding the 'area' column below does not raise pandas' SettingWithCopyWarning.
gdf_filter = gdf[gdf['MS'] == 'PL'].copy()
# Area of each geometry, expressed in the units of the layer's own CRS
gdf_filter['area'] = gdf_filter.geometry.area
# Largest areas first, then keep the top ten
gdf_filter = gdf_filter.sort_values(by='area', ascending=False).head(10)
/opt/jupyterhub/lib/python3.10/site-packages/geopandas/geodataframe.py:1819: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
super().__setitem__(key, value)
# Display the selected rows together with their computed 'area' column
gdf_filter
SITECODE | SITENAME | MS | SITETYPE | INSPIRE_ID | geometry | area | |
---|---|---|---|---|---|---|---|
12501 | PLB220009 | Bory Tucholskie | PL | A | PL.ZIPOP.1393.N2K.PLB220009 | POLYGON ((4815713.67 3426933.566, 4815733.24 3... | 3.229529e+09 |
12545 | PLB990003 | Zatoka Pomorska | PL | A | PL.ZIPOP.1393.N2K.PLB990003 | POLYGON ((4720807.256 3491939.352, 4720659.49 ... | 3.090660e+09 |
13321 | PLH990002 | Ostoja na Zatoce Pomorskiej | PL | B | PL.ZIPOP.1393.N2K.PLH990002 | POLYGON ((4634753.162 3494270.27, 4634553.584 ... | 2.429501e+09 |
12544 | PLB990002 | Przybrzeżne wody Bałtyku | PL | A | PL.ZIPOP.1393.N2K.PLB990002 | POLYGON ((4804471.794 3542906.122, 4804505.422... | 1.948361e+09 |
12540 | PLB320016 | Lasy Puszczy nad Drawą | PL | A | PL.ZIPOP.1393.N2K.PLB320016 | POLYGON ((4706793.874 3371722.24, 4706746.606 ... | 1.903382e+09 |
12526 | PLB300015 | Puszcza Notecka | PL | A | PL.ZIPOP.1393.N2K.PLB300015 | MULTIPOLYGON (((4739320.429 3326280.584, 47393... | 1.783370e+09 |
12510 | PLB280008 | Puszcza Piska | PL | A | PL.ZIPOP.1393.N2K.PLB280008 | MULTIPOLYGON (((5061933.013 3431955.325, 50617... | 1.729284e+09 |
12450 | PLB020005 | Bory Dolnośląskie | PL | A | PL.ZIPOP.1393.N2K.PLB020005 | POLYGON ((4688184.261 3176847.838, 4688169.168... | 1.720691e+09 |
12543 | PLB320019 | Ostoja Drawska | PL | A | PL.ZIPOP.1393.N2K.PLB320019 | MULTIPOLYGON (((4706793.874 3371722.24, 470678... | 1.539845e+09 |
3998 | PLB180002 | Beskid Niski | PL | A | PL.ZIPOP.1393.N2K.PLB180002 | POLYGON ((5136875.541 3010865.401, 5136865.021... | 1.520561e+09 |
# Create the base map, then draw each selected site on it as its own layer
m1 = folium.Map(location=[52.182275, 19.356636], zoom_start=6)

# folium expects EPSG:4326 coordinates, so reproject once before iterating
sites_wgs84 = gdf_filter.to_crs(4326)
for site_name, site_geometry in zip(sites_wgs84["SITENAME"], sites_wgs84["geometry"]):
    # Serialise the single geometry to GeoJSON via a one-element GeoSeries
    site_geojson = gpd.GeoSeries(site_geometry).to_json()
    site_layer = folium.GeoJson(
        data=site_geojson,
        style_function=lambda _feature: {"fillColor": "orange"},
    )
    # Attach the site name as a popup
    folium.Popup(site_name).add_to(site_layer)
    site_layer.add_to(m1)

# Render the map as the cell output
m1
Reading non-vector data from GeoPackage file#
# Load the 'HABITATS' layer, which carries no geometry; this rebinds `gdf`
gdf = read_geopackage_layer(object_path, layer_name="HABITATS")
# Preview the first rows of the table
gdf.head()
COUNTRY_CODE | SITECODE | HABITATCODE | DESCRIPTION | HABITAT_PRIORITY | PRIORITY_FORM_HABITAT_TYPE | NON_PRESENCE_IN_SITE | COVER_HA | CAVES | REPRESENTATIVITY | RELSURFACE | CONSERVATION | GLOBAL | DATAQUALITY | PERCENTAGE_COVER | INTRODUCTION_CANDIDATE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | HU | HUKN30002 | 1530 | Pannonic salt steppes and salt marshes | * | False | NaN | NaN | NaN | B | C | B | None | G | NaN | False |
1 | LT | LTPRI0014 | 2330 | Inland dunes with open Corynephorus and Agrost... | None | False | 0.0 | NaN | 0.0 | C | C | C | None | - | NaN | False |
2 | LT | LTPRI0014 | 9010 | Western Taïga | * | False | 0.0 | NaN | 0.0 | C | C | C | None | - | NaN | False |
3 | LT | LTPRI0014 | 9050 | Fennoscandian herb-rich forests with Picea abies | None | False | 0.0 | NaN | 0.0 | D | - | - | None | - | NaN | False |
4 | LT | LTPRI0014 | 9160 | Sub-Atlantic and medio-European oak or oak-hor... | None | False | 0.0 | NaN | 0.0 | C | C | C | None | - | NaN | False |
# Select every record whose 'COUNTRY_CODE' equals 'PL'
gdf.loc[gdf['COUNTRY_CODE'] == 'PL']
COUNTRY_CODE | SITECODE | HABITATCODE | DESCRIPTION | HABITAT_PRIORITY | PRIORITY_FORM_HABITAT_TYPE | NON_PRESENCE_IN_SITE | COVER_HA | CAVES | REPRESENTATIVITY | RELSURFACE | CONSERVATION | GLOBAL | DATAQUALITY | PERCENTAGE_COVER | INTRODUCTION_CANDIDATE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6 | PL | PLH020055 | 3150 | Natural eutrophic lakes with Magnopotamion or ... | None | False | 0.0 | NaN | NaN | A | C | A | None | G | NaN | False |
7 | PL | PLH020055 | 3260 | Water courses of plain to montane levels with ... | None | False | 0.0 | NaN | NaN | B | C | C | None | M | NaN | False |
8 | PL | PLH020055 | 6410 | Molinia meadows on calcareous, peaty or clayey... | None | False | 0.0 | NaN | NaN | B | C | B | None | G | NaN | False |
9 | PL | PLH020055 | 6430 | Hydrophilous tall herb fringe communities of p... | None | False | 0.0 | NaN | NaN | A | C | A | None | G | NaN | False |
10 | PL | PLH020055 | 6510 | Lowland hay meadows (Alopecurus pratensis, San... | None | False | 0.0 | NaN | NaN | A | C | A | None | G | NaN | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
75873 | PL | PLH320023 | 3140 | Hard oligo-mesotrophic waters with benthic veg... | None | False | 0.0 | NaN | NaN | B | B | B | None | M | NaN | False |
75874 | PL | PLH200008 | 6440 | Alluvial meadows of river valleys of the Cnidi... | None | False | 0.0 | NaN | NaN | C | C | A | None | G | NaN | False |
75875 | PL | PLH180030 | 91E0 | Alluvial forests with Alnus glutinosa and Frax... | * | False | 0.0 | NaN | NaN | B | C | B | None | M | NaN | False |
75876 | PL | PLH220098 | 9190 | Old acidophilous oak woods with Quercus robur ... | None | False | 0.0 | NaN | NaN | C | C | C | None | M | NaN | False |
75877 | PL | PLH320023 | 3150 | Natural eutrophic lakes with Magnopotamion or ... | None | False | 0.0 | NaN | NaN | A | C | C | None | M | NaN | False |
5807 rows × 16 columns