Skip to content

utils

This module contains utility functions for the multimno package.

apply_schema_casting(sdf, schema)

This function takes a DataFrame and a schema, and applies the schema to the DataFrame. It selects the columns in the DataFrame that are in the schema, and casts each column to the type specified in the schema.

Parameters:

Name Type Description Default
sdf DataFrame

The DataFrame to apply the schema to.

required
schema StructType

The schema to apply to the DataFrame.

required

Returns:

Name Type Description
DataFrame DataFrame

A new DataFrame that includes the same rows as the input DataFrame, but with the columns cast to the types specified in the schema.

Source code in multimno/core/utils.py
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
def apply_schema_casting(sdf: DataFrame, schema: StructType) -> DataFrame:
    """
    Projects a DataFrame onto a schema, casting every selected column to its declared type.

    Only the columns named in the schema are kept, in the order of the schema fields. Each of them
    is cast to the data type of the corresponding schema field and keeps the field's name.

    Args:
        sdf (DataFrame): The DataFrame to apply the schema to. It must contain every column named in the schema.
        schema (StructType): The schema to apply to the DataFrame.

    Returns:
        DataFrame: A new DataFrame that includes the same rows as the input DataFrame,
        but restricted to the schema columns, each cast to the type specified in the schema.
    """
    # Build every cast in one projection. Chaining ``withColumn`` in a loop adds a projection per
    # column to the query plan, which the PySpark documentation advises against; a single
    # ``select`` with all the expressions yields the same columns with a flat plan.
    casted_columns = [F.col(field.name).cast(field.dataType).alias(field.name) for field in schema.fields]
    return sdf.select(*casted_columns)

assign_quadkey(sdf, crs_in, zoom_level)

Assigns a quadkey to each row in a DataFrame based on the centroid of its geometry.

Parameters:

Name Type Description Default
sdf DataFrame

The DataFrame to assign quadkeys to. The DataFrame must contain a geometry column.

required
crs_in int

The CRS of the dataframe to project to 4326 before assigning quadkeys.

required
zoom_level int

The zoom level to use when assigning quadkeys.

required

Returns:

Name Type Description
DataFrame DataFrame

A DataFrame containing the same rows as the input DataFrame, but with an additional quadkey column.

Source code in multimno/core/utils.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def assign_quadkey(sdf: DataFrame, crs_in: int, zoom_level: int) -> DataFrame:
    """
    Adds a ``quadkey`` column computed from the centroid of each row's geometry.

    The centroid is stored in a temporary ``centroid`` column, reprojected to EPSG:4326 when the
    input CRS differs from it, converted to a quadkey with ``latlon_to_quadkey`` and finally dropped.

    Args:
        sdf (DataFrame): The DataFrame to assign quadkeys to. The DataFrame must contain a geometry column.
        crs_in (int): The CRS of the dataframe to project to 4326 before assigning quadkeys.
        zoom_level (int): The zoom level to use when assigning quadkeys.

    Returns:
        DataFrame: A DataFrame containing the same rows as the input DataFrame, but with an additional quadkey column.
    """
    centroid_column = "centroid"
    with_centroid = sdf.withColumn(centroid_column, STF.ST_Centroid(ColNames.geometry))

    # latlon_to_quadkey works on latitude/longitude in degrees, so centroids in any other
    # CRS are reprojected to EPSG:4326 first.
    if crs_in != 4326:
        with_centroid = project_to_crs(with_centroid, crs_in, 4326, centroid_column)

    to_quadkey = F.udf(latlon_to_quadkey, StringType())
    latitude = STF.ST_Y(F.col(centroid_column))
    longitude = STF.ST_X(F.col(centroid_column))
    quadkey = to_quadkey(latitude, longitude, F.lit(zoom_level))

    return with_centroid.withColumn("quadkey", quadkey).drop(centroid_column)

calc_hashed_user_id(df, user_column=ColNames.user_id)

Calculates the SHA-256 hash of the user id (cast to string) and replaces the user id column with the raw binary digest.

Parameters:

Name Type Description Default
df DataFrame

Data of clean synthetic events with a user id column.

required

Returns:

Type Description
DataFrame

pyspark.sql.DataFrame: Dataframe, where user_id column is transformed to a hashed value.

Source code in multimno/core/utils.py
457
458
459
460
461
462
463
464
465
466
467
468
469
470
def calc_hashed_user_id(df: DataFrame, user_column: str = ColNames.user_id) -> DataFrame:
    """
    Replaces a user id column with the SHA-256 digest of its string representation.

    The column is cast to string and hashed with SHA-256; the hexadecimal digest is then decoded
    into its binary form, which overwrites the original column.

    Args:
        df (pyspark.sql.DataFrame): Data of clean synthetic events with a user id column.
        user_column (str, optional): Name of the column to hash. Defaults to ColNames.user_id.

    Returns:
        pyspark.sql.DataFrame: Dataframe, where the user id column is transformed to a hashed value.

    """
    user_id_as_text = F.col(user_column).cast("string")
    hex_digest = F.sha2(user_id_as_text, 256)
    # unhex turns the hexadecimal digest string into binary.
    return df.withColumn(user_column, F.unhex(hex_digest))

cut_geodata_to_extent(sdf, extent, target_crs, geometry_column='geometry')

Cuts geometries in a DataFrame to a specified extent.

Parameters:

Name Type Description Default
sdf DataFrame

The DataFrame to filter. The DataFrame must contain a geometry column.

required
extent tuple

A tuple representing the extent. The tuple contains four elements: (west, south, east, north), which are the western, southern, eastern, and northern bounds of the WGS84 extent.

required
target_crs int

The CRS of DataFrame to transform the extent to.

required
geometry_column str

The name of the geometry column. Defaults to "geometry".

'geometry'

Returns:

Name Type Description
DataFrame DataFrame

A DataFrame containing the same rows as the input DataFrame, but with the geometries cut to the extent.

Source code in multimno/core/utils.py
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def cut_geodata_to_extent(
    sdf: DataFrame,
    extent: Tuple[float, float, float, float],
    target_crs: int,
    geometry_column: str = "geometry",
) -> DataFrame:
    """
    Clips every geometry of a DataFrame to a rectangular extent.

    The extent is turned into a polygon, transformed from EPSG:4326 to the target CRS when the two
    differ, and each geometry is replaced by its intersection with that polygon.

    Args:
        sdf (DataFrame): The DataFrame to filter. The DataFrame must contain a geometry column.
        extent (tuple): A tuple representing the extent. The tuple contains four elements:
            (west, south, east, north), which are the western, southern, eastern, and northern bounds of the WGS84 extent.
        target_crs (int): The CRS of DataFrame to transform the extent to.
        geometry_column (str, optional): The name of the geometry column. Defaults to "geometry".

    Returns:
        DataFrame: A DataFrame containing the same rows as the input DataFrame, but with the geometries cut to the extent.
    """
    clip_polygon = STC.ST_PolygonFromEnvelope(*extent)

    # The extent is given in WGS84, so bring it into the CRS of the data before intersecting.
    extent_needs_transform = target_crs != 4326
    if extent_needs_transform:
        clip_polygon = STF.ST_Transform(clip_polygon, F.lit("EPSG:4326"), F.lit(f"EPSG:{target_crs}"))

    clipped_geometry = STF.ST_Intersection(F.col(geometry_column), clip_polygon)
    return sdf.withColumn(geometry_column, clipped_geometry)

cut_polygons_with_mask_polygons(input_sdf, mask_sdf, cols_to_keep, self_intersection=False, geometry_column='geometry')

Cuts polygons in the input DataFrame with mask polygons from another DataFrame.

This function takes two DataFrames: one with input polygons and another with mask polygons. It cuts the input polygons with the mask polygons, and returns a new DataFrame with the resulting polygons. Both DataFrames must use the same coordinate system.

Args:

- input_sdf (DataFrame): A DataFrame containing the input polygons.
- mask_sdf (DataFrame): A DataFrame containing the mask polygons.
- cols_to_keep (list): A list of column names to keep from the input DataFrame.
- geometry_column (str, optional): The name of the geometry column in the DataFrames. Defaults to "geometry".

Returns:

- DataFrame: A DataFrame containing the resulting polygons after cutting the input polygons with the mask polygons.

Source code in multimno/core/utils.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
def cut_polygons_with_mask_polygons(
    input_sdf: DataFrame,
    mask_sdf: DataFrame,
    cols_to_keep: List[str],
    self_intersection=False,
    geometry_column: str = "geometry",
) -> DataFrame:
    """
    Cuts polygons in the input DataFrame with mask polygons from another DataFrame.
    This function takes two DataFrames: one with input polygons and another with mask polygons.
    For every input polygon, the mask polygons intersecting it are unioned and subtracted from it.
    Input polygons that intersect no mask polygon are returned unchanged.
    Both dataframes have to have same coordinate system.
    Args:
        input_sdf (DataFrame): A DataFrame containing the input polygons.
        mask_sdf (DataFrame): A DataFrame containing the mask polygons. Not used when
            self_intersection is True.
        cols_to_keep (list): A list of column names to keep from the input DataFrame.
        self_intersection (bool, optional): If True, the input polygons are cut against each other
            instead of against mask_sdf: each polygon is cut by the intersecting input polygons
            that have a smaller area. Defaults to False.
        geometry_column (str, optional): The name of the geometry column in the DataFrames.
            Defaults to "geometry".
    Returns:
        DataFrame: A DataFrame containing the resulting polygons after cutting the input polygons with the mask polygons.
    """
    # Temporary row id used to tell cut rows apart from untouched rows later on.
    input_sdf = input_sdf.withColumn("id", F.monotonically_increasing_id())
    cols_to_keep = [f"a.{name}" for name in cols_to_keep]

    # Qualify the geometry column per side of the join. Using the configured column name here
    # (instead of a literal "geometry") keeps the join consistent with the rest of the function.
    input_geometry = f"a.{geometry_column}"
    mask_geometry = f"b.{geometry_column}"

    if self_intersection:
        input_sdf = input_sdf.withColumn("area", STF.ST_Area(geometry_column))
        # Only the larger polygon of an intersecting pair is cut; the smaller one acts as the mask.
        intersection = input_sdf.alias("a").join(
            input_sdf.alias("b"),
            STP.ST_Intersects(input_geometry, mask_geometry) & (F.col("a.area") > F.col("b.area")),
        )
        input_sdf = input_sdf.drop("area")
    else:
        intersection = input_sdf.alias("a").join(
            mask_sdf.alias("b"),
            STP.ST_Intersects(input_geometry, mask_geometry),
        )

    # Merge all masks hitting the same input polygon into one geometry to subtract.
    intersection_cut = intersection.groupby("a.id", *cols_to_keep).agg(
        STA.ST_Union_Aggr(mask_geometry).alias("cut_geometry")
    )
    intersection_cut = fix_geometry(intersection_cut, 3, "cut_geometry")
    intersection_cut = intersection_cut.withColumn(
        geometry_column, STF.ST_Difference(input_geometry, "cut_geometry")
    ).drop("cut_geometry")

    # Rows whose id never appeared in the intersection are passed through untouched.
    non_intersection = input_sdf.join(intersection_cut, ["id"], "left_anti")

    # NOTE(review): DataFrame.union matches columns by position, so cols_to_keep presumably has to
    # reproduce the column layout of input_sdf - confirm against callers.
    return non_intersection.union(intersection_cut).drop("id")

filter_geodata_to_extent(sdf, extent, target_crs, geometry_column='geometry')

Filters a DataFrame to include only rows with geometries that intersect a specified extent.

Parameters:

Name Type Description Default
sdf DataFrame

The DataFrame to filter. The DataFrame must contain a geometry column.

required
extent tuple

A tuple representing the extent. The tuple contains four elements: (west, south, east, north), which are the western, southern, eastern, and northern bounds of the WGS84 extent.

required
target_crs int

The CRS of DataFrame to transform the extent to.

required
geometry_column str

The name of the geometry column. Defaults to "geometry".

'geometry'

Returns:

Name Type Description
DataFrame DataFrame

A DataFrame containing only the rows from the input DataFrame where the geometry intersects the extent.

Source code in multimno/core/utils.py
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
def filter_geodata_to_extent(
    sdf: DataFrame,
    extent: Tuple[float, float, float, float],
    target_crs: int,
    geometry_column: str = "geometry",
) -> DataFrame:
    """
    Keeps only the rows whose geometry intersects a rectangular extent.

    The extent is turned into a polygon and transformed from EPSG:4326 to the target CRS when the
    two differ; geometries themselves are left unchanged.

    Args:
        sdf (DataFrame): The DataFrame to filter. The DataFrame must contain a geometry column.
        extent (tuple): A tuple representing the extent. The tuple contains four elements:
            (west, south, east, north), which are the western, southern, eastern, and northern bounds of the WGS84 extent.
        target_crs (int): The CRS of DataFrame to transform the extent to.
        geometry_column (str, optional): The name of the geometry column. Defaults to "geometry".

    Returns:
        DataFrame: A DataFrame containing only the rows from the input DataFrame where the geometry intersects the extent.
    """
    extent_polygon = STC.ST_PolygonFromEnvelope(*extent)

    # The extent is given in WGS84, so bring it into the CRS of the data before testing.
    extent_needs_transform = target_crs != 4326
    if extent_needs_transform:
        extent_polygon = STF.ST_Transform(extent_polygon, F.lit("EPSG:4326"), F.lit(f"EPSG:{target_crs}"))

    intersects_extent = STP.ST_Intersects(extent_polygon, F.col(geometry_column))
    return sdf.filter(intersects_extent)

fix_geometry(sdf, geometry_type, geometry_column='geometry')

Fixes the geometry of a given type in a DataFrame.

This function applies several operations to the geometries in the specified geometry column of the DataFrame:

1. If a geometry is a collection of geometries, extracts only the geometries of the given type.
2. Filters out any geometries of type other than given.
3. Removes any invalid geometries.
4. Removes any empty geometries.

Args:

- sdf (DataFrame): The DataFrame containing the geometries to check.
- geometry_column (str, optional): The name of the column containing the geometries. Defaults to "geometry".

Returns:

- DataFrame: The DataFrame with the fixed polygon geometries.

Source code in multimno/core/utils.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def fix_geometry(sdf: DataFrame, geometry_type: int, geometry_column: str = "geometry") -> DataFrame:
    """
    Cleans the geometries of a DataFrame so that only valid geometries of one type remain.
    The following steps are applied to the specified geometry column, in this order:
    1. For polygons (geometry_type 3) only, the precision of the coordinates is reduced to 4.
    2. If a geometry is a collection, only its members of the given type are extracted.
    3. Rows with empty geometries are removed.
    4. Rows whose geometry type name does not contain the expected name are removed.
    5. Rows with invalid geometries are removed.
    Args:
        sdf (DataFrame): The DataFrame containing the geometries to check.
        geometry_type (int): Type of geometry to keep: 3 selects polygons, 2 selects lines and
            any other value selects points.
        geometry_column (str, optional): The name of the column containing the geometries. Defaults to "geometry".
    Returns:
        DataFrame: The DataFrame with the fixed geometries.
    """
    # Substring looked up in the geometry type name; anything but 3 or 2 falls back to points.
    type_names = {3: "Polygon", 2: "Line"}
    geometry_name = type_names.get(geometry_type, "Point")
    geometry = F.col(geometry_column)

    if geometry_type == 3:
        sdf = sdf.withColumn(geometry_column, STF.ST_ReducePrecision(geometry, F.lit(4)))

    # Collections are narrowed down to their members of the requested type; other geometries pass through.
    extracted = F.when(
        STF.ST_IsCollection(geometry),
        STF.ST_CollectionExtract(geometry, F.lit(geometry_type)),
    ).otherwise(geometry)

    cleaned = sdf.withColumn(geometry_column, extracted)
    cleaned = cleaned.filter(~STF.ST_IsEmpty(geometry))
    cleaned = cleaned.filter(STF.ST_GeometryType(geometry).like(f"%{geometry_name}%"))
    return cleaned.filter(STF.ST_IsValid(geometry))

get_epsg_from_geometry_column(df)

Get the EPSG code from the geometry column of a DataFrame.

Parameters:

Name Type Description Default
df DataFrame

DataFrame with a geometry column.

required

Raises:

Type Description
ValueError

If the DataFrame contains multiple EPSG codes.

Returns:

Name Type Description
int int

EPSG code of the geometry column.

Source code in multimno/core/utils.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def get_epsg_from_geometry_column(df: DataFrame) -> int:
    """
    Get the EPSG code from the geometry column of a DataFrame.

    The SRID of every geometry in the "geometry" column is read and the single distinct value is returned.

    Args:
        df (DataFrame): DataFrame with a geometry column.

    Raises:
        ValueError: If the DataFrame contains multiple EPSG codes, or contains no rows at all.

    Returns:
        int: EPSG code of the geometry column.
    """
    # Collect the distinct SRIDs in a single action. The result is used only once, so nothing is
    # persisted and no cached data is left behind on the cluster.
    srids = [row[0] for row in df.select(STF.ST_SRID("geometry")).distinct().collect()]

    if not srids:
        # Fail with an explicit message instead of an IndexError on the empty result.
        raise ValueError("Dataframe contains no rows to read an EPSG code from")
    if len(srids) > 1:
        raise ValueError("Dataframe contains multiple EPSG codes")

    return srids[0]

get_quadkeys_for_bbox(extent, level_of_detail)

Generates a list of quadkeys for a bounding box at a specific zoom level.

This function takes a bounding box defined by its lon min, lat min, lon max, and lat max extents, and a zoom level, and generates a list of quadkeys that cover the bounding box at the specified zoom level. The quadkeys are strings of digits that represent specific tiles in a quadtree-based spatial index.

Parameters:

Name Type Description Default
extent tuple

A tuple representing the bounding box. The tuple contains four elements: (west, south, east, north), which are the western, southern, eastern, and northern extents of the bounding box, respectively. Each extent is a float representing a geographic coordinate in degrees.

required
level_of_detail int

The zoom level.

required

Returns:

Name Type Description
list List[str]

A list of quadkeys that cover the bounding box at the specified zoom level. Each quadkey is a string.

Source code in multimno/core/utils.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def get_quadkeys_for_bbox(extent: Tuple[float, float, float, float], level_of_detail: int) -> List[str]:
    """
    Lists the quadkeys of all tiles covering a bounding box at a given zoom level.

    The north-west and south-east corners of the bounding box are converted to tile coordinates,
    and a quadkey is produced for every tile in the rectangle spanned by these two tiles
    (both ends included). Quadkeys are ordered by tile x first, then by tile y.

    Args:
        extent (tuple): A tuple representing the bounding box. The tuple contains four elements:
            (west, south, east, north), which are the western, southern, eastern, and northern extents
            of the bounding box, respectively. Each extent is a float representing a geographic coordinate in degrees.
        level_of_detail (int): The zoom level.

    Returns:
        list: A list of quadkeys that cover the bounding box at the specified zoom level. Each quadkey is a string.
    """
    west, south, east, north = extent

    # The north-west corner yields the smallest tile indices, the south-east corner the largest.
    first_x, first_y = latlon_to_tilexy(north, west, level_of_detail)
    last_x, last_y = latlon_to_tilexy(south, east, level_of_detail)

    return [
        tilexy_to_quadkey(tile_x, tile_y, level_of_detail)
        for tile_x in range(first_x, last_x + 1)
        for tile_y in range(first_y, last_y + 1)
    ]

latlon_to_quadkey(latitude, longitude, level_of_detail)

Converts a geographic coordinate to a quadkey at a specific zoom level.

This function takes a latitude and longitude in degrees, and a zoom level, and converts them to a quadkey. The quadkey is a string of digits that represents a specific tile in a quadtree-based spatial index. The conversion process involves first converting the geographic coordinate to tile coordinates, and then converting the tile coordinates to a quadkey.

Parameters:

Name Type Description Default
latitude float

The latitude of the geographic coordinate, in degrees.

required
longitude float

The longitude of the geographic coordinate, in degrees.

required
level_of_detail int

The zoom level.

required

Returns:

Name Type Description
str str

The quadkey representing the geographic coordinate at the specified zoom level.

Source code in multimno/core/utils.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def latlon_to_quadkey(latitude: float, longitude: float, level_of_detail: int) -> str:
    """
    Returns the quadkey of the tile containing a geographic coordinate at a given zoom level.

    The coordinate is first converted to tile coordinates with ``latlon_to_tilexy``; the tile
    coordinates are then encoded as a quadkey with ``tilexy_to_quadkey``.

    Args:
        latitude (float): The latitude of the geographic coordinate, in degrees.
        longitude (float): The longitude of the geographic coordinate, in degrees.
        level_of_detail (int): The zoom level.

    Returns:
        str: The quadkey representing the geographic coordinate at the specified zoom level.
    """
    tile_xy = latlon_to_tilexy(latitude, longitude, level_of_detail)
    return tilexy_to_quadkey(*tile_xy, level_of_detail)

latlon_to_tilexy(latitude, longitude, level_of_detail)

Converts a geographic coordinate to tile coordinates at a specific zoom level.

This function takes a latitude and longitude in degrees, and a zoom level, and converts them to tile coordinates (tile_x, tile_y) at the specified zoom level. The tile coordinates are in the tile system used by Bing Maps, OpenStreetMap, MapBox and other map providers.

Parameters:

Name Type Description Default
latitude float

The latitude of the geographic coordinate, in degrees.

required
longitude float

The longitude of the geographic coordinate, in degrees.

required
level_of_detail int

The zoom level.

required

Returns:

Name Type Description
tuple Tuple[int, int]

A tuple representing the tile coordinates of the geographic coordinate at the specified zoom level. The tuple contains two elements: (tile_x, tile_y).

Source code in multimno/core/utils.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def latlon_to_tilexy(latitude: float, longitude: float, level_of_detail: int) -> Tuple[int, int]:
    """
    Converts a geographic coordinate to tile coordinates at a specific zoom level.

    This function takes a latitude and longitude in degrees, and a zoom level, and converts them to
    tile coordinates (tile_x, tile_y) at the specified zoom level. The tile coordinates are in the
    tile system used by Bing Maps, OpenStreetMap, MapBox and other map providers.

    Latitudes beyond the limit of the Web Mercator projection (about +/-85.05 degrees) are clamped
    to that limit, and the resulting tile indices are clamped to the valid range
    [0, 2**level_of_detail - 1], so coordinates on the edge of the map (the poles, longitude 180)
    map to the outermost tiles.

    Args:
        latitude (float): The latitude of the geographic coordinate, in degrees.
        longitude (float): The longitude of the geographic coordinate, in degrees.
        level_of_detail (int): The zoom level.

    Returns:
        tuple: A tuple representing the tile coordinates of the geographic coordinate at the specified
        zoom level. The tuple contains two elements: (tile_x, tile_y).

    Raises:
        ValueError: If the latitude is outside [-90, 90] or the longitude is outside [-180, 180].
    """
    if not -90 <= latitude <= 90:
        raise ValueError(f"Latitude must be in the range [-90, 90], got {latitude}")
    if not -180 <= longitude <= 180:
        raise ValueError(f"Longitude must be in the range [-180, 180], got {longitude}")

    # The projection below is undefined at the poles: at +90 it divides by zero and at -90 it takes
    # log(0). Clamp to the Web Mercator latitude limit used by the tile system.
    max_mercator_latitude = 85.05112878
    latitude = min(max(latitude, -max_mercator_latitude), max_mercator_latitude)

    latitude = math.radians(latitude)
    longitude = math.radians(longitude)

    sin_latitude = math.sin(latitude)
    pixel_x = ((longitude + math.pi) / (2 * math.pi)) * 256 * 2**level_of_detail
    pixel_y = (0.5 - math.log((1 + sin_latitude) / (1 - sin_latitude)) / (4 * math.pi)) * 256 * 2**level_of_detail

    # Points on the eastern/southern edge of the map (and rounding at the clamped latitude) would
    # otherwise land one tile outside the grid.
    max_tile = 2**level_of_detail - 1
    tile_x = min(max(int(math.floor(pixel_x / 256)), 0), max_tile)
    tile_y = min(max(int(math.floor(pixel_y / 256)), 0), max_tile)
    return tile_x, tile_y

project_to_crs(sdf, crs_in, crs_out, geometry_column='geometry')

Projects geometry to CRS.

Parameters:

Name Type Description Default
sdf DataFrame

Input DataFrame.

required
crs_in int

Input CRS.

required
crs_out int

Output CRS.

required

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with geometry projected to cartesian CRS.

Source code in multimno/core/utils.py
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def project_to_crs(sdf: DataFrame, crs_in: int, crs_out: int, geometry_column="geometry") -> DataFrame:
    """
    Transforms a geometry column from one CRS to another.

    Both CRS are given as EPSG codes and are passed to the transformation as "EPSG:<code>" strings.
    The transformed geometry overwrites the original column.

    Args:
        sdf (DataFrame): Input DataFrame.
        crs_in (int): Input CRS.
        crs_out (int): Output CRS.
        geometry_column (str, optional): The name of the geometry column to transform. Defaults to "geometry".

    Returns:
        DataFrame: DataFrame with the geometry column projected to the output CRS.
    """
    source_crs = F.lit(f"EPSG:{crs_in}")
    destination_crs = F.lit(f"EPSG:{crs_out}")

    reprojected = STF.ST_Transform(sdf[geometry_column], source_crs, destination_crs)
    return sdf.withColumn(geometry_column, reprojected)

quadkey_to_extent(quadkey)

Converts a quadkey to a geographic extent (bounding box).

This function takes a quadkey and converts it to a geographic extent represented as a tuple of (longitude_min, latitude_min, longitude_max, latitude_max).

Parameters:

Name Type Description Default
quadkey str

The quadkey to convert. A quadkey is a string of digits that represents a specific tile in a quadtree-based spatial index.

required

Returns:

Name Type Description
tuple Tuple[float, float, float, float]

A tuple representing the geographic extent of the quadkey. The tuple contains four elements: (longitude_min, latitude_min, longitude_max, latitude_max).

Source code in multimno/core/utils.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def quadkey_to_extent(quadkey: str) -> Tuple[float, float, float, float]:
    """
    Returns the geographic bounding box of the tile identified by a quadkey.

    The quadkey is decoded into tile coordinates and a zoom level with ``quadkey_to_tile``; the
    edges of that tile are then converted to longitudes and latitudes in degrees.

    Args:
        quadkey (str): The quadkey to convert. A quadkey is a string of digits that represents a
        specific tile in a quadtree-based spatial index.

    Returns:
        tuple: A tuple representing the geographic extent of the quadkey. The tuple contains four
            elements: (longitude_min, latitude_min, longitude_max, latitude_max).
    """
    tile_x, tile_y, zoom_level = quadkey_to_tile(quadkey)
    tiles_per_axis = 2.0**zoom_level

    def column_edge_to_longitude(column: int) -> float:
        # Longitude of the western edge of the given tile column.
        return column / tiles_per_axis * 360.0 - 180.0

    def row_edge_to_latitude(row: int) -> float:
        # Latitude of the northern edge of the given tile row.
        return math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * row / tiles_per_axis))))

    # The southern/eastern edges of a tile are the northern/western edges of the next row/column.
    west = column_edge_to_longitude(tile_x)
    south = row_edge_to_latitude(tile_y + 1)
    east = column_edge_to_longitude(tile_x + 1)
    north = row_edge_to_latitude(tile_y)

    return (west, south, east, north)

quadkey_to_tile(quadkey)

Converts a quadkey to tile coordinates and zoom level.

This function takes a quadkey and converts it to tile coordinates (tile_x, tile_y) and zoom level. A quadkey is a string of digits that represents a specific tile in a quadtree-based spatial index.

Parameters:

Name Type Description Default
quadkey str

The quadkey to convert.

required

Returns:

Name Type Description
tuple Tuple[int, int, int]

A tuple representing the tile coordinates and zoom level of the quadkey. The tuple contains three elements: (tile_x, tile_y, zoom_level).

Raises:

Type Description
ValueError

If the quadkey contains an invalid character.

Source code in multimno/core/utils.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def quadkey_to_tile(quadkey: str) -> Tuple[int, int, int]:
    """
    Decodes a quadkey into tile coordinates and a zoom level.

    Every character of the quadkey contributes one bit to each tile coordinate, most significant
    bit first: "1" sets the x bit, "2" sets the y bit, "3" sets both and "0" sets neither.
    The zoom level is the length of the quadkey.

    Args:
        quadkey (str): The quadkey to convert.

    Returns:
        tuple: A tuple representing the tile coordinates and zoom level of the quadkey. The tuple contains three
        elements: (tile_x, tile_y, zoom_level).

    Raises:
        ValueError: If the quadkey contains an invalid character.
    """
    # (x bit, y bit) contributed by each quadkey digit.
    digit_bits = {"0": (0, 0), "1": (1, 0), "2": (0, 1), "3": (1, 1)}

    tile_x = 0
    tile_y = 0
    for digit in quadkey:
        if digit not in digit_bits:
            raise ValueError("Invalid quadkey character.")
        x_bit, y_bit = digit_bits[digit]
        # Digits arrive most significant first, so shift what was read so far and append the new bit.
        tile_x = (tile_x << 1) | x_bit
        tile_y = (tile_y << 1) | y_bit

    return tile_x, tile_y, len(quadkey)

spark_to_geopandas(df, epsg=None)

Convert a Spark DataFrame to a geopandas GeoDataFrame.

Parameters:

Name Type Description Default
df DataFrame

Spark DataFrame to convert.

required

Returns:

Type Description
GeoDataFrame

gpd.GeoDataFrame: GeoDataFrame with the same data as the input DataFrame.

Source code in multimno/core/utils.py
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
def spark_to_geopandas(df: DataFrame, epsg: int = None) -> gpd.GeoDataFrame:
    """
    Converts a Spark DataFrame to a geopandas GeoDataFrame.

    The whole DataFrame is brought to the driver as a pandas DataFrame and wrapped in a
    GeoDataFrame whose CRS is set to "EPSG:<epsg>".

    Args:
        df (DataFrame): Spark DataFrame to convert.
        epsg (int, optional): EPSG code to assign to the GeoDataFrame. When omitted, it is read
            from the geometry column with ``get_epsg_from_geometry_column``. Defaults to None.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame with the same data as the input DataFrame.
    """
    # Only inspect the data for its EPSG code when the caller did not provide one.
    crs_code = get_epsg_from_geometry_column(df) if epsg is None else epsg
    return gpd.GeoDataFrame(df.toPandas(), crs=f"EPSG:{crs_code}")

tilexy_to_quadkey(x, y, level_of_detail)

Converts tile coordinates to a quadkey at a specific zoom level.

This function takes tile coordinates (x, y) and a zoom level, and converts them to a quadkey. The quadkey is a string of digits that represents a specific tile in a quadtree-based spatial index. The conversion process involves bitwise operations on the tile coordinates.

Parameters:

Name Type Description Default
x int

The x-coordinate of the tile.

required
y int

The y-coordinate of the tile.

required
level_of_detail int

The zoom level.

required

Returns:

Name Type Description
str str

The quadkey representing the tile at the specified zoom level.

Source code in multimno/core/utils.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def tilexy_to_quadkey(x: int, y: int, level_of_detail: int) -> str:
    """
    Encodes tile coordinates as a quadkey at a given zoom level.

    The quadkey has one digit per zoom level. Each digit combines one bit of the x-coordinate
    (worth 1) and the same bit of the y-coordinate (worth 2), starting from the most significant
    of the ``level_of_detail`` lowest bits.

    Args:
        x (int): The x-coordinate of the tile.
        y (int): The y-coordinate of the tile.
        level_of_detail (int): The zoom level.

    Returns:
        str: The quadkey representing the tile at the specified zoom level.
    """

    def digit_for_bit(bit: int) -> str:
        # Quadkey digit built from the given bit position of both coordinates.
        mask = 1 << bit
        x_part = 1 if x & mask else 0
        y_part = 2 if y & mask else 0
        return str(x_part + y_part)

    # Walk the bit positions from the most significant one down to bit 0.
    return "".join(digit_for_bit(bit) for bit in reversed(range(level_of_detail)))