Skip to content

grid

This module provides functionality for generating a grid based on the INSPIRE grid system specification.

InspireGridGenerator

A class used to generate a grid based on the INSPIRE 100m grid system specification.

Source code in multimno/core/grid.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
class InspireGridGenerator:
    """A class used to generate a grid based on the INSPIRE 100m grid system specification."""

    # EPSG code of the projected CRS in which the grid is defined. Extents and masks given in EPSG:4326 are
    # transformed into this CRS.
    GRID_CRS_EPSG_CODE = 3035
    # Resolutions, in metres, accepted when building INSPIRE grid ID strings.
    ACCEPTED_RESOLUTIONS = [100, 1000]

    def __init__(
        self,
        spark: SparkSession,
        geometry_col: str = ColNames.geometry,
        grid_id_col: str = ColNames.grid_id,
        grid_partition_size: int = 2000,
    ):
        self.spark = spark
        self.resolution = 100
        self.resolution_str = self._format_distance(self.resolution)
        self.geometry_col = geometry_col
        self.grid_id_col = grid_id_col
        self.grid_partition_size = grid_partition_size

    @staticmethod
    def _format_distance(value: int) -> str:
        """Formats the given distance value to string.

        Args:
            value (int): The distance value to format.

        Returns:
            str: The formatted distance value.
        """
        if value < 1000:
            return f"{value}m"
        else:
            if value % 1000 != 0:
                raise ValueError(f"Distance to be formatted not multiple of 1000: {value}")
            return f"{value // 1000}km"

    def _project_latlon_extent(self, extent: List[float]) -> Tuple[List[float], List[float]]:
        """Transforms the four corners of a lat/lon bounding box into the CRS of the grid.

        Args:
            extent (List[float]): Bounding box in EPSG:4326. Order: [lon_min, lat_min, lon_max, lat_max]

        Returns:
            List[float]: Projected bottom-left and top-right corners. Order: [northing_bottomleft,
                easting_bottomleft, northing_topright, easting_topright]
            List[float]: Projected bottom-right and top-left corners. Order: [northing_bottomright,
                easting_bottomright, northing_topleft, easting_topleft]
        """
        # The transformer uses the following convention for the (first, second) coordinates:
        #   EPSG:4326 -> (lat, lon)
        #   EPSG:3035 -> (northing, easting)
        to_grid_crs = Transformer.from_crs("EPSG:4326", f"EPSG:{self.GRID_CRS_EPSG_CODE}")

        lon_min, lat_min, lon_max, lat_max = extent[0], extent[1], extent[2], extent[3]

        bottomleft = to_grid_crs.transform(lat_min, lon_min)
        topright = to_grid_crs.transform(lat_max, lon_max)
        bottomright = to_grid_crs.transform(lat_min, lon_max)
        topleft = to_grid_crs.transform(lat_max, lon_min)

        main_corners = [bottomleft[0], bottomleft[1], topright[0], topright[1]]
        other_corners = [bottomright[0], bottomright[1], topleft[0], topleft[1]]
        return main_corners, other_corners

    @staticmethod
    def _project_bounding_box(extent: List[float], auxiliar_coords: List[float]) -> Tuple[List[float], List[float]]:
        """Returns the bottom-left and top-right coordinates of the rectangular bounding box in the projected CRS
        that covers the bounding box defined from the bottom-left and top-right corners in lat/lon.

        Args:
            extent (list[float]): Coordinates in the projected CRS that are the transformation of the minimum and
                maximum latitude and longitude, in [n_bottomleft, e_bottomleft, n_topright, e_topright] order.
            auxiliar_coords (list[float]): Auxiliar coordinates in the projected CRS that are the transformation
                of the other two corners of the rectangular bounding box, in
                [n_bottomright, e_bottomright, n_topleft, e_topleft] order

        Returns:
            list[float]: The projected extent, in [n_bottomleft, e_bottomleft, n_topright, e_topright] order.
            list[float]: Raster cover bounds, in [n_topleft, e_topleft, n_bottomright, e_bottomright] order.
        """
        cover_n_bottomleft = min(extent[0], auxiliar_coords[0])  # min lat
        cover_e_bottomleft = min(extent[1], auxiliar_coords[3])  # min lon

        cover_n_topright = max(extent[2], auxiliar_coords[2])  # max lat
        cover_e_topright = max(extent[3], auxiliar_coords[1])  # max lon

        cover_n_topleft = max(extent[2], auxiliar_coords[2])  # max lat
        cover_e_topleft = min(extent[1], auxiliar_coords[3])  # min lon

        cover_n_bottomright = min(extent[0], auxiliar_coords[0])  # min lat
        cover_e_bottomright = max(extent[3], auxiliar_coords[1])  # max lon

        return (
            [cover_n_bottomleft, cover_e_bottomleft, cover_e_topright, cover_n_topright],
            [cover_n_topleft, cover_e_topleft, cover_n_bottomright, cover_e_bottomright],
        )

    def _snap_extent_to_grid(self, extent: List[float]) -> List[float]:
        """Snaps the given extent to the grid.

        Args:
            extent (list[float]): The extent to snap.

        Returns:
            list[float]: The snapped extent.
        """
        return [round(coord / self.resolution) * self.resolution for coord in extent]

    def _extend_grid_extent(self, extent: List[float], extension_factor: int = 5) -> List[float]:
        """Extends the given extent by the specified factor in all directions.

        Args:
            extent (list[float]): The extent to extend.
            extension_factor (int, optional): The factor by which to extend the extent. Defaults to 5.

        Returns:
            list[float]: The extended extent.
        """
        extension_size = self.resolution * extension_factor
        return [
            extent[0] - extension_size,
            extent[1] - extension_size,
            extent[2] + extension_size,
            extent[3] + extension_size,
        ]

    def _extend_grid_raster_bounds(self, raster_bounds: List[float], extension_factor: int = 5) -> List[float]:
        """Extends the given extent by the specified factor in all directions.

        Args:.
            extent (list[float]): Raster cover bounds, in [x_topleft, y_topleft, x_bottomright, y_bottomright] order.
            extension_factor (int, optional): The factor by which to extend the extent. Defaults to 5.

        Returns:
            list[float]: The extended extent.
        """
        extension_size = self.resolution * extension_factor
        return [
            raster_bounds[0] + extension_size,  # n topleft
            raster_bounds[1] - extension_size,  # e topleft
            raster_bounds[2] - extension_size,  # n bottomright
            raster_bounds[3] + extension_size,  # e bottomright
        ]

    def _get_grid_height(self, raster_bounds: List[float]) -> int:
        """Calculates the height of the grid for the given extent.

        Args:
            raster_bounds (list[float]): The raster_bounds for which to calculate the grid height.

        Returns:
            int: The grid height.
        """
        return int((raster_bounds[0] - raster_bounds[2]) / self.resolution)

    def _get_grid_width(self, raster_bounds: List[float]) -> int:
        """Calculates the width of the grid for the given extent.

        Args:
            raster_bounds (list[float]): The raster_bounds for which to calculate the grid width.

        Returns:
            int: The grid width.
        """
        return int((raster_bounds[3] - raster_bounds[1]) / self.resolution)

    def process_latlon_extent(self, extent: List[float]) -> Tuple[List[float], List[float]]:
        """Turns an extent in latitude and longitude (EPSG 4326) into an extent and raster bounds in EPSG 3035.

        The steps are: project the four corners, build the rectangle covering them, snap it to the grid and
        extend it some extra tiles in each direction.

        Args:
            extent (list[float]): The extent in lat/lon to process. Ordering is [lon_min, lat_min, lon_max, lat_max].

        Returns:
            extent (list[float]): Coordinates of the rectangle/bounding box that covers the projected and extended
                extent (bottom-left and top-right corners).
            raster_bounds (list[float]): Raster bounds matching that rectangle (top-left and bottom-right corners).
        """
        # 1) Projection of the four corners, 2) rectangle covering them
        main_corners, other_corners = self._project_latlon_extent(extent)
        cover_extent, cover_bounds = self._project_bounding_box(main_corners, other_corners)

        # 3) Snap to the grid, 4) add the safety margin of extra tiles
        extended_extent = self._extend_grid_extent(self._snap_extent_to_grid(cover_extent))
        extended_bounds = self._extend_grid_raster_bounds(self._snap_extent_to_grid(cover_bounds))

        return extended_extent, extended_bounds

    def _get_grid_blueprint(self, extent: List[float]) -> Tuple[DataFrame, List[float]]:
        """Generates a blueprint for the grid for the given extent as a raster of grid resolution.
        Splits initial raster into smaller rasters of size grid_partition_size x grid_partition_size.

        Args:
            extent (List[float]): The extent in lat/lon for which to generate the grid blueprint. Ordering must be
                [lon_min, lat_min, lon_max, lat_max].

        Returns:
            DataFrame: The grid blueprint, i.e. the exploded raster tiles, repartitioned into as many partitions
                as rows it contains.
            proj_extent (List[float]): Coordinates of the rectangle/bounding box that covers the projected and extended
                extent, as returned by `process_latlon_extent` (bottom-left and top-right corners).
        """
        proj_extent, raster_bounds = self.process_latlon_extent(extent)

        # Size of the raster, in number of grid tiles
        grid_height = self._get_grid_height(raster_bounds)
        grid_width = self._get_grid_width(raster_bounds)

        # Single-band empty raster anchored at the top-left raster bound (easting first, then northing). The
        # second pixel size is negative -- presumably because rows advance towards decreasing northing; confirm
        # against the Sedona RS_MakeEmptyRaster documentation.
        sdf = self.spark.sql(
            f"""SELECT RS_MakeEmptyRaster(1, "B", {grid_width}, 
                                {grid_height}, 
                                {raster_bounds[1]},
                                {raster_bounds[0]}, 
                                {self.resolution}, 
                               -{self.resolution}, 0.0, 0.0, {self.GRID_CRS_EPSG_CODE}) as raster"""
        )

        # Split the raster into square pieces of grid_partition_size pixels per side
        sdf = sdf.selectExpr(f"RS_TileExplode(raster,{self.grid_partition_size}, {self.grid_partition_size})")
        # Counting the rows runs a Spark job; the count is then used as the number of partitions
        return sdf.repartition(sdf.count()), proj_extent

    @staticmethod
    def _get_polygon_sdf_extent(polygon_sdf: DataFrame) -> List[float]:
        """Computes the bounding box of the geometry held in the first row of the given DataFrame.

        This method is currently used with geometry in EPSG 4326, following the order used by Sedona where the
        first coordinate X contains longitude and the second coordinate Y contains latitude.

        Args:
            polygon_sdf (DataFrame): DataFrame with the polygon in a column named `geometry`.

        Returns:
            list[float]: The extent of the polygon. Order: [x_min, y_min, x_max, y_max] where x and y
                refer to the horizontal (longitude) and vertical (latitude) coordinates of the geometry, respectively.
        """
        with_bounds = polygon_sdf.withColumn("bbox", STF.ST_Envelope(polygon_sdf["geometry"]))
        envelope = with_bounds["bbox"]

        # One column per side of the envelope, added in the order in which they are returned
        bound_getters = {
            "x_min": STF.ST_XMin,
            "y_min": STF.ST_YMin,
            "x_max": STF.ST_XMax,
            "y_max": STF.ST_YMax,
        }
        for bound_name, bound_getter in bound_getters.items():
            with_bounds = with_bounds.withColumn(bound_name, bound_getter(envelope))

        # Only the first row is read
        first_row = with_bounds.select("x_min", "y_min", "x_max", "y_max").collect()[0]
        return first_row[0:]

    def _get_grid_intersection_with_mask(self, sdf: DataFrame, polygon_sdf: DataFrame) -> DataFrame:
        """Keeps the grid geometries that intersect the given polygon mask.

        Args:
            sdf (DataFrame): The DataFrame representing the grid, with geometries in the CRS of the grid.
            polygon_sdf (DataFrame): The DataFrame representing the mask in EPSG:4326, in a column named
                `geometry`. Sedona function expects first coordinate (X) to be longitude and second coordinate (Y)
                to be latitude.

        Returns:
            DataFrame: The DataFrame representing the intersection of the grid with the mask. The geometry of
                the mask is not kept.
        """
        # Bring the mask to the CRS of the grid before comparing geometries
        grid_crs = f"EPSG:{self.GRID_CRS_EPSG_CODE}"
        projected_mask = STF.ST_Transform(polygon_sdf["geometry"], F.lit("EPSG:4326"), F.lit(grid_crs))
        mask_sdf = polygon_sdf.withColumn("geometry", projected_mask)

        intersects_mask = STP.ST_Intersects(sdf[self.geometry_col], mask_sdf["geometry"])
        masked_grid = sdf.join(mask_sdf, intersects_mask, "inner")

        return masked_grid.drop(mask_sdf["geometry"])

    def _get_grid_id_from_centroids(self, sdf: DataFrame, n_origin: int, e_origin: int) -> DataFrame:
        """Adds the internal grid ID and the origin columns to a DataFrame of grid tile centroids.

        The DataFrame must have point geometries in the [self.geometry_col] column in EPSG:3035, representing the
        centroids of grid tiles.

        The grid ID is built from two tile indexes, counted from the origin (e_origin, n_origin):
            The easting index (horizontal coordinate of EPSG:3035) is shifted 16 bits to the left
            The northing index (vertical coordinate of EPSG:3035) is added in the lowest bits

        The translation to a different origin is done so that all the necessary tiles fit into the 4 bytes of
        the identifier.

        Sedona represents the X and Y coordinates as the horizontal and vertical coordinates, respectively. It does
        not follow the coordinate order of the CRS.

        The origin column packs the origin in the same way, in units of the grid resolution: easting shifted
        32 bits to the left plus northing.

        Args:
            sdf (DataFrame): DataFrame containing grid tile centroids to which we want to add the grid ID.
            n_origin (int): Northing origin to be used in the internal 4-byte ID. In metres.
            e_origin (int): Easting origin to be used in the internal 4-byte ID. In metres.

        Returns:
            DataFrame: DataFrame with the grid ID and origin columns added
        """
        centroid = F.col(self.geometry_col)
        half_tile = self.resolution / 2

        # A centroid lies half a tile away from the bottom-left corner of its tile
        easting_index = ((STF.ST_X(centroid) - e_origin - half_tile) / self.resolution).cast(IntegerType())
        northing_index = ((STF.ST_Y(centroid) - n_origin - half_tile) / self.resolution).cast(IntegerType())
        sdf = sdf.withColumn(self.grid_id_col, F.shiftleft(easting_index, 16) + northing_index)

        # Origin column
        origin_easting = F.lit(e_origin / self.resolution).cast(LongType())
        origin_northing = F.lit(n_origin / self.resolution).cast(LongType())
        packed_origin = (F.shiftleft(origin_easting, 32) + origin_northing).cast(LongType())

        return sdf.withColumn(ColNames.origin, packed_origin)

    def _get_grid_id_from_grid_tiles(self, sdf: DataFrame, n_origin: int, e_origin: int) -> DataFrame:
        """Adds the internal grid ID and the origin columns to a DataFrame of grid tiles.

        The DataFrame must have tile geometries in the [self.geometry_col] column in EPSG:3035, representing the
        grid tiles.

        The grid ID is built from two tile indexes, counted from the origin (e_origin, n_origin):
            The easting index (horizontal coordinate of EPSG:3035) is shifted 16 bits to the left
            The northing index (vertical coordinate of EPSG:3035) is added in the lowest bits

        The translation to a different origin is done so that all the necessary tiles fit into the 4 bytes of
        the identifier.

        Sedona represents the X and Y coordinates as the horizontal and vertical coordinates, respectively. It does
        not follow the coordinate order of the CRS.

        The origin column packs the origin in the same way, in units of the grid resolution: easting shifted
        32 bits to the left plus northing.

        Args:
            sdf (DataFrame): DataFrame containing grid tiles to which we want to add the grid ID.
            n_origin (int): Northing origin to be used in the internal 4-byte ID. In metres.
            e_origin (int): Easting origin to be used in the internal 4-byte ID. In metres.

        Returns:
            DataFrame: DataFrame with the grid ID and origin columns added
        """
        tile = F.col(self.geometry_col)

        # The minimum X and Y of a tile are the coordinates of its bottom-left corner
        easting_index = ((STF.ST_XMin(tile) - e_origin) / self.resolution).cast(IntegerType())
        northing_index = ((STF.ST_YMin(tile) - n_origin) / self.resolution).cast(IntegerType())
        sdf = sdf.withColumn(self.grid_id_col, F.shiftleft(easting_index, 16) + northing_index)

        # Origin column
        origin_easting = F.lit(e_origin / self.resolution).cast(LongType())
        origin_northing = F.lit(n_origin / self.resolution).cast(LongType())
        packed_origin = (F.shiftleft(origin_easting, 32) + origin_northing).cast(LongType())

        return sdf.withColumn(ColNames.origin, packed_origin)

    def cover_extent_with_grid_centroids(
        self, extent: List[float], n_origin: int = None, e_origin: int = None
    ) -> DataFrame:
        """Covers the given extent with grid centroids. It takes an extent expressed in EPSG:4326 and covers it
        with grid centroid point geometries in EPSG:3035, returning a DataFrame with these geometries, the internal
        4-byte grid ID and the origin used to define the 4-byte ID. If both `n_origin` and `e_origin` are provided,
        they are used as the origin of the ID; if not, the origin is taken from the provided extent.

        It is desirable to define the origin using `n_origin` and `e_origin` when one wants to cover several extents
        sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

        Args:
            extent (list[float]): The extent in lat/lon (EPSG:4326) to cover with grid centroids. Ordering must be
                [lon_min, lat_min, lon_max, lat_max].
            n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
                Defaults to None.
            e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
                Defaults to None.

        Returns:
            DataFrame: The DataFrame representing the grid centroids covering the extent, with their grid ID and origin
                columns.
        """
        if (n_origin is None and e_origin is not None) or (n_origin is not None and e_origin is None):
            raise ValueError("Either both or none of the arguments `n_origin` and `e_origin` must be passed")

        sdf, proj_extent = self._get_grid_blueprint(extent)

        sdf = sdf.selectExpr("explode(RS_PixelAsCentroids(tile, 1)) as exploded").selectExpr(
            f"exploded.geom as {self.geometry_col}"
        )

        if n_origin is not None:
            sdf = self._get_grid_id_from_centroids(sdf, n_origin=n_origin, e_origin=e_origin)
        else:
            sdf = self._get_grid_id_from_centroids(sdf, n_origin=proj_extent[0], e_origin=proj_extent[1])

        return sdf

    def cover_polygon_with_grid_centroids(
        self, polygon_sdf: DataFrame, n_origin: int = None, e_origin: int = None
    ) -> DataFrame:
        """Covers the given polygon with grid centroids.

        The polygon is expressed in EPSG:4326 and is covered with grid centroid point geometries in EPSG:3035.
        The returned DataFrame holds these geometries, the internal 4-byte grid ID and the origin used to define
        the 4-byte ID. If both `n_origin` and `e_origin` are provided, they are used as the origin of the ID; if
        not, the origin is taken from the extent covering the provided polygon.

        Passing `n_origin` and `e_origin` is useful to cover several polygons sharing the same origin, i.e. using
        the 4-byte grid ID defined in the same way for all of them.

        Args:
            polygon_sdf (DataFrame): DataFrame containing a single row with a polygon in EPSG:4326 in a column named
                `geometry`.
            n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
                Defaults to None.
            e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
                Defaults to None.

        Returns:
            DataFrame: The DataFrame representing the grid centroids covering the polygon, with their grid ID and origin
                columns.
        """
        # Cover the whole bounding box of the polygon first, then keep what intersects the polygon
        polygon_extent = self._get_polygon_sdf_extent(polygon_sdf)
        centroids_sdf = self.cover_extent_with_grid_centroids(polygon_extent, n_origin, e_origin)
        return self._get_grid_intersection_with_mask(centroids_sdf, polygon_sdf)

    def cover_extent_with_grid_tiles(
        self, extent: List[float], n_origin: int = None, e_origin: int = None
    ) -> Tuple[DataFrame, List[float]]:
        """Covers the given extent with grid tiles. It takes an extent expressed in EPSG:4326 and covers it
        with grid tile polygon geometries in EPSG:3035, returning a DataFrame with these geometries, the internal
        4-byte grid ID and the origin used to define the 4-byte ID. If both `n_origin` and `e_origin` are provided,
        they are used as the origin of the ID; if not, the origin is taken from the provided extent.

        It is desirable to define the origin using `n_origin` and `e_origin` when one wants to cover several extents
        sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

        Args:
            extent (list[float]): The extent in lat/lon (EPSG:4326) to cover with grid tiles. Ordering must be
                [lon_min, lat_min, lon_max, lat_max].
            n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.
            e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

        Returns:
            DataFrame: The DataFrame representing the grid tiles covering the extent, with their grid ID and origin
                columns.
        """
        if (n_origin is None and e_origin is not None) or (n_origin is not None and e_origin is None):
            raise ValueError("Either both or none of the arguments `n_origin` and `e_origin` must be passed")

        sdf, proj_extent = self._get_grid_blueprint(extent)

        sdf = sdf.selectExpr("explode(RS_PixelAsPolygons(tile, 1)) as exploded").selectExpr(
            f"exploded.geom as {self.geometry_col}"
        )

        if n_origin is not None:
            sdf = self._get_grid_id_from_grid_tiles(sdf, n_origin=n_origin, e_origin=e_origin)
        else:
            sdf = self._get_grid_id_from_grid_tiles(sdf, n_origin=proj_extent[0], e_origin=proj_extent[1])

        return sdf

    def cover_polygon_with_grid_tiles(self, polygon_sdf: DataFrame, n_origin: int, e_origin: int) -> DataFrame:
        """Covers the given polygon with grid tiles. It takes an polygon expressed in EPSG:4326 and covers it
        with grid tile polygon geometries in EPSG:3035, returning a DataFrame with these geometries, the internal
        4-byte grid ID and the origin used to define the 4-byte ID. If both `n_origin` and `e_origin` are provided,
        they are used as the origin of the ID; if not, the origin is taken from the polygon covering the provided
        polygon.

        It is desirable to define the origin using `n_origin` and `e_origin` when one wants to cover several polygons
        sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

        Args:
            polygon_sdf (DataFrame): DataFrame containing a single row with a polygon in EPSG:4326 in a column named
                `geometry`.
            n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.
            e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

        Returns:
            DataFrame: The DataFrame representing the grid tiles covering the polygon, with their grid ID and origin
                columns.
        """
        extent = self._get_polygon_sdf_extent(polygon_sdf)

        sdf = self.cover_extent_with_grid_tiles(extent, n_origin, e_origin)

        sdf = self._get_grid_intersection_with_mask(sdf, polygon_sdf)

        return sdf

    def grid_id_to_inspire_id(
        self, sdf: DataFrame, inspire_resolution: int, grid_id_col: str = None, origin: int = None
    ) -> DataFrame:
        """Function that takes a DataFrame containing 4-byte grid IDs and returns it with a new column containing
        the official INSPIRE grid ID string. Only accepted INSPIRE grid resolutions are 100m and 1km.

        It is expected that the grid ID column contains the internal representation for 100m grid tiles, and not for
        a coarser resolution. If the 100m INSPIRE grid ID was requested, the ID corresponding to the 100m grid tile
        represented by the internal grid ID is constructed. If the 1km INSPIRE grid ID was requested, the ID
        corresponding to the 1km grid tile containing the internal grid ID is constructed.

        By default, the function will use a ColNames.origin column of `sdf`. Only if the `origin` parameter is passed,
        the existence of this column will not be checked, and `origin` will be used as the origin of the 4-byte grid ID
        definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most
        significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the
        northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting
        value in metres (analogous for northing).

        Args:
            sdf (DataFrame): DataFrame containing the grid ID column, and a `ColNames.origin` column, to which the
                INSPIRE grid ID is to be added
            inspire_resolution (int): resolution for the INSPIRE grid ID. Currently accepts two value: `100` and `1000`.
            grid_id_col (str, optional): Name of the column containing the internal 4-byte grid ID. If None, the value
                `self.grid_id_col` is taken by default. Defaults to None
            origin (int, optional): If provided, it will be used as the origin of the definition of the 4-byte grid ID.
                It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that
                `sdf` contains a ColNames.origin column, and throws an error otherwise.

        Returns:
            DataFrame: DataFrame with a new column, `ColNames.inspire_id`, containing the INSPIRE grid ID strings.

        Raises:
            ValueError: If the `inspire_resolution` is not 100 or 1000.
            ValueError: If the `origin` is not an integer.
            ValueError: If the `sdf` does not contain a ColNames.origin column and `origin` is not passed.
        """
        if grid_id_col is None:
            grid_id_col = self.grid_id_col
        if inspire_resolution not in self.ACCEPTED_RESOLUTIONS:
            raise ValueError(
                f"Expected INSPIRE resolutions are {self.ACCEPTED_RESOLUTIONS} -- received `{inspire_resolution}`"
            )
        if origin is not None:
            if not isinstance(origin, int):
                raise ValueError(f"`origin` parameter must be an integer if used -- found type {type(origin)}")
            origin_column = F.lit(origin).cast(LongType())
        else:
            if ColNames.origin not in sdf.columns:
                raise ValueError(f"`sdf` must contain a {ColNames.origin} column, or `origin` parameter must be passed")
            origin_column = F.col(ColNames.origin)

        sdf = sdf.withColumn(
            "easting",
            F.shiftrightunsigned(grid_id_col, 16).cast(LongType()) + F.shiftrightunsigned(origin_column, 32),
        ).withColumn(
            "northing",
            F.col(grid_id_col).bitwiseAND((1 << 16) - 1).cast(LongType()) + origin_column.bitwiseAND((1 << 32) - 1),
        )

        # Substract the units digit to get the ID for 1km
        if inspire_resolution == 1000:
            sdf = sdf.withColumn("northing", F.expr("northing DIV 10")).withColumn("easting", F.expr("easting DIV 10"))
        sdf = sdf.withColumn(
            ColNames.inspire_id,
            F.concat(
                F.lit(self._format_distance(inspire_resolution)),
                F.lit("N"),
                F.col("northing"),
                F.lit("E"),
                F.col("easting"),
            ),
        ).drop("northing", "easting")
        return sdf

    def grid_id_to_coarser_resolution(
        self, sdf: DataFrame, coarse_resolution: int, coarse_grid_id_col: str = None
    ) -> DataFrame:
        """Convert 100m internal grid IDs into the IDs of the coarser tiles that contain them.

        The 100m grid IDs are always read from the `self.grid_id_col` column. Each ID is split into its easting
        (upper 16 bits) and northing (lower 16 bits) components, both components are rounded down to the nearest
        multiple of `coarse_resolution // self.resolution`, and the result is packed back into a single ID.
        No temporary columns are added to `sdf`, so existing columns are left untouched.

        Notice that this method does not take into account the origin of the 4-byte grid IDs. Thus, the coarser grids
        need not be compatible with the INSPIRE definition of a resolution coarser than 100m.

        Args:
            sdf (DataFrame): DataFrame for which a coarser resolution grid ID will be computed. Must contain the
                `self.grid_id_col` column holding 100m grid IDs.
            coarse_resolution (int): coarser resolution to compute. Must be a multiple of `self.resolution`, i.e., 100,
                and strictly greater than it.
            coarse_grid_id_col (str, optional): column that will hold the IDs of the grid tiles in the coarser
                resolution. If None, it will replace the original grid ID column. Defaults to None.

        Returns:
            DataFrame: DataFrame with the coarser grid IDs.

        Raises:
            ValueError: If `coarse_resolution` is not a multiple of `self.resolution`.
            ValueError: If `coarse_resolution` is not greater than `self.resolution`.
        """
        if coarse_resolution % self.resolution != 0:
            raise ValueError(f"Coarser resolution {coarse_resolution} must be a multiple of {self.resolution}")
        if coarse_resolution <= self.resolution:
            raise ValueError(f"Coarser resolution {coarse_resolution} must be greater than {self.resolution}")

        factor = coarse_resolution // self.resolution

        if coarse_grid_id_col is None:
            coarse_grid_id_col = self.grid_id_col

        # Unpack the ID: easting lives in the upper 16 bits, northing in the lower 16 bits
        easting = F.shiftrightunsigned(self.grid_id_col, 16)
        northing = F.col(self.grid_id_col).bitwiseAND((1 << 16) - 1)

        # Snap both components down to the lower-left corner of the coarse tile
        easting = easting - easting % factor
        northing = northing - northing % factor

        # Pack the snapped components back into a single ID
        return sdf.withColumn(coarse_grid_id_col, F.shiftleft(easting, 16) + northing)

    def grid_id_from_coarser_resolution(
        self, sdf: DataFrame, coarse_resolution: int, coarse_grid_id_col: str, new_grid_id_col: str = None
    ) -> DataFrame:
        """This function takes a DataFrame that contains the grid ID representation of grid tiles in a resolution
        coarser than 100m, and transforms it back into 100m.

        Every input row is expanded into `factor * factor` rows, where `factor = coarse_resolution // self.resolution`,
        one for each 100m tile obtained by adding an (easting, northing) offset in `[0, factor)` to the coarse ID.

        Args:
            sdf (DataFrame): DataFrame with grid IDs in a coarser resolution.
            coarse_resolution (int): coarser resolution of the grid IDs of the provided DataFrame. Must be a multiple
                of `self.resolution`, i.e., 100, and strictly greater than it.
            coarse_grid_id_col (str): column that currently holds the IDs of the grid tiles in the coarser
                resolution.
            new_grid_id_col (str, optional): column that will hold the IDs of the grid tiles in the 100m resolution.
                If None, it will be set (and possibly replace an existing column) as `self.grid_id_col`.
                Defaults to None.

        Returns:
            DataFrame: DataFrame with the 100m grid IDs, one row per 100m tile of each coarse tile.

        Raises:
            ValueError: If `coarse_resolution` is not a multiple of `self.resolution`.
            ValueError: If `coarse_resolution` is not greater than `self.resolution`.
        """
        if coarse_resolution % self.resolution != 0:
            raise ValueError(f"Coarser resolution {coarse_resolution} must be a multiple of {self.resolution}")
        if coarse_resolution <= self.resolution:
            raise ValueError(f"Coarser resolution {coarse_resolution} must be greater than {self.resolution}")
        if new_grid_id_col is None:
            new_grid_id_col = self.grid_id_col

        factor = coarse_resolution // self.resolution
        # Easting offset goes in the upper 16 bits, northing offset in the lower 16 bits.
        # Each row must be a 1-tuple: a StructType schema does not accept bare integers as rows.
        offsets = [((i << 16) + j,) for i in range(factor) for j in range(factor)]
        offsets_df = self.spark.createDataFrame(
            offsets,
            schema=StructType([StructField("offset", IntegerType(), False)]),
        )

        # The offsets table is tiny (factor^2 rows), so hint Spark to broadcast it in the cross join
        offsets_df = F.broadcast(offsets_df)

        sdf = (
            sdf.crossJoin(offsets_df)
            .withColumn(new_grid_id_col, F.col(coarse_grid_id_col) + F.col("offset"))
            .drop("offset")
        )

        return sdf

    def inspire_id_to_grid_centroids(
        self, sdf: DataFrame, inspire_id_col: str = None, geometry_col: str = None
    ) -> DataFrame:
        """Function that takes a DataFrame containing INSPIRE grid ID strings and returns it with point geometries
        of the centroids of the corresponding grid tiles. It extracts the units and grid size from the first element
        of the DataFrame and uses it to construct the necessary geometries, so all rows are expected to share the
        same resolution.

        The tile size in metres is `grid_size` for IDs in metres (e.g. `100m` -> 100) and `grid_size * 1000` for IDs
        in kilometres (e.g. `1km` -> 1000). The centroid is the tile's lower-left corner (ID value times tile size)
        shifted by half the tile size along both axes.

        Args:
            sdf (DataFrame): DataFrame containing the INSPIRE grid ID strings.
            inspire_id_col (str, optional): name of the column holding the INSPIRE grid IDs. If None, it is set to
                `ColNames.inspire_id`. Defaults to None.
            geometry_col (str, optional): column that will hold the grid centroid geometries. If None, it is set to
                `self.geometry_col`. Defaults to None.

        Returns:
            DataFrame: DataFrame with the grid centroid geometries

        Raises:
            ValueError: If the resolution cannot be read because `sdf` is empty or its first ID is null.
            ValueError: If the resolution prefix of the first ID is not of the form `<int>m` or `<int>km`.
        """
        if inspire_id_col is None:
            inspire_id_col = ColNames.inspire_id
        if geometry_col is None:
            geometry_col = self.geometry_col

        # First, get the INSPIRE resolution prefix (everything before the first `N`) from the first row
        prefix_row = sdf.select(
            F.regexp_extract(F.col(inspire_id_col), r"^(.*?)N", 1).alias("prefix")
        ).first()
        resolution_str = None if prefix_row is None else prefix_row["prefix"]
        if resolution_str is None:
            raise ValueError(
                f"Could not read an INSPIRE grid resolution from column `{inspire_id_col}`: "
                "the DataFrame is empty or its first ID is null"
            )

        # Parse and validate the INSPIRE resolution. `km` must be tested first, as it also ends in `m`
        if resolution_str[-2:] == "km":
            size_digits, metres_per_unit = resolution_str[:-2], 1000
        elif resolution_str[-1:] == "m":
            size_digits, metres_per_unit = resolution_str[:-1], 1
        else:
            raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`")
        try:
            grid_size = int(size_digits)
        except ValueError as err:
            raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`") from err

        # Tile side length in metres; INSPIRE ID values are expressed in multiples of it
        cell_size = grid_size * metres_per_unit
        half_cell = cell_size // 2

        # Create geometries. Multiply INSPIRE ID northing and easting values by the tile size, and add half
        # the tile size to get the centroid of each tile

        # Sedona has (X, Y) = (Easting, Northing) for EPSG 3035
        sdf = sdf.withColumn(
            geometry_col,
            STC.ST_Point(
                F.regexp_extract(inspire_id_col, r"E(\d+)", 1).cast(LongType()) * cell_size + half_cell,
                F.regexp_extract(inspire_id_col, r"N(\d+)E", 1).cast(LongType()) * cell_size + half_cell,
            ),
        )

        # Set the CRS of the geometry
        sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

        return sdf

    def inspire_id_to_grid_tiles(
        self, sdf: DataFrame, inspire_id_col: str = None, geometry_col: str = None
    ) -> DataFrame:
        """Function that takes a DataFrame containing INSPIRE grid ID strings and returns it with polygon geometries
        of the corresponding grid tiles. It extracts the units and grid size from the first element of the DataFrame
        and uses it to construct the necessary geometries, so all rows are expected to share the same resolution.

        The tile size in metres is `grid_size` for IDs in metres (e.g. `100m` -> 100) and `grid_size * 1000` for IDs
        in kilometres (e.g. `1km` -> 1000). Each tile is the square whose lower-left corner is the ID value times the
        tile size and whose side is the tile size. No temporary columns are added to `sdf`.

        Args:
            sdf (DataFrame): DataFrame containing the INSPIRE grid ID strings.
            inspire_id_col (str, optional): name of the column holding the INSPIRE grid IDs. If None, it is set to
                `ColNames.inspire_id`. Defaults to None.
            geometry_col (str, optional): column that will hold the grid tile geometries. If None, it is set to
                `self.geometry_col`. Defaults to None.

        Returns:
            DataFrame: DataFrame with the grid tile geometries

        Raises:
            ValueError: If the resolution cannot be read because `sdf` is empty or its first ID is null.
            ValueError: If the resolution prefix of the first ID is not of the form `<int>m` or `<int>km`.
        """
        if inspire_id_col is None:
            inspire_id_col = ColNames.inspire_id
        if geometry_col is None:
            geometry_col = self.geometry_col

        # First, get the INSPIRE resolution prefix (everything before the first `N`) from the first row
        prefix_row = sdf.select(
            F.regexp_extract(F.col(inspire_id_col), r"^(.*?)N", 1).alias("prefix")
        ).first()
        resolution_str = None if prefix_row is None else prefix_row["prefix"]
        if resolution_str is None:
            raise ValueError(
                f"Could not read an INSPIRE grid resolution from column `{inspire_id_col}`: "
                "the DataFrame is empty or its first ID is null"
            )

        # Parse and validate the INSPIRE resolution. `km` must be tested first, as it also ends in `m`
        if resolution_str[-2:] == "km":
            size_digits, metres_per_unit = resolution_str[:-2], 1000
        elif resolution_str[-1:] == "m":
            size_digits, metres_per_unit = resolution_str[:-1], 1
        else:
            raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`")
        try:
            grid_size = int(size_digits)
        except ValueError as err:
            raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`") from err

        # Tile side length in metres; INSPIRE ID values are expressed in multiples of it
        cell_size = grid_size * metres_per_unit

        # Lower-left corner of each tile, in metres
        easting = F.regexp_extract(inspire_id_col, r"E(\d+)", 1).cast(LongType()) * cell_size
        northing = F.regexp_extract(inspire_id_col, r"N(\d+)E", 1).cast(LongType()) * cell_size

        # Sedona has (X, Y) = (Easting, Northing) for EPSG 3035
        sdf = sdf.withColumn(
            geometry_col,
            STC.ST_PolygonFromEnvelope(
                easting,  # min_x (min_easting)
                northing,  # min_y, (min_northing)
                easting + F.lit(cell_size),  # max_x (max_easting)
                northing + F.lit(cell_size),  # max_y (max_northing)
            ),
        )

        # Set the CRS of the geometry
        sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

        return sdf

    def grid_ids_to_grid_centroids(
        self,
        sdf: DataFrame,
        grid_resolution: int,
        grid_id_col: str = None,
        geometry_col: str = None,
        origin: int = None,
    ) -> DataFrame:
        """Function that takes a DataFrame containing internal 4-byte grid IDs and returns it with point geometries
        of the centroids of the corresponding grid tiles.

        By default, the function will use a ColNames.origin column of `sdf`. Only if the `origin` parameter is passed,
        the existence of this column will not be checked, and `origin` will be used as the origin of the 4-byte grid ID
        definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most
        significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the
        northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting
        value in metres (analogous for northing).

        Args:
            sdf (DataFrame): DataFrame containing the internal 4-byte grid IDs.
            grid_resolution (int): resolution, in metres, of the current grid as represented by the internal 4-byte
                grid IDs. Must be a multiple of `self.resolution`. i.e. 100.
            grid_id_col (str, optional): column that holds the internal grid IDs. If None, it is set to
                `self.grid_id_col`. Defaults to None.
            geometry_col (str, optional): column that will hold the grid centroid geometries. If None, it is set to
                `self.geometry_col`. Defaults to None.
            origin (int, optional): If provided, it will be used as the origin of the definition of the 4-byte grid ID.
                It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that
                `sdf` contains a ColNames.origin column, and throws an error otherwise.

        Returns:
            DataFrame: DataFrame with the grid centroid geometries

        Raises:
            ValueError: If `grid_resolution` is not a multiple of `self.resolution`.
            ValueError: If `origin` is passed and is not an integer.
            ValueError: If `origin` is not passed and `sdf` does not contain a ColNames.origin column.
        """

        if grid_resolution % self.resolution != 0:
            raise ValueError(f"Grid resolution must be a multiple of {self.resolution}")
        if geometry_col is None:
            geometry_col = self.geometry_col
        if grid_id_col is None:
            grid_id_col = self.grid_id_col
        if origin is not None:
            if not isinstance(origin, int):
                raise ValueError(f"`origin` parameter must be an integer if used -- found type {type(origin)}")
            origin_column = F.lit(origin).cast(LongType())
        else:
            if ColNames.origin not in sdf.columns:
                raise ValueError(f"`sdf` must contain a {ColNames.origin} column, or `origin` parameter must be passed")
            origin_column = F.col(ColNames.origin)

        # Tile position in units of `self.resolution`: the ID's upper 16 bits (easting) and lower 16 bits
        # (northing), each shifted by the matching 32-bit half of the origin. The IDs are read from the
        # caller-selected `grid_id_col`.
        easting_units = F.shiftrightunsigned(grid_id_col, 16).cast(LongType()) + F.shiftrightunsigned(
            origin_column, 32
        )
        northing_units = F.col(grid_id_col).bitwiseAND((1 << 16) - 1).cast(LongType()) + origin_column.bitwiseAND(
            (1 << 32) - 1
        )

        # Offset from the tile's lower-left corner to its centroid
        half_tile = grid_resolution // 2

        # For Sedona, (X, Y) == (Easting, Northing) in EPSG 3035
        sdf = sdf.withColumn(
            geometry_col,
            STC.ST_Point(
                easting_units * self.resolution + half_tile,
                northing_units * self.resolution + half_tile,
            ),
        )

        sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

        return sdf

    def grid_ids_to_grid_tiles(
        self,
        sdf: DataFrame,
        grid_resolution: int,
        geometry_col: str = None,
        origin: int = None,
        grid_id_col: str = None,
    ) -> DataFrame:
        """Function that takes a DataFrame containing internal 4-byte grid IDs and returns it with polygon geometries
        of the corresponding grid tiles.

        By default, the function will use a ColNames.origin column of `sdf`. Only if the `origin` parameter is passed,
        the existence of this column will not be checked, and `origin` will be used as the origin of the 4-byte grid ID
        definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most
        significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the
        northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting
        value in metres (analogous for northing).

        No temporary columns are added to `sdf`, so existing columns are left untouched.

        Args:
            sdf (DataFrame): DataFrame containing the internal 4-byte grid IDs.
            grid_resolution (int): resolution, in metres, of the current grid as represented by the internal 4-byte
                grid IDs. Must be a multiple of `self.resolution`. i.e. 100.
            geometry_col (str, optional): column that will hold the grid tile geometries. If None, it is set to
                `self.geometry_col`. Defaults to None.
            origin (int, optional): If provided, it will be used as the origin of the definition of the 4-byte grid ID.
                It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that
                `sdf` contains a ColNames.origin column, and throws an error otherwise.
            grid_id_col (str, optional): column that holds the internal grid IDs. If None, it is set to
                `self.grid_id_col`. Defaults to None.

        Returns:
            DataFrame: DataFrame with the grid tile geometries

        Raises:
            ValueError: If `grid_resolution` is not a multiple of `self.resolution`.
            ValueError: If `origin` is passed and is not an integer.
            ValueError: If `origin` is not passed and `sdf` does not contain a ColNames.origin column.
        """
        if grid_resolution % self.resolution != 0:
            raise ValueError(f"Grid resolution must be a multiple of {self.resolution}")
        if geometry_col is None:
            geometry_col = self.geometry_col
        if grid_id_col is None:
            grid_id_col = self.grid_id_col
        if origin is not None:
            if not isinstance(origin, int):
                raise ValueError(f"`origin` parameter must be an integer if used -- found type {type(origin)}")
            origin_column = F.lit(origin).cast(LongType())
        else:
            if ColNames.origin not in sdf.columns:
                raise ValueError(f"`sdf` must contain a {ColNames.origin} column, or `origin` parameter must be passed")
            origin_column = F.col(ColNames.origin)

        # Lower-left corner of each tile in metres: the ID's upper 16 bits (easting) and lower 16 bits (northing),
        # each shifted by the matching 32-bit half of the origin and scaled by `self.resolution`
        easting = (
            F.shiftrightunsigned(grid_id_col, 16).cast(LongType()) + F.shiftrightunsigned(origin_column, 32)
        ) * self.resolution
        northing = (
            F.col(grid_id_col).bitwiseAND((1 << 16) - 1).cast(LongType()) + origin_column.bitwiseAND((1 << 32) - 1)
        ) * self.resolution

        # For Sedona, (X, Y) == (Easting, Northing) in EPSG 3035
        sdf = sdf.withColumn(
            geometry_col,
            STC.ST_PolygonFromEnvelope(
                easting,  # min_x (min_easting)
                northing,  # min_y (min_northing)
                easting + grid_resolution,  # max_x (max_easting)
                northing + grid_resolution,  # max_y (max_northing)
            ),
        )

        # Set the CRS of the geometries
        sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

        return sdf

cover_extent_with_grid_centroids(extent, n_origin=None, e_origin=None)

Covers the given extent with grid centroids. It takes an extent expressed in EPSG:4326 and covers it with grid centroid point geometries in EPSG:3035, returning a DataFrame with these geometries, the internal 4-byte grid ID and the origin used to define the 4-byte ID. If both n_origin and e_origin are provided, they are used as the origin of the ID; if not, the origin is taken from the provided extent.

It is desirable to define the origin using n_origin and e_origin when one wants to cover several extents sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

Parameters:

Name Type Description Default
extent list[float]

The extent in lat/lon (EPSG:4326) to cover with grid centroids. Ordering must be [lon_min, lat_min, lon_max, lat_max].

required
n_origin int

northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

None
e_origin int

easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

The DataFrame representing the grid centroids covering the extent, with their grid ID and origin columns.

Source code in multimno/core/grid.py
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
def cover_extent_with_grid_centroids(
    self, extent: List[float], n_origin: int = None, e_origin: int = None
) -> DataFrame:
    """Cover an EPSG:4326 extent with the centroids of the grid tiles, expressed in EPSG:3035.

    The result holds the centroid point geometries together with the internal 4-byte grid ID and the origin
    used to define that ID. When both `n_origin` and `e_origin` are given they define the origin of the ID;
    when neither is given, the origin is taken from the first two values of the projected extent returned by
    `self._get_grid_blueprint`.

    Passing an explicit origin is useful to cover several extents with grid IDs that are all defined relative
    to the same origin.

    Args:
        extent (list[float]): The extent in lat/lon (EPSG:4326) to cover with grid centroids. Ordering must be
            [lon_min, lat_min, lon_max, lat_max].
        n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.
        e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.

    Returns:
        DataFrame: The DataFrame representing the grid centroids covering the extent, with their grid ID and origin
            columns.

    Raises:
        ValueError: If only one of `n_origin` and `e_origin` is passed.
    """
    # The origin only makes sense as a pair: reject the call when exactly one half is missing
    if (n_origin is None) != (e_origin is None):
        raise ValueError("Either both or none of the arguments `n_origin` and `e_origin` must be passed")

    blueprint_sdf, proj_extent = self._get_grid_blueprint(extent)

    # One row per raster pixel of band 1, keeping only the centroid geometry
    centroids_sdf = blueprint_sdf.selectExpr("explode(RS_PixelAsCentroids(tile, 1)) as exploded").selectExpr(
        f"exploded.geom as {self.geometry_col}"
    )

    # No explicit origin: fall back to the one given by the projected extent
    if n_origin is None:
        n_origin, e_origin = proj_extent[0], proj_extent[1]

    return self._get_grid_id_from_centroids(centroids_sdf, n_origin=n_origin, e_origin=e_origin)

cover_extent_with_grid_tiles(extent, n_origin=None, e_origin=None)

Covers the given extent with grid tiles. It takes an extent expressed in EPSG:4326 and covers it with grid tile polygon geometries in EPSG:3035, returning a DataFrame with these geometries, the internal 4-byte grid ID and the origin used to define the 4-byte ID. If both n_origin and e_origin are provided, they are used as the origin of the ID; if not, the origin is taken from the provided extent.

It is desirable to define the origin using n_origin and e_origin when one wants to cover several extents sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

Parameters:

Name Type Description Default
extent list[float]

The extent in lat/lon (EPSG:4326) to cover with grid tiles. Ordering must be [lon_min, lat_min, lon_max, lat_max].

required
n_origin int

northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

None
e_origin int

easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

The DataFrame representing the grid tiles covering the extent, with their grid ID and origin columns.

Source code in multimno/core/grid.py
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
def cover_extent_with_grid_tiles(
    self, extent: List[float], n_origin: int = None, e_origin: int = None
) -> DataFrame:
    """Covers the given extent with grid tiles. It takes an extent expressed in EPSG:4326 and covers it
    with grid tile polygon geometries in EPSG:3035, returning a DataFrame with these geometries, the internal
    4-byte grid ID and the origin used to define the 4-byte ID. If both `n_origin` and `e_origin` are provided,
    they are used as the origin of the ID; if not, the origin is taken from the first two values of the projected
    extent returned by `self._get_grid_blueprint`.

    It is desirable to define the origin using `n_origin` and `e_origin` when one wants to cover several extents
    sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

    Args:
        extent (list[float]): The extent in lat/lon (EPSG:4326) to cover with grid tiles. Ordering must be
            [lon_min, lat_min, lon_max, lat_max].
        n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.
        e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.

    Returns:
        DataFrame: The DataFrame representing the grid tiles covering the extent, with their grid ID and origin
            columns. Only the DataFrame is returned; the projected extent is not.

    Raises:
        ValueError: If only one of `n_origin` and `e_origin` is passed.
    """
    if (n_origin is None and e_origin is not None) or (n_origin is not None and e_origin is None):
        raise ValueError("Either both or none of the arguments `n_origin` and `e_origin` must be passed")

    sdf, proj_extent = self._get_grid_blueprint(extent)

    # One row per raster pixel of band 1, keeping only the tile polygon geometry
    sdf = sdf.selectExpr("explode(RS_PixelAsPolygons(tile, 1)) as exploded").selectExpr(
        f"exploded.geom as {self.geometry_col}"
    )

    if n_origin is not None:
        sdf = self._get_grid_id_from_grid_tiles(sdf, n_origin=n_origin, e_origin=e_origin)
    else:
        # No explicit origin: fall back to the one given by the projected extent
        sdf = self._get_grid_id_from_grid_tiles(sdf, n_origin=proj_extent[0], e_origin=proj_extent[1])

    return sdf

cover_polygon_with_grid_centroids(polygon_sdf, n_origin=None, e_origin=None)

Covers the given polygon with grid centroids. It takes a polygon expressed in EPSG:4326 and covers it with grid centroid point geometries in EPSG:3035, returning a DataFrame with these geometries, the internal 4-byte grid ID and the origin used to define the 4-byte ID. If both n_origin and e_origin are provided, they are used as the origin of the ID; if not, the origin is taken from the extent covering the provided polygon.

It is desirable to define the origin using n_origin and e_origin when one wants to cover several polygons sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

Parameters:

Name Type Description Default
polygon_sdf DataFrame

DataFrame containing a single row with a polygon in EPSG:4326 in a column named geometry.

required
n_origin int

northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

None
e_origin int

easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

The DataFrame representing the grid centroids covering the polygon, with their grid ID and origin columns.

Source code in multimno/core/grid.py
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
def cover_polygon_with_grid_centroids(
    self, polygon_sdf: DataFrame, n_origin: int = None, e_origin: int = None
) -> DataFrame:
    """Cover an EPSG:4326 polygon with the centroids of the grid tiles, expressed in EPSG:3035.

    The extent of the polygon is obtained with `self._get_polygon_sdf_extent`, covered with grid centroids via
    `self.cover_extent_with_grid_centroids`, and the result is then passed together with the polygon through
    `self._get_grid_intersection_with_mask`. The returned DataFrame holds the centroid point geometries, the
    internal 4-byte grid ID and the origin used to define that ID. When both `n_origin` and `e_origin` are
    given they define the origin of the ID; otherwise the origin is taken from the extent covering the polygon.

    Passing an explicit origin is useful to cover several polygons with grid IDs that are all defined relative
    to the same origin.

    Args:
        polygon_sdf (DataFrame): DataFrame containing a single row with a polygon in EPSG:4326 in a column named
            `geometry`.
        n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.
        e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.

    Returns:
        DataFrame: The DataFrame representing the grid centroids covering the polygon, with their grid ID and origin
            columns.
    """
    bounding_extent = self._get_polygon_sdf_extent(polygon_sdf)
    extent_centroids_sdf = self.cover_extent_with_grid_centroids(bounding_extent, n_origin, e_origin)
    return self._get_grid_intersection_with_mask(extent_centroids_sdf, polygon_sdf)

cover_polygon_with_grid_tiles(polygon_sdf, n_origin, e_origin)

Covers the given polygon with grid tiles. It takes a polygon expressed in EPSG:4326 and covers it with grid tile polygon geometries in EPSG:3035, returning a DataFrame with these geometries, the internal 4-byte grid ID and the origin used to define the 4-byte ID. If both n_origin and e_origin are provided, they are used as the origin of the ID; if not, the origin is taken from the extent covering the provided polygon.

It is desirable to define the origin using n_origin and e_origin when one wants to cover several polygons sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

Parameters:

Name Type Description Default
polygon_sdf DataFrame

DataFrame containing a single row with a polygon in EPSG:4326 in a column named geometry.

required
n_origin int

northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

required
e_origin int

easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres). Defaults to None.

required

Returns:

Name Type Description
DataFrame DataFrame

The DataFrame representing the grid tiles covering the polygon, with their grid ID and origin columns.

Source code in multimno/core/grid.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
def cover_polygon_with_grid_tiles(
    self, polygon_sdf: DataFrame, n_origin: int = None, e_origin: int = None
) -> DataFrame:
    """Covers the given polygon with grid tiles. It takes a polygon expressed in EPSG:4326 and covers it
    with grid tile polygon geometries in EPSG:3035, returning a DataFrame with these geometries, the internal
    4-byte grid ID and the origin used to define the 4-byte ID. If both `n_origin` and `e_origin` are provided,
    they are used as the origin of the ID; if not, the origin is taken from the extent covering the provided
    polygon.

    It is desirable to define the origin using `n_origin` and `e_origin` when one wants to cover several polygons
    sharing the same origin, i.e. using the 4-byte grid ID defined in the same way for all of them.

    Args:
        polygon_sdf (DataFrame): DataFrame containing a single row with a polygon in EPSG:4326 in a column named
            `geometry`.
        n_origin (int, optional): northing origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.
        e_origin (int, optional): easting origin to be used for the 4-byte grid ID, in EPSG:3035 (metres).
            Defaults to None.

    Returns:
        DataFrame: The DataFrame representing the grid tiles covering the polygon, with their grid ID and origin
            columns.
    """
    extent = self._get_polygon_sdf_extent(polygon_sdf)

    # The origin arguments are forwarded unchanged to the extent-level method
    sdf = self.cover_extent_with_grid_tiles(extent, n_origin, e_origin)

    sdf = self._get_grid_intersection_with_mask(sdf, polygon_sdf)

    return sdf

grid_id_from_coarser_resolution(sdf, coarse_resolution, coarse_grid_id_col, new_grid_id_col=None)

This function takes a DataFrame that contains the grid ID representation of grid tiles in a resolution coarser than 100m, and transforms it back into 100m.

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame with grid IDs in a coarser resolution.

required
coarse_resolution int

coarser resolution of the grid IDs of the provided DataFrame. Must be a multiple of self.resolution, i.e., 100.

required
coarse_grid_id_col str

column that currently holds the IDs of the grid tiles in the coarser resolution.

required
new_grid_id_col str

column that will hold the IDs of the grid tiles in the 100m resolution. If None, it will be set (and possibly replace an existing column) as self.grid_id_col. Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with the 100m-resolution grid IDs.

Source code in multimno/core/grid.py
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
def grid_id_from_coarser_resolution(
    self, sdf: DataFrame, coarse_resolution: int, coarse_grid_id_col: str, new_grid_id_col: str = None
) -> DataFrame:
    """Expand grid IDs of a coarser resolution back into the 100m grid IDs they contain.

    Each input row is replicated ``factor * factor`` times, where ``factor = coarse_resolution // self.resolution``,
    and every copy receives the coarse ID plus one offset ``(i << 16) + j`` with ``0 <= i, j < factor``. The coarse
    ID is therefore treated as the ID of the 100m tile with the lowest indices inside the coarse tile, which is the
    representation produced by `grid_id_to_coarser_resolution`.

    Args:
        sdf (DataFrame): DataFrame with grid IDs in a coarser resolution.
        coarse_resolution (int): coarser resolution of the grid IDs of the provided DataFrame. Must be a multiple
            of `self.resolution`, i.e., 100, and strictly greater than it.
        coarse_grid_id_col (str): column that currently holds the IDs of the grid tiles in the coarser
            resolution.
        new_grid_id_col (str, optional): column that will hold the IDs of the grid tiles in the 100m resolution.
            If None, it will be set to `self.grid_id_col` (possibly replacing an existing column).
            Defaults to None.

    Returns:
        DataFrame: DataFrame with one row per contained 100m tile, holding the 100m grid IDs in
            `new_grid_id_col`.

    Raises:
        ValueError: If `coarse_resolution` is not a multiple of `self.resolution`.
        ValueError: If `coarse_resolution` is not greater than `self.resolution`.
    """
    if coarse_resolution % self.resolution != 0:
        raise ValueError(f"Coarser resolution {coarse_resolution} must be a multiple of {self.resolution}")
    if coarse_resolution <= self.resolution:
        raise ValueError(f"Coarser resolution {coarse_resolution} must be greater than {self.resolution}")
    if new_grid_id_col is None:
        new_grid_id_col = self.grid_id_col

    factor = coarse_resolution // self.resolution

    # Rows must be 1-tuples: with a StructType schema, createDataFrame rejects bare ints as rows.
    offsets_df = self.spark.createDataFrame(
        [((i << 16) + j,) for i in range(factor) for j in range(factor)],
        schema=StructType([StructField("offset", IntegerType(), False)]),
    )

    # The offsets table has only factor**2 rows, so hint Spark to broadcast it for the cross join.
    offsets_df = F.broadcast(offsets_df)

    # NOTE(review): a pre-existing `offset` column in `sdf` would clash with the helper column used here.
    sdf = (
        sdf.crossJoin(offsets_df)
        .withColumn(new_grid_id_col, F.col(coarse_grid_id_col) + F.col("offset"))
        .drop("offset")
    )

    return sdf

grid_id_to_coarser_resolution(sdf, coarse_resolution, coarse_grid_id_col=None)

This function takes a DataFrame that contains the grid ID representation of 100m grid tiles, and transforms it into a coarser resolution. It is always expected that the provided DataFrame has a grid ID that represents 100m grid tiles (in the self.grid_id_col column), and not a different resolution.

Notice that this method does not take into account the origin of the 4-byte grid IDs. Thus, the coarser grids need not be compatible with the INSPIRE definition of a resolution coarser than 100m.

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame for which a coarser resolution grid ID will be computed

required
coarse_resolution int

coarser resolution to compute. Must be a multiple of self.resolution, i.e., 100.

required
coarse_grid_id_col str

column that will hold the IDs of the grid tiles in the coarser resolution. If None, it will replace the original grid ID column. Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with the coarser grid IDs.

Source code in multimno/core/grid.py
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
def grid_id_to_coarser_resolution(
    self, sdf: DataFrame, coarse_resolution: int, coarse_grid_id_col: str = None
) -> DataFrame:
    """Compute, for every 100m grid ID, the ID of the coarser tile that contains it.

    The provided DataFrame is always expected to hold grid IDs representing 100m grid tiles in the
    `self.grid_id_col` column, and not a different resolution. The upper 16 bits (easting index) and the lower
    16 bits (northing index) of each ID are independently rounded down to a multiple of
    ``coarse_resolution // self.resolution`` and packed back together.

    Notice that this method does not take into account the origin of the 4-byte grid IDs. Thus, the coarser grids
    need not be compatible with the INSPIRE definition of a resolution coarser than 100m.

    Args:
        sdf (DataFrame): DataFrame for which a coarser resolution grid ID will be computed.
        coarse_resolution (int): coarser resolution to compute. Must be a multiple of `self.resolution`, i.e., 100,
            and strictly greater than it.
        coarse_grid_id_col (str, optional): column that will hold the IDs of the grid tiles in the coarser
            resolution. If None, it will replace the original grid ID column. Defaults to None.

    Returns:
        DataFrame: DataFrame with the coarser grid IDs.

    Raises:
        ValueError: If `coarse_resolution` is not a multiple of `self.resolution`.
        ValueError: If `coarse_resolution` is not greater than `self.resolution`.
    """
    if coarse_resolution % self.resolution != 0:
        raise ValueError(f"Coarser resolution {coarse_resolution} must be a multiple of {self.resolution}")
    if coarse_resolution <= self.resolution:
        raise ValueError(f"Coarser resolution {coarse_resolution} must be greater than {self.resolution}")

    factor = coarse_resolution // self.resolution

    if coarse_grid_id_col is None:
        coarse_grid_id_col = self.grid_id_col

    # Read from the configured grid ID column (the same one that is replaced by default), and keep the
    # intermediate values as column expressions so that no `easting`/`northing` column of the caller is
    # overwritten or dropped.
    fine_id = F.col(self.grid_id_col)
    easting = F.shiftrightunsigned(fine_id, 16)
    northing = fine_id.bitwiseAND((1 << 16) - 1)

    # Round each index down to the nearest multiple of `factor`
    coarse_easting = easting - easting % factor
    coarse_northing = northing - northing % factor

    return sdf.withColumn(coarse_grid_id_col, F.shiftleft(coarse_easting, 16) + coarse_northing)

grid_id_to_inspire_id(sdf, inspire_resolution, grid_id_col=None, origin=None)

Function that takes a DataFrame containing 4-byte grid IDs and returns it with a new column containing the official INSPIRE grid ID string. Only accepted INSPIRE grid resolutions are 100m and 1km.

It is expected that the grid ID column contains the internal representation for 100m grid tiles, and not for a coarser resolution. If the 100m INSPIRE grid ID was requested, the ID corresponding to the 100m grid tile represented by the internal grid ID is constructed. If the 1km INSPIRE grid ID was requested, the ID corresponding to the 1km grid tile containing the internal grid ID is constructed.

By default, the function will use a ColNames.origin column of sdf. Only if the origin parameter is passed, the existence of this column will not be checked, and origin will be used as the origin of the 4-byte grid ID definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting value in metres (analogous for northing).

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame containing the grid ID column, and a ColNames.origin column, to which the INSPIRE grid ID is to be added

required
inspire_resolution int

resolution for the INSPIRE grid ID. Currently accepts two values: 100 and 1000.

required
grid_id_col str

Name of the column containing the internal 4-byte grid ID. If None, the value self.grid_id_col is taken by default. Defaults to None

None
origin int

If provided, it will be used as the origin of the definition of the 4-byte grid ID. It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that sdf contains a ColNames.origin column, and throws an error otherwise.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with a new column, ColNames.inspire_id, containing the INSPIRE grid ID strings.

Raises:

Type Description
ValueError

If the inspire_resolution is not 100 or 1000.

ValueError

If the origin is not an integer.

ValueError

If the sdf does not contain a ColNames.origin column and origin is not passed.

Source code in multimno/core/grid.py
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
def grid_id_to_inspire_id(
    self, sdf: DataFrame, inspire_resolution: int, grid_id_col: str = None, origin: int = None
) -> DataFrame:
    """Add a column with the official INSPIRE grid ID string derived from the internal 4-byte grid IDs.

    The grid ID column is expected to hold the internal representation of 100m grid tiles, not of a coarser
    resolution. For a requested resolution of 100, the INSPIRE ID of that same 100m tile is built; for a requested
    resolution of 1000, the INSPIRE ID of the 1km tile containing it is built (indices are integer-divided by 10).

    The origin of the 4-byte grid ID definition is read from the `ColNames.origin` column of `sdf` unless the
    `origin` parameter is passed, in which case the parameter is used and the column is neither checked nor read.
    The origin is handled as an 8-byte integer: its upper 32 bits are added to the easting index and its lower
    32 bits to the northing index. These halves are documented as the origin coordinates in metres divided by 100.

    Args:
        sdf (DataFrame): DataFrame containing the grid ID column (and a `ColNames.origin` column unless `origin`
            is passed) to which the INSPIRE grid ID is to be added.
        inspire_resolution (int): resolution for the INSPIRE grid ID. Must be one of `self.ACCEPTED_RESOLUTIONS`
            (documented as `100` and `1000`).
        grid_id_col (str, optional): name of the column containing the internal 4-byte grid ID. If None,
            `self.grid_id_col` is used. Defaults to None.
        origin (int, optional): origin of the definition of the 4-byte grid ID. When given, the
            `ColNames.origin` column is ignored even if it exists. Defaults to None.

    Returns:
        DataFrame: DataFrame with a new column, `ColNames.inspire_id`, containing the INSPIRE grid ID strings.

    Raises:
        ValueError: If `inspire_resolution` is not an accepted resolution.
        ValueError: If `origin` is passed and is not an integer.
        ValueError: If `sdf` does not contain a `ColNames.origin` column and `origin` is not passed.
    """
    id_col_name = self.grid_id_col if grid_id_col is None else grid_id_col

    if inspire_resolution not in self.ACCEPTED_RESOLUTIONS:
        raise ValueError(
            f"Expected INSPIRE resolutions are {self.ACCEPTED_RESOLUTIONS} -- received `{inspire_resolution}`"
        )

    # Resolve where the origin comes from: the explicit parameter wins over the DataFrame column
    if origin is None:
        if ColNames.origin not in sdf.columns:
            raise ValueError(f"`sdf` must contain a {ColNames.origin} column, or `origin` parameter must be passed")
        origin_column = F.col(ColNames.origin)
    elif isinstance(origin, int):
        origin_column = F.lit(origin).cast(LongType())
    else:
        raise ValueError(f"`origin` parameter must be an integer if used -- found type {type(origin)}")

    low_16_bits = (1 << 16) - 1
    low_32_bits = (1 << 32) - 1

    # Absolute indices = index stored in the grid ID + index stored in the origin
    easting_expr = F.shiftrightunsigned(id_col_name, 16).cast(LongType()) + F.shiftrightunsigned(origin_column, 32)
    northing_expr = F.col(id_col_name).bitwiseAND(low_16_bits).cast(LongType()) + origin_column.bitwiseAND(
        low_32_bits
    )
    sdf = sdf.withColumn("easting", easting_expr).withColumn("northing", northing_expr)

    # Drop the units digit of the 100m indices to obtain the 1km indices
    if inspire_resolution == 1000:
        sdf = sdf.withColumn("northing", F.expr("northing DIV 10")).withColumn("easting", F.expr("easting DIV 10"))

    inspire_id = F.concat(
        F.lit(self._format_distance(inspire_resolution)),
        F.lit("N"),
        F.col("northing"),
        F.lit("E"),
        F.col("easting"),
    )
    return sdf.withColumn(ColNames.inspire_id, inspire_id).drop("northing", "easting")

grid_ids_to_grid_centroids(sdf, grid_resolution, grid_id_col=None, geometry_col=None, origin=None)

Function that takes a DataFrame containing internal 4-byte grid IDs and returns it with point geometries of the centroids of the corresponding grid tiles.

By default, the function will use a ColNames.origin column of sdf. Only if the origin parameter is passed, the existence of this column will not be checked, and origin will be used as the origin of the 4-byte grid ID definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting value in metres (analogous for northing).

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame containing the internal 4-byte grid IDs.

required
grid_resolution int

resolution, in metres, of the current grid as represented by the internal 4-byte grid IDs. Must be a multiple of self.resolution, i.e. 100.

required
grid_id_col str

column that holds the internal grid IDs. If None, it is set to self.grid_id_col. Defaults to None.

None
geometry_col str

column that will hold the grid centroid geometries. If None, it is set to self.geometry_col. Defaults to None.

None
origin int

If provided, it will be used as the origin of the definition of the 4-byte grid ID. It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that sdf contains a ColNames.origin column, and throws an error otherwise.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with the grid centroid geometries

Source code in multimno/core/grid.py
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
def grid_ids_to_grid_centroids(
    self,
    sdf: DataFrame,
    grid_resolution: int,
    grid_id_col: str = None,
    geometry_col: str = None,
    origin: int = None,
) -> DataFrame:
    """Function that takes a DataFrame containing internal 4-byte grid IDs and returns it with point geometries
    of the centroids of the corresponding grid tiles.

    By default, the function will use a ColNames.origin column of `sdf`. Only if the `origin` parameter is passed,
    the existence of this column will not be checked, and `origin` will be used as the origin of the 4-byte grid ID
    definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most
    significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the
    northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting
    value in metres (analogous for northing).

    Args:
        sdf (DataFrame): DataFrame containing the internal 4-byte grid IDs.
        grid_resolution (int): resolution, in metres, of the current grid as represented by the internal 4-byte
            grid IDs. Must be a multiple of `self.resolution`, i.e. 100.
        grid_id_col (str, optional): column that holds the internal grid IDs. If None, it is set to
            `self.grid_id_col`. Defaults to None.
        geometry_col (str, optional): column that will hold the grid centroid geometries. If None, it is set to
            `self.geometry_col`. Defaults to None.
        origin (int, optional): If provided, it will be used as the origin of the definition of the 4-byte grid ID.
            It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that
            `sdf` contains a ColNames.origin column, and throws an error otherwise.

    Returns:
        DataFrame: DataFrame with the grid centroid geometries

    Raises:
        ValueError: If `grid_resolution` is not a multiple of `self.resolution`.
        ValueError: If `origin` is passed and is not an integer.
        ValueError: If `sdf` does not contain a ColNames.origin column and `origin` is not passed.
    """

    if grid_resolution % self.resolution != 0:
        raise ValueError(f"Grid resolution must be a multiple of {self.resolution}")
    if geometry_col is None:
        geometry_col = self.geometry_col
    if grid_id_col is None:
        grid_id_col = self.grid_id_col
    if origin is not None:
        if not isinstance(origin, int):
            raise ValueError(f"`origin` parameter must be an integer if used -- found type {type(origin)}")
        origin_column = F.lit(origin).cast(LongType())
    else:
        if ColNames.origin not in sdf.columns:
            raise ValueError(f"`sdf` must contain a {ColNames.origin} column, or `origin` parameter must be passed")
        origin_column = F.col(ColNames.origin)

    # Honour the requested grid ID column instead of always reading `ColNames.grid_id`
    id_column = F.col(grid_id_col)

    # Absolute indices (in units of `self.resolution` metres): index in the grid ID + index in the origin
    easting_index = F.shiftrightunsigned(id_column, 16).cast(LongType()) + F.shiftrightunsigned(origin_column, 32)
    northing_index = id_column.bitwiseAND((1 << 16) - 1).cast(LongType()) + origin_column.bitwiseAND(
        (1 << 32) - 1
    )

    # The centroid sits half a tile away from the tile's minimum corner in both directions
    half_tile = grid_resolution // 2

    # For Sedona, (X, Y) == (Easting, Northing) in EPSG 3035
    sdf = sdf.withColumn(
        geometry_col,
        STC.ST_Point(
            easting_index * self.resolution + half_tile,
            northing_index * self.resolution + half_tile,
        ),
    )

    # Set the CRS of the geometries
    sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

    return sdf

grid_ids_to_grid_tiles(sdf, grid_resolution, geometry_col=None, origin=None)

Function that takes a DataFrame containing internal 4-byte grid IDs and returns it with polygon geometries of the corresponding grid tiles.

By default, the function will use a ColNames.origin column of sdf. Only if the origin parameter is passed, the existence of this column will not be checked, and origin will be used as the origin of the 4-byte grid ID definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting value in metres (analogous for northing).

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame containing the internal 4-byte grid IDs.

required
grid_resolution int

resolution, in metres, of the current grid as represented by the internal 4-byte grid IDs. Must be a multiple of self.resolution, i.e. 100.

required
geometry_col str

column that will hold the grid tile geometries. If None, it is set to self.geometry_col. Defaults to None.

None
origin int

If provided, it will be used as the origin of the definition of the 4-byte grid ID. It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that sdf contains a ColNames.origin column, and throws an error otherwise.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with the grid tile geometries

Source code in multimno/core/grid.py
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
def grid_ids_to_grid_tiles(
    self,
    sdf: DataFrame,
    grid_resolution: int,
    geometry_col: str = None,
    origin: int = None,
    grid_id_col: str = None,
) -> DataFrame:
    """Function that takes a DataFrame containing internal 4-byte grid IDs and returns it with polygon geometries
    of the corresponding grid tiles.

    By default, the function will use a ColNames.origin column of `sdf`. Only if the `origin` parameter is passed,
    the existence of this column will not be checked, and `origin` will be used as the origin of the 4-byte grid ID
    definition even if the column exists. This origin will be treated as an 8-byte integer, where the first (most
    significant) 4 bytes hold the easting origin divided by 100 and the last (least significant) 4 bytes hold the
    northing origin divided by 100. That is, taking the first 4 bytes and multiplying by 100 gets the easting
    value in metres (analogous for northing).

    Each tile is the square going from its minimum corner ``(easting, northing)`` to
    ``(easting + grid_resolution, northing + grid_resolution)``.

    Args:
        sdf (DataFrame): DataFrame containing the internal 4-byte grid IDs.
        grid_resolution (int): resolution, in metres, of the current grid as represented by the internal 4-byte
            grid IDs. Must be a multiple of `self.resolution`, i.e. 100.
        geometry_col (str, optional): column that will hold the grid tile geometries. If None, it is set to
            `self.geometry_col`. Defaults to None.
        origin (int, optional): If provided, it will be used as the origin of the definition of the 4-byte grid ID.
            It will ignore the ColNames.origin column even if it exists. If not provided, it is expected that
            `sdf` contains a ColNames.origin column, and throws an error otherwise.
        grid_id_col (str, optional): column that holds the internal grid IDs. If None, `ColNames.grid_id` is
            used, which is the column this method has always read. Defaults to None.

    Returns:
        DataFrame: DataFrame with the grid tile polygon geometries

    Raises:
        ValueError: If `grid_resolution` is not a multiple of `self.resolution`.
        ValueError: If `origin` is passed and is not an integer.
        ValueError: If `sdf` does not contain a ColNames.origin column and `origin` is not passed.
    """
    if grid_resolution % self.resolution != 0:
        raise ValueError(f"Grid resolution must be a multiple of {self.resolution}")
    if geometry_col is None:
        geometry_col = self.geometry_col
    if origin is not None:
        if not isinstance(origin, int):
            raise ValueError(f"`origin` parameter must be an integer if used -- found type {type(origin)}")
        origin_column = F.lit(origin).cast(LongType())
    else:
        if ColNames.origin not in sdf.columns:
            raise ValueError(f"`sdf` must contain a {ColNames.origin} column, or `origin` parameter must be passed")
        origin_column = F.col(ColNames.origin)
    if grid_id_col is None:
        grid_id_col = ColNames.grid_id

    id_column = F.col(grid_id_col)

    # Minimum corner of each tile, in metres. These are kept as column expressions rather than temporary
    # `easting`/`northing` columns, so that caller columns with those names are neither overwritten nor dropped.
    min_easting = (
        F.shiftrightunsigned(id_column, 16).cast(LongType()) + F.shiftrightunsigned(origin_column, 32)
    ) * self.resolution
    min_northing = (
        id_column.bitwiseAND((1 << 16) - 1).cast(LongType()) + origin_column.bitwiseAND((1 << 32) - 1)
    ) * self.resolution

    # For Sedona, (X, Y) == (Easting, Northing) in EPSG 3035
    sdf = sdf.withColumn(
        geometry_col,
        STC.ST_PolygonFromEnvelope(
            min_easting,  # min_x (min_easting)
            min_northing,  # min_y (min_northing)
            min_easting + grid_resolution,  # max_x (max_easting)
            min_northing + grid_resolution,  # max_y (max_northing)
        ),
    )

    # Set the CRS of the geometries
    sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

    return sdf

inspire_id_to_grid_centroids(sdf, inspire_id_col=None, geometry_col=None)

Function that takes a DataFrame containing INSPIRE grid ID strings and returns it with point geometries of the centroids of the corresponding grid tiles. It extracts the units and grid size from the first element of the DataFrame and uses it to construct the necessary geometries.

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame containing the INSPIRE grid ID strings.

required
inspire_id_col str

name of the column holding the INSPIRE grid IDs. If None, it is set to ColNames.inspire_id. Defaults to None.

None
geometry_col str

column that will hold the grid centroid geometries. If None, it is set to self.geometry_col. Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with the grid centroid geometries

Source code in multimno/core/grid.py
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
def inspire_id_to_grid_centroids(
    self, sdf: DataFrame, inspire_id_col: str = None, geometry_col: str = None
) -> DataFrame:
    """Function that takes a DataFrame containing INSPIRE grid ID strings and returns it with point geometries
    of the centroids of the corresponding grid tiles. It extracts the units and grid size from the first element
    of the DataFrame and uses it to construct the necessary geometries.

    The resolution prefix (the text before the first ``N``) of the first row only is inspected, so all rows are
    treated as having that same resolution.

    Args:
        sdf (DataFrame): DataFrame containing the INSPIRE grid ID strings.
        inspire_id_col (str, optional): name of the column holding the INSPIRE grid IDs. If None, it is set to
            `ColNames.inspire_id`. Defaults to None.
        geometry_col (str, optional): column that will hold the grid centroid geometries. If None, it is set to
            `self.geometry_col`. Defaults to None.

    Returns:
        DataFrame: DataFrame with the grid centroid geometries

    Raises:
        ValueError: If the resolution prefix does not end in ``km`` or ``m``, or its numeric part is not an
            integer.
    """
    if inspire_id_col is None:
        inspire_id_col = ColNames.inspire_id
    if geometry_col is None:
        geometry_col = self.geometry_col

    # First, get the INSPIRE resolution
    resolution_str = sdf.select(F.regexp_extract(F.col(inspire_id_col), r"^(.*?)N", 1).alias("prefix")).first()[
        "prefix"
    ]

    # Parse and validate the INSPIRE resolution. Get the units and the grid size/resolution
    if resolution_str[-2:] == "km":
        try:
            grid_size = int(resolution_str[:-2])
        except ValueError as err:
            raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`") from err
        resolution_unit = 1000
        # The grid size is expressed in km here, so convert it to metres before halving it
        tile_size_metres = grid_size * 1000
    elif resolution_str[-1:] == "m":
        try:
            grid_size = int(resolution_str[:-1])
        except ValueError as err:
            raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`") from err
        # NOTE(review): fixed 100 m unit, which matches 100m IDs; other metre resolutions need confirming
        resolution_unit = 100
        tile_size_metres = grid_size
    else:
        raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`")

    # Half the tile side, in metres. Halving the raw grid size instead would give 1 // 2 == 0 for `1km` IDs
    # and place the "centroid" on the tile's minimum corner.
    half_tile = tile_size_metres // 2

    # Create geometries. Multiply INSPIRE ID northing and easting values by the resolution unit, and add half
    # the tile size to get the centroid of each tile

    # Sedona has (X, Y) = (Easting, Northing) for EPSG 3035
    sdf = sdf.withColumn(
        geometry_col,
        STC.ST_Point(
            F.regexp_extract(inspire_id_col, r"E(\d+)", 1).cast(LongType()) * resolution_unit + half_tile,
            F.regexp_extract(inspire_id_col, r"N(\d+)E", 1).cast(LongType()) * resolution_unit + half_tile,
        ),
    )

    # Set the CRS of the geometry
    sdf = sdf.withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))

    return sdf

inspire_id_to_grid_tiles(sdf, inspire_id_col=None, geometry_col=None)

Function that takes a DataFrame containing INSPIRE grid ID strings and returns it with polygon geometries of the corresponding grid tiles. It extracts the units and grid size from the first element of the DataFrame and uses it to construct the necessary geometries.

Parameters:

Name Type Description Default
sdf DataFrame

DataFrame containing the INSPIRE grid ID strings.

required
inspire_id_col str

name of the column holding the INSPIRE grid IDs. If None, it is set to ColNames.inspire_id. Defaults to None.

None
geometry_col str

column that will hold the grid tile geometries. If None, it is set to self.geometry_col. Defaults to None.

None

Returns:

Name Type Description
DataFrame DataFrame

DataFrame with the grid tile geometries

Source code in multimno/core/grid.py
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
def inspire_id_to_grid_tiles(
    self, sdf: DataFrame, inspire_id_col: str = None, geometry_col: str = None
) -> DataFrame:
    """Build the polygon geometry of the grid tile identified by each INSPIRE grid ID string.

    The unit (``km`` or ``m``) and grid size are parsed from the resolution prefix (the text before the first
    ``N``) of the first row of the DataFrame only, and are then applied to every row.

    Args:
        sdf (DataFrame): DataFrame containing the INSPIRE grid ID strings.
        inspire_id_col (str, optional): name of the column holding the INSPIRE grid IDs. If None,
            `ColNames.inspire_id` is used. Defaults to None.
        geometry_col (str, optional): column that will hold the grid tile geometries. If None,
            `self.geometry_col` is used. Defaults to None.

    Returns:
        DataFrame: DataFrame with the grid tile polygon geometries.

    Raises:
        ValueError: If the resolution prefix does not end in ``km`` or ``m``, or its numeric part is not an
            integer.
    """
    inspire_id_col = ColNames.inspire_id if inspire_id_col is None else inspire_id_col
    geometry_col = self.geometry_col if geometry_col is None else geometry_col

    # Resolution prefix of the first row, e.g. everything before the first "N"
    first_row = sdf.select(F.regexp_extract(F.col(inspire_id_col), r"^(.*?)N", 1).alias("prefix")).first()
    resolution_str = first_row["prefix"]

    # Split the prefix into its numeric part and its unit
    if resolution_str[-2:] == "km":
        in_kilometres = True
        size_digits = resolution_str[:-2]
    elif resolution_str[-1:] == "m":
        in_kilometres = False
        size_digits = resolution_str[:-1]
    else:
        raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`")

    try:
        grid_size = int(size_digits)
    except ValueError:
        raise ValueError(f"Unexpected INSPIRE grid resolution string `{resolution_str}`")

    if in_kilometres:
        # ID values are scaled by 1000; the tile side is 1000 * grid_size
        resolution_unit = 1000
        coordinate_scale = resolution_unit
    else:
        # ID values are scaled by the grid size itself; the tile side is grid_size
        resolution_unit = 1
        coordinate_scale = grid_size

    northing_expr = F.regexp_extract(inspire_id_col, r"N(\d+)E", 1).cast(LongType()) * coordinate_scale
    easting_expr = F.regexp_extract(inspire_id_col, r"E(\d+)", 1).cast(LongType()) * coordinate_scale
    sdf = sdf.withColumn("northing", northing_expr).withColumn("easting", easting_expr)

    tile_side = F.lit(resolution_unit * grid_size)

    # Sedona has (X, Y) = (Easting, Northing) for EPSG 3035
    envelope = STC.ST_PolygonFromEnvelope(
        F.col("easting"),  # min_x (min_easting)
        F.col("northing"),  # min_y (min_northing)
        F.col("easting") + tile_side,  # max_x (max_easting)
        F.col("northing") + tile_side,  # max_y (max_northing)
    )

    # Attach the polygon, discard the helper columns and finally tag the geometry with the grid CRS
    return (
        sdf.withColumn(geometry_col, envelope)
        .drop("northing", "easting")
        .withColumn(geometry_col, STF.ST_SetSRID(geometry_col, self.GRID_CRS_EPSG_CODE))
    )

process_latlon_extent(extent)

Takes an extent expressed in latitude and longitude (EPSG 4326), projects it into EPSG 3035, creates bounding box, snaps to grid, and extends it some extra tiles in each direction.

Parameters:

Name Type Description Default
extent list[float]

The extent in lat/lon to process. Ordering is [lon_min, lat_min, lon_max, lat_max].

required

Returns:

Name Type Description
extent list[float]

Coordinates of the rectangle/bounding box that covers the projected and extended extent. Order is [n_min, e_min, n_max, e_max] (bottom-left and top-right corners)

raster_bounds list[float]

Appropriate raster bounds

Source code in multimno/core/grid.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def process_latlon_extent(self, extent: List[float]) -> Tuple[List[float], List[float]]:
    """Turn a latitude/longitude extent into a grid-aligned, extended extent plus matching raster bounds.

    The work is delegated, in order, to the private helpers `_project_latlon_extent`, `_project_bounding_box`,
    `_snap_extent_to_grid` (applied to both the extent and the raster bounds), `_extend_grid_extent` and
    `_extend_grid_raster_bounds`.

    Args:
        extent (list[float]): The extent in lat/lon (EPSG 4326) to process. Ordering is
            [lon_min, lat_min, lon_max, lat_max].

    Returns:
        extent (list[float]): Coordinates of the rectangle/bounding box that covers the projected and extended
            extent. Documented order is [n_min, e_min, n_max, e_max] (bottom-left and top-right corners).
        raster_bounds (list[float]): Appropriate raster bounds, snapped and extended by the helpers above.
    """
    # Projection step: the first helper also returns auxiliary coordinates consumed by the second one
    projected_extent, auxiliar_coords = self._project_latlon_extent(extent)
    bounding_box, raw_raster_bounds = self._project_bounding_box(projected_extent, auxiliar_coords)

    # Align both rectangles with the grid
    snapped_extent = self._snap_extent_to_grid(bounding_box)
    snapped_raster_bounds = self._snap_extent_to_grid(raw_raster_bounds)

    # Enlarge each rectangle with its own helper (evaluated left to right)
    return self._extend_grid_extent(snapped_extent), self._extend_grid_raster_bounds(snapped_raster_bounds)