Skip to content

synthetic_diaries

Module that generates an MNO synthetic network.

SyntheticDiaries

Bases: Component

Class that generates the synthetic activity-trip diaries data. It inherits from the Component abstract class.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
class SyntheticDiaries(Component):
    """
    Class that generates the synthetic activity-trip diaries data.
    It inherits from the Component abstract class.
    """

    COMPONENT_ID = "SyntheticDiaries"

    def __init__(self, general_config_path: str, component_config_path: str):
        """
        Load the component configuration and cache every parameter needed to
        generate the synthetic diaries.

        Args:
            general_config_path (str): path to the general configuration file.
            component_config_path (str): path to the component configuration file.

        Raises:
            ValueError: if "stay_sequence_superset" and "stay_sequence_probabilities"
                do not contain the same number of comma-separated elements.
        """
        # keep super class init method:
        super().__init__(general_config_path=general_config_path, component_config_path=component_config_path)

        section = self.COMPONENT_ID
        config = self.config

        self.number_of_users = config.getint(section, "number_of_users")

        # Dates to generate: "number_of_dates" consecutive days starting at "initial_date".
        self.date_format = config.get(section, "date_format")
        self.initial_date = datetime.datetime.strptime(config.get(section, "initial_date"), self.date_format).date()
        self.number_of_dates = config.getint(section, "number_of_dates")
        self.date_range = [(self.initial_date + datetime.timedelta(days=d)) for d in range(self.number_of_dates)]

        # Bounding box in which locations are generated (decimal degrees).
        self.longitude_min = config.getfloat(section, "longitude_min")
        self.longitude_max = config.getfloat(section, "longitude_max")
        self.latitude_min = config.getfloat(section, "latitude_min")
        self.latitude_max = config.getfloat(section, "latitude_max")

        # Distance ranges, passed as meters to generate_lonlat_at_distance.
        self.home_work_distance_min = config.getfloat(section, "home_work_distance_min")
        self.home_work_distance_max = config.getfloat(section, "home_work_distance_max")
        self.other_distance_min = config.getfloat(section, "other_distance_min")
        self.other_distance_max = config.getfloat(section, "other_distance_max")

        # Stay duration ranges; they are used as hours when building timestamps.
        self.home_duration_min = config.getfloat(section, "home_duration_min")
        self.home_duration_max = config.getfloat(section, "home_duration_max")
        self.work_duration_min = config.getfloat(section, "work_duration_min")
        self.work_duration_max = config.getfloat(section, "work_duration_max")
        self.other_duration_min = config.getfloat(section, "other_duration_min")
        self.other_duration_max = config.getfloat(section, "other_duration_max")

        # Speed used to turn trip distances (m) into trip times (s).
        self.displacement_speed = config.getfloat(section, "displacement_speed")

        # Strip whitespace so that "home, work" is read the same way as "home,work".
        self.stay_sequence_superset = [
            stay_type.strip() for stay_type in config.get(section, "stay_sequence_superset").split(",")
        ]
        # TODO: change to stay_sequence
        self.stay_sequence_probabilities = [
            float(w) for w in config.get(section, "stay_sequence_probabilities").split(",")
        ]
        # Explicit check instead of `assert`: assertions are removed when running with -O.
        if len(self.stay_sequence_superset) != len(self.stay_sequence_probabilities):
            raise ValueError(
                "stay_sequence_superset and stay_sequence_probabilities must have the same number of elements "
                f"({len(self.stay_sequence_superset)} != {len(self.stay_sequence_probabilities)})"
            )

    def initalize_data_objects(self):
        """
        Register the single output data object of this component: the bronze
        synthetic diaries dataset, located at the path configured under
        "diaries_data_bronze".
        """
        diaries_path = self.config.get(CONFIG_BRONZE_PATHS_KEY, "diaries_data_bronze")
        self.output_data_objects = {
            BronzeSyntheticDiariesDataObject.ID: BronzeSyntheticDiariesDataObject(self.spark, diaries_path),
        }

    def read(self):
        pass  # No input datasets are used in this component

    def transform(self):
        """
        Generate every activity row, load the rows into a spark dataframe, pass it
        through calc_hashed_user_id, cast each column to the type declared in the
        output schema and store the result in the output data object.
        """
        diaries_df = calc_hashed_user_id(self.spark.createDataFrame(self.generate_activities()))

        # One cast expression per field of the output schema.
        casted_columns = {}
        for schema_field in BronzeSyntheticDiariesDataObject.SCHEMA.fields:
            casted_columns[schema_field.name] = F.col(schema_field.name).cast(schema_field.dataType)

        self.output_data_objects[BronzeSyntheticDiariesDataObject.ID].df = diaries_df.withColumns(casted_columns)

    def haversine(self, lon1: float, lat1: float, lon2: float, lat2: float) -> float:
        """
        Calculate the haversine distance in meters between two points.

        Args:
            lon1 (float): longitude of first point, in decimal degrees.
            lat1 (float): latitude of first point, in decimal degrees.
            lon2 (float): longitude of second point, in decimal degrees.
            lat2 (float): latitude of second point, in decimal degrees.

        Returns:
            float: distance between both points, in meters.
        """
        r = 6_371_000  # Radius of earth in meters.

        # convert decimal degrees to radians
        lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

        # haversine formula
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        c = 2 * asin(sqrt(a))
        return c * r

    def random_seed_number_generator(
        self, base_seed: int, agent_id: int = None, date: datetime.date = None, i: int = None
    ) -> int:
        """
        Generate random seed integer based on provided arguments.

        Args:
            base_seed (int): base integer for operations.
            agent_id (int, optional): agent identifier. Defaults to None.
            date (datetime.date, optional): date. Defaults to None.
            i (int, optional): position integer. Defaults to None.

        Returns:
            int: generated random seed integer.
        """
        seed = base_seed
        if agent_id is not None:
            seed += int(agent_id) * 100
        if date is not None:
            start_datetime = datetime.datetime.combine(date, datetime.time(0))
            seed += int(start_datetime.timestamp())
        if i is not None:
            seed += i
        return seed

    def calculate_trip_time(self, o_location: Tuple[float, float], d_location: Tuple[float, float]) -> float:
        """
        Calculate trip time given an origin location and a destination
        location, according to the specified trip speed.

        Args:
            o_location (Tuple[float,float]): lon, lat of 1st point,
                in decimal degrees.
            d_location (Tuple[float,float]): lon, lat of 2nd point,
                in decimal degrees.

        Returns:
            float: trip time, in seconds.
        """
        trip_distance = self.haversine(o_location[0], o_location[1], d_location[0], d_location[1])  # m
        trip_speed = self.displacement_speed  # m/s
        trip_time = trip_distance / trip_speed  # s
        return trip_time

    def calculate_trip_final_time(
        self,
        origin_location: Tuple[float, float],
        destin_location: Tuple[float, float],
        origin_timestamp: datetime.datetime,
    ) -> datetime.datetime:
        """
        Calculate end time of a trip given an origin time, an origin location,
        a destination location and a speed.

        Args:
            origin_location (Tuple[float,float]): lon, lat of 1st point,
                in decimal degrees.
            destin_location (Tuple[float,float]): lon, lat of 2nd point,
                in decimal degrees.
            origin_timestamp (datetime.datetime): start time of trip.

        Returns:
            datetime.datetime: end time of trip.
        """

        trip_time = self.calculate_trip_time(origin_location, destin_location)  # s
        return origin_timestamp + datetime.timedelta(seconds=trip_time)

    def generate_stay_location(
        self,
        stay_type: str,
        home_location: Tuple[float, float],
        work_location: Tuple[float, float],
        previous_location: Tuple[float, float],
        user_id: int,
        date: datetime.date,
        i: int,
    ) -> Tuple[float, float]:
        """
        Generate a random activity location within the bounding box limits based
        on the activity type and previous activity locations.

        Args:
            stay_type (str): type of stay ("home", "work" or "other").
            home_location (Tuple[float,float]): coordinates of home location.
            work_location (Tuple[float,float]): coordinates of work location.
            previous_location (Tuple[float,float]): coordinates of previous
                activity location.
            user_id (int): agent identifier, used for random seed generation.
            date (datetime.date): date, used for random seed generation.
            i (int): activity position, used for random seed generation.

        Returns:
            Tuple[float,float]: randomly generated activity location coordinates.
        """
        if stay_type == "home":
            location = home_location
        elif stay_type == "work":
            location = work_location
        else:
            location = self.generate_other_location(user_id, date, i, home_location, previous_location)
        return location

    def create_agent_activities_min_duration(
        self,
        user_id: int,
        agent_stay_type_sequence: List[str],
        home_location: Tuple[float, float],
        work_location: Tuple[float, float],
        date: datetime.date,
        start_of_date: datetime.datetime,
        end_of_date: datetime.datetime,
    ) -> List[Row]:
        """
        Build the diary of an agent for a date giving every stay its minimum
        duration and linking consecutive stays with moves. If the sequence is
        empty, or the resulting diary ends after the end of the date, a single
        "home" stay covering the whole date is returned instead.

        Args:
            user_id (int): agent identifier.
            agent_stay_type_sequence (List[str]): list of generated stay types,
                each represented by a string indicating the stay type.
            home_location (Tuple[float,float]): coordinates of home location.
            work_location (Tuple[float,float]): coordinates of work location.
            date (datetime.date): date for activity sequence generation, used for
                timestamps and random seed generation.
            start_of_date (datetime.datetime): timestamp of current date at 00:00:00.
            end_of_date (datetime.datetime): timestamp of current date at 23:59:59.

        Returns:
            List[Row]: list of generated activities and trips, each represented by a
                spark row object with all its information.
        """

        def build_row(activity_type, kind, lon, lat, begins, ends):
            # All rows share the same layout; only these six values change.
            return Row(
                user_id=user_id,
                activity_type=activity_type,
                stay_type=kind,
                longitude=lon,
                latitude=lat,
                initial_timestamp=begins,
                final_timestamp=ends,
                year=date.year,
                month=date.month,
                day=date.day,
            )

        generated = []
        last_location = None
        clock = start_of_date  # moment at which the next activity begins
        for position, stay_type in enumerate(agent_stay_type_sequence):
            location = self.generate_stay_location(
                stay_type, home_location, work_location, last_location, user_id, date, position
            )

            # Every stay but the first one is preceded by the move that reaches it.
            if position > 0:
                arrival = self.calculate_trip_final_time(last_location, location, clock)
                generated.append(build_row("move", "move", float("nan"), float("nan"), clock, arrival))
                clock = arrival

            # The stay itself, with the minimum duration (hours) of its type.
            departure = clock + datetime.timedelta(hours=self.generate_min_stay_duration(stay_type))
            generated.append(build_row("stay", stay_type, location[0], location[1], clock, departure))
            clock = departure

            last_location = location

        # Usable diary: at least one stay and everything fits in the date.
        if generated and not clock > end_of_date:
            return generated

        # Otherwise (0 stays or too many stays): simple "only home" diary.
        return [build_row("stay", "home", home_location[0], home_location[1], start_of_date, end_of_date)]

    def update_spark_row(self, row: Row, column_name: str, new_value: Any) -> Row:
        """
        Build a new spark row equal to the input one except for the value of
        one column.

        Args:
            row (Row): input spark row.
            column_name (str): name of column to modify.
            new_value (Any): new value to assign.

        Returns:
            Row: modified spark row
        """
        fields = row.asDict()
        fields[column_name] = new_value
        return Row(**fields)

    def adjust_activity_times(
        self,
        date_activities: List[Row],
        remaining_time: float,
        user_id: int,
        date: datetime.date,
        start_of_date: datetime.datetime,
        end_of_date: datetime.datetime,
    ):
        """
        Rewrite, in place, the timestamps of the activities in "date_activities".
        Stays other than the last one receive a new probabilistic duration, moves
        keep their duration, and the last activity is stretched to finish at
        "end_of_date", so that the diary covers the whole date.

        Args:
            date_activities (List[Row]): list of generated activities (stays and
                moves) of the agent for the specified date. Each activity/trip is a
                spark row object.
            remaining_time (float): time, in hours, left between the end of the
                minimum-duration diary and the end of the date.
            user_id (int): agent identifier.
            date (datetime.date): date for activity sequence generation, used for
                timestamps and random seed generation.
            start_of_date (datetime.datetime): timestamp of current date at 00:00:00.
            end_of_date (datetime.datetime): timestamp of current date at 23:59:59.
        """
        clock = start_of_date  # moment at which the current activity begins
        last_position = len(date_activities) - 1
        for position, activity in enumerate(date_activities):
            scheduled_span = activity.final_timestamp - activity.initial_timestamp

            if activity.activity_type != "stay":
                # move: shifted in time, same duration
                new_end = clock + scheduled_span
            elif position == last_position:
                # last stay: absorbs whatever time is left
                new_end = end_of_date
                remaining_time = 0.0
            else:
                previous_hours = scheduled_span.total_seconds() / 3600.0
                drawn_hours = self.generate_stay_duration(
                    user_id, date, position, activity.stay_type, remaining_time
                )
                new_end = clock + datetime.timedelta(seconds=drawn_hours * 3600.0)
                # only the extension over the previous duration consumes remaining time
                remaining_time -= drawn_hours - previous_hours

            # common for all activities (stays and moves):
            shifted = self.update_spark_row(activity, "initial_timestamp", clock)
            date_activities[position] = self.update_spark_row(shifted, "final_timestamp", new_end)
            clock = new_end

    def add_agent_date_activities(
        self,
        activities: List[Row],
        user_id: int,
        agent_stay_type_sequence: List[str],
        home_location: Tuple[float, float],
        work_location: Tuple[float, float],
        date: datetime.date,
        start_of_date: datetime.datetime,
        end_of_date: datetime.datetime,
    ):
        """
        Generate the diary of one agent for one date and append it to "activities".

        Firstly, the stay type sequence is turned into a diary in which each stay
        has the minimum duration considered for its type. Trip times are based on
        haversine distance and the configured displacement speed.
        If that diary does not fit in the date, it consists of just one "home"
        activity from 00:00:00 to 23:59:59.
        Else, there will be a remaining time. E.g., the diary of an agent, after
        adding up all trip durations and minimum activity durations may end at
        20:34:57. There is a remaining time to complete the full diary
        (23:59:59 - 20:34:57). In that case activity times are adjusted
        probabilistically so that the diary ends at exactly 23:59:59.

        Args:
            activities (List[Row]): list of generated activities (stays and moves) for
                the agent for all of the specified dates. Each activity is a spark
                row object.
            user_id (int): agent identifier.
            agent_stay_type_sequence (List[str]): list of generated stay types,
                each represented by a string indicating the stay type.
            home_location (Tuple[float,float]): coordinates of home location.
            work_location (Tuple[float,float]): coordinates of work location.
            date (datetime.date): date for activity sequence generation, used for
                timestamps and random seed generation.
            start_of_date (datetime.datetime): timestamp of current date at 00:00:00.
            end_of_date (datetime.datetime): timestamp of current date at 23:59:59.
        """
        day_diary = self.create_agent_activities_min_duration(
            user_id, agent_stay_type_sequence, home_location, work_location, date, start_of_date, end_of_date
        )

        # Hours between the end of the last activity and the end of the date.
        time_left = end_of_date - day_diary[-1].final_timestamp
        unused_hours = time_left.total_seconds() / 3600.0

        if unused_hours != 0:
            self.adjust_activity_times(day_diary, unused_hours, user_id, date, start_of_date, end_of_date)

        activities.extend(day_diary)

    def add_date_activities(self, date: datetime.date, activities: List[Row]):
        """
        Generate activity (stays and moves) rows for a specific date according to
        parameters.

        Args:
            date (datetime.date): date for activity sequence generation, used for
                timestamps and random seed generation.
            activities (List[Row]): list of generated activities (stays and moves) for
                the agent for all of the specified dates. Each activity is a spark
                row object.
        """
        # Start of date, end of date: datetime object generation
        start_of_date = datetime.datetime.combine(date, datetime.time(0, 0, 0))
        end_of_date = datetime.datetime.combine(date, datetime.time(23, 59, 59))
        for user_id in range(self.number_of_users):
            # generate user information:
            agent_stay_type_sequence = self.generate_stay_type_sequence(user_id, date)
            home_location = self.generate_home_location(user_id)
            work_location = self.generate_work_location(user_id, home_location)
            self.add_agent_date_activities(
                activities,
                user_id,
                agent_stay_type_sequence,
                home_location,
                work_location,
                date,
                start_of_date,
                end_of_date,
            )

    def generate_activities(self) -> List[Row]:
        """
        Generate activity and trip rows according to parameters.

        Returns:
            List[Row]: list of generated activities and trips for the agent for all
                of the specified dates. Each activity/trip is a spark row object.
        """
        activities = []
        for date in self.date_range:
            self.add_date_activities(date, activities)
        return activities

    def generate_lonlat_at_distance(self, lon1: float, lat1: float, d: float, seed: int) -> Tuple[float, float]:
        """
        Given a point (lon, lat) and a distance, in meters, calculate a new random
        point that is exactly at the specified distance of the provided lon, lat.

        Args:
            lon1 (float): longitude of point, specified in decimal degrees.
            lat1 (float): latitude of point, specified in decimal degrees.
            d (float): distance, in meters.
            seed (int): random seed integer.

        Returns:
            Tuple[float, float]: coordinates of randomly generated point.
        """
        r = 6_371_000  # Radius of earth in meters.

        d_x = Random(seed).uniform(0, d)
        d_y = sqrt(d**2 - d_x**2)

        # firstly, convert lat to radians for later
        lat1_radians = lat1 * pi / 180.0

        # how many meters correspond to one degree of latitude?
        deg_to_meters = r * pi / 180  # aprox. 111111 meters
        # thus, the northwards displacement, in degrees of latitude is:
        north_delta = d_y / deg_to_meters

        # but one degree of longitude does not always correspond to the
        # same distance... depends on the latitude at where you are!
        parallel_radius = abs(r * cos(lat1_radians))
        deg_to_meters = parallel_radius * pi / 180  # variable
        # thus, the eastwards displacement, in degrees of longitude is:
        east_delta = d_x / deg_to_meters

        final_lon = lon1 + east_delta * Random(seed).choice([-1, 1])
        final_lat = lat1 + north_delta * Random(seed).choice([-1, 1])

        return (final_lon, final_lat)

    def generate_home_location(self, agent_id: int) -> Tuple[float, float]:
        """
        Generate random home location based on bounding box limits.

        Args:
            agent_id (int): identifier of agent, used for random seed generation.

        Returns:
            Tuple[float,float]: coordinates of generated home location.
        """
        seed_lon = self.random_seed_number_generator(1, agent_id)
        seed_lat = self.random_seed_number_generator(2, agent_id)
        hlon = Random(seed_lon).uniform(self.longitude_min, self.longitude_max)
        hlat = Random(seed_lat).uniform(self.latitude_min, self.latitude_max)
        return (hlon, hlat)

    def generate_work_location(
        self, agent_id: int, home_location: Tuple[float, float], seed: int = 4
    ) -> Tuple[float, float]:
        """
        Generate random work location based on home location and maximum distance to
        home. If the work location falls outside of bounding box limits, try again.

        Args:
            agent_id (int): identifier of agent, used for random seed generation.
            home_location (Tuple[float,float]): coordinates of home location.
            seed (int, optional): random seed integer. Defaults to 4.

        Returns:
            Tuple[float,float]: coordinates of generated work location.
        """
        seed_distance = self.random_seed_number_generator(seed - 1, agent_id)
        random_distance = Random(seed_distance).uniform(self.home_work_distance_min, self.home_work_distance_max)
        hlon, hlat = home_location
        seed_coords = self.random_seed_number_generator(seed, agent_id)
        wlon, wlat = self.generate_lonlat_at_distance(hlon, hlat, random_distance, seed_coords)

        if not (self.longitude_min < wlon < self.longitude_max) or not (
            self.latitude_min < wlat < self.latitude_max
        ):  # outside limits
            seed += 1
            wlon, wlat = self.generate_work_location(agent_id, home_location, seed=seed)

        return (wlon, wlat)

    def generate_other_location(
        self,
        agent_id: int,
        date: datetime.date,
        activity_number: int,
        home_location: Tuple[float, float],
        previous_location: Tuple[float, float],
        seed: int = 6,
    ) -> Tuple[float, float]:
        """
        Generate other activity location based on previous location and maximum distance
        to previous location. If there is no previous location (this is the first
        activity of the day), then the home location is considered as previous location.
        If the location falls outside of bounding box limits, try again.

        Args:
            agent_id (int): identifier of agent, used for random seed generation.
            date (datetime.date): date, used for random seed generation.
            activity_number (int): act position, used for random seed generation.
            home_location (Tuple[float,float]): coordinates of home location.
            previous_location (Tuple[float,float]): coordinates of previous location.
            seed (int, optional): random seed integer. Defaults to 6.

        Returns:
            Tuple[float,float]: coordinates of generated location.
        """
        seed_distance = self.random_seed_number_generator(seed - 1, agent_id)
        random_distance = Random(seed_distance).uniform(self.other_distance_min, self.other_distance_max)
        if previous_location is None:
            plon, plat = home_location
        else:
            plon, plat = previous_location

        seed_coords = self.random_seed_number_generator(seed, agent_id)
        olon, olat = self.generate_lonlat_at_distance(plon, plat, random_distance, seed_coords)
        if not (self.longitude_min < olon < self.longitude_max) or not (
            self.latitude_min < olat < self.latitude_max
        ):  # outside limits
            seed += 1
            olon, olat = self.generate_other_location(
                agent_id, date, activity_number, home_location, previous_location, seed=seed
            )

        return (olon, olat)

    def generate_stay_duration(
        self, agent_id: int, date: datetime.date, i: int, stay_type: str, remaining_time: float
    ) -> float:
        """
        Generate stay duration probabilistically based on activity type
        abd remaining time.

        Args:
            agent_id (int): identifier of agent, used for random seed generation.
            date (datetime.date): date, used for random seed generation.
            i (int): activity position, used for random seed generation.
            stay_type (str): type of stay. Shall be "home", "work" or "other".
            remaining_time (float): same units as durations.

        Returns:
            float: generated activity duration.
        """
        if stay_type == "home":
            min_duration = self.home_duration_min
            max_duration = self.home_duration_max
        elif stay_type == "work":
            min_duration = self.work_duration_min
            max_duration = self.work_duration_max
        elif stay_type == "other":
            min_duration = self.other_duration_min
            max_duration = self.other_duration_max
        else:
            raise ValueError
        seed = self.random_seed_number_generator(7, agent_id, date, i)
        max_value = min(max_duration, min_duration + remaining_time)
        return Random(seed).uniform(min_duration, max_value)

    def generate_min_stay_duration(self, stay_type: str) -> float:
        """
        Generate minimum stay duration based on stay type specifications.

        Args:
            stay_type (str): type of stay. Shall be "home", "work" or "other".

        Returns:
            float: minimum stay duration.
        """
        if stay_type == "home":
            return self.home_duration_min
        elif stay_type == "work":
            return self.work_duration_min
        elif stay_type == "other":
            return self.other_duration_min
        else:
            raise ValueError

    def remove_consecutive_stay_types(self, stay_sequence_list: List[str], stay_types_to_group: Set[str]) -> List[str]:
        """
        Collapse runs of identical consecutive stay types into a single stay.

        Only stay types contained in "stay_types_to_group" are collapsed;
        repeated stays of any other type are kept as they are.

        Args:
            stay_sequence_list (List[str]): input stay type list.
            stay_types_to_group (Set[str]): stay types to group.

        Returns:
            List[str]: new stay sequence list; the input list is not modified.
        """
        collapsed = []
        last_seen = None
        for current in stay_sequence_list:
            is_groupable_repeat = current == last_seen and current in stay_types_to_group
            # The comparison is always against the previous input element,
            # whether or not that element was kept.
            last_seen = current
            if is_groupable_repeat:
                continue
            collapsed.append(current)
        return collapsed

    def generate_stay_type_sequence(self, agent_id: int, date: datetime.date) -> List[str]:
        """
        Sample the stay type sequence of an agent for a specific date.

        Every entry of the superset sequence is kept or dropped by comparing a
        seeded random draw with the probability stored at the same position.
        Afterwards, 'home'-'home' and 'work'-'work' repetitions are merged
        into a single 'home' or 'work' stay.

        Args:
            agent_id (int): identifier of agent, used for random seed generation.
            date (datetime.date): date for activity sequence generation, used for
                random seed generation.

        Returns:
            List[str]: list of generated stay types, each represented by a string
                indicating the stay type (e.g. "home", "work", "other").
        """
        sampled = []
        for position, candidate in enumerate(self.stay_sequence_superset):
            keep_probability = self.stay_sequence_probabilities[position]
            # One independent, reproducible draw per (agent, date, position).
            draw = Random(self.random_seed_number_generator(0, agent_id, date, position)).random()
            if draw < keep_probability:
                sampled.append(candidate)
        return self.remove_consecutive_stay_types(sampled, {"home", "work"})

add_agent_date_activities(activities, user_id, agent_stay_type_sequence, home_location, work_location, date, start_of_date, end_of_date)

For a specific date and user, generate a sequence of activities probabilistically according to the specified activity superset and the activity probabilities. Firstly, assign to each of these activities the minimum duration considered for that activity type. Trip times are based on Pythagorean distance and a specified average speed. If the sum of all minimum duration of the activities and the duration of the trips is higher than the 24h of the day, then assign just one "home" activity to the agent from 00:00:00 to 23:59:59. Else, there will be a remaining time. E.g., the diary of an agent, after adding up all trip durations and minimum activity durations may end at 20:34:57. There is a remaining time to complete the full diary (23:59:59 - 20:34:57). Adjust activity times probabilistically according to the maximum activity duration and this remaining time, making the diary end at exactly 23:59:59.

Parameters:

Name Type Description Default
activities List[Row]

list of generated activities (stays and moves) for the agent for all of the specified dates. Each activity is a spark row object.

required
user_id int

agent identifier.

required
agent_stay_type_sequence List[str]

list of generated stay types, each represented by a string indicating the stay type.

required
home_location Tuple[float, float]

coordinates of home location.

required
work_location Tuple[float, float]

coordinates of work location.

required
date date

date for activity sequence generation, used for timestamps and random seed generation.

required
start_of_date datetime

timestamp of current date at 00:00:00.

required
end_of_date datetime

timestamp of current date at 23:59:59.

required
Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
def add_agent_date_activities(
    self,
    activities: List[Row],
    user_id: int,
    agent_stay_type_sequence: List[str],
    home_location: Tuple[float, float],
    work_location: Tuple[float, float],
    date: datetime.date,
    start_of_date: datetime.datetime,
    end_of_date: datetime.datetime,
):
    """
    Generate the diary of one agent for one date and append it to "activities".

    The diary is first built with every stay at its minimum duration (see
    create_agent_activities_min_duration). The time left between the end of
    that diary and "end_of_date" is the remaining time, expressed in hours.
    E.g., a minimum-duration diary ending at 20:34:57 leaves
    (23:59:59 - 20:34:57) to distribute. When the remaining time is not zero,
    adjust_activity_times redistributes it over the stays so that the diary
    ends exactly at "end_of_date".

    Args:
        activities (List[Row]): list of generated activities (stays and moves) for
            the agent for all of the specified dates. Each activity is a spark
            row object. Extended in place with the rows of this date.
        user_id (int): agent identifier.
        agent_stay_type_sequence (List[str]): list of generated stay types,
            each represented by a string indicating the stay type.
        home_location (Tuple[float,float]): coordinates of home location.
        work_location (Tuple[float,float]): coordinates of work location.
        date (datetime.date): date for activity sequence generation, used for
            timestamps and random seed generation.
        start_of_date (datetime.datetime): timestamp of current date at 00:00:00.
        end_of_date (datetime.datetime): timestamp of current date at 23:59:59.
    """
    day_diary = self.create_agent_activities_min_duration(
        user_id, agent_stay_type_sequence, home_location, work_location, date, start_of_date, end_of_date
    )

    # Hours between the end of the last activity and the end of the date.
    unused_span = end_of_date - day_diary[-1].final_timestamp
    spare_hours = unused_span.total_seconds() / 3600.0

    if spare_hours != 0:
        self.adjust_activity_times(day_diary, spare_hours, user_id, date, start_of_date, end_of_date)

    activities.extend(day_diary)

add_date_activities(date, activities)

Generate activity (stays and moves) rows for a specific date according to parameters.

Parameters:

Name Type Description Default
date date

date for activity sequence generation, used for timestamps and random seed generation.

required
activities List[Row]

list of generated activities (stays and moves) for the agent for all of the specified dates. Each activity is a spark row object.

required
Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def add_date_activities(self, date: datetime.date, activities: List[Row]):
    """
    Append to "activities" the diary rows of every agent for a specific date.

    Args:
        date (datetime.date): date for activity sequence generation, used for
            timestamps and random seed generation.
        activities (List[Row]): list of generated activities (stays and moves) for
            the agent for all of the specified dates. Each activity is a spark
            row object.
    """
    # Day boundaries shared by all agents: 00:00:00 and 23:59:59 of the date.
    first_instant = datetime.time(0, 0, 0)
    last_instant = datetime.time(23, 59, 59)
    day_start = datetime.datetime.combine(date, first_instant)
    day_end = datetime.datetime.combine(date, last_instant)

    for agent in range(self.number_of_users):
        # Per-agent inputs: stay type sequence for the date, home and work.
        stay_types = self.generate_stay_type_sequence(agent, date)
        home = self.generate_home_location(agent)
        work = self.generate_work_location(agent, home)
        self.add_agent_date_activities(activities, agent, stay_types, home, work, date, day_start, day_end)

adjust_activity_times(date_activities, remaining_time, user_id, date, start_of_date, end_of_date)

Modifies the "date_activities" list, changing the initial and final timestamps of both stays and moves probabilistically in order to generate stay durations different from the minimum and adjust the durations of the activities to the 24h of the day.

Parameters:

Name Type Description Default
date_activities List[Row]

list of generated activities (stays and moves) of the agent for the specified date. Each activity/trip is a spark row object.

required
user_id int

agent identifier.

required
date date

date for activity sequence generation, used for timestamps and random seed generation.

required
start_of_date datetime

timestamp of current date at 00:00:00.

required
end_of_date datetime

timestamp of current date at 23:59:59.

required
Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
def adjust_activity_times(
    self,
    date_activities: List[Row],
    remaining_time: float,
    user_id: int,
    date: datetime.date,
    start_of_date: datetime.datetime,
    end_of_date: datetime.datetime,
):
    """
    Re-time the activities of "date_activities" in place so that the diary
    fills the whole date.

    Activities are laid out back to back starting at "start_of_date". Moves
    keep their original duration. Every stay except a final one gets a new
    duration drawn by generate_stay_duration, and the extra time it takes
    over its previous duration is subtracted from the remaining time. A stay
    in the last position is stretched to end at "end_of_date".

    Args:
        date_activities (List[Row]): list of generated activities (stays and
            moves) of the agent for the specified date. Each activity/trip is a
            spark row object.
        remaining_time (float): hours still available to distribute among
            the stays.
        user_id (int): agent identifier.
        date (datetime.date): date for activity sequence generation, used for
            timestamps and random seed generation.
        start_of_date (datetime.datetime): timestamp of current date at 00:00:00.
        end_of_date (datetime.datetime): timestamp of current date at 23:59:59.
    """
    last_index = len(date_activities) - 1
    cursor = start_of_date  # end of the previously re-timed activity
    for position, row in enumerate(date_activities):
        previous_span = row.final_timestamp - row.initial_timestamp

        if row.activity_type != "stay":
            # move: shifted in time, duration untouched
            shifted_end = cursor + previous_span
        elif position == last_index:
            # final stay: absorbs whatever is left of the date
            shifted_end = end_of_date
            remaining_time = 0.0
        else:
            drawn_hours = self.generate_stay_duration(user_id, date, position, row.stay_type, remaining_time)
            shifted_end = cursor + datetime.timedelta(seconds=drawn_hours * 3600.0)
            # only the time added on top of the previous duration is consumed
            remaining_time -= drawn_hours - previous_span.total_seconds() / 3600.0

        # rows are replaced by updated copies, for stays and moves alike
        row = self.update_spark_row(row, "initial_timestamp", cursor)
        row = self.update_spark_row(row, "final_timestamp", shifted_end)
        date_activities[position] = row
        cursor = shifted_end

calculate_trip_final_time(origin_location, destin_location, origin_timestamp)

Calculate end time of a trip given an origin time, an origin location, a destination location and a speed.

Parameters:

Name Type Description Default
origin_location Tuple[float, float]

lon, lat of 1st point, in decimal degrees.

required
destin_location Tuple[float, float]

lon, lat of 2nd point, in decimal degrees.

required
origin_timestamp datetime

start time of trip.

required

Returns:

Type Description
datetime

datetime.datetime: end time of trip.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def calculate_trip_final_time(
    self,
    origin_location: Tuple[float, float],
    destin_location: Tuple[float, float],
    origin_timestamp: datetime.datetime,
) -> datetime.datetime:
    """
    Compute the arrival time of a trip that starts at "origin_timestamp".

    The trip duration, in seconds, is obtained from calculate_trip_time and
    added to the start time.

    Args:
        origin_location (Tuple[float,float]): lon, lat of 1st point,
            in decimal degrees.
        destin_location (Tuple[float,float]): lon, lat of 2nd point,
            in decimal degrees.
        origin_timestamp (datetime.datetime): start time of trip.

    Returns:
        datetime.datetime: end time of trip.
    """
    travel_seconds = self.calculate_trip_time(origin_location, destin_location)
    travel_span = datetime.timedelta(seconds=travel_seconds)
    return origin_timestamp + travel_span

calculate_trip_time(o_location, d_location)

Calculate trip time given an origin location and a destination location, according to the specified trip speed.

Parameters:

Name Type Description Default
o_location Tuple[float, float]

lon, lat of 1st point, in decimal degrees.

required
d_location Tuple[float, float]

lon, lat of 2nd point, in decimal degrees.

required

Returns:

Name Type Description
float float

trip time, in seconds.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
def calculate_trip_time(self, o_location: Tuple[float, float], d_location: Tuple[float, float]) -> float:
    """
    Compute how long a trip between two locations takes at the configured
    displacement speed.

    Args:
        o_location (Tuple[float,float]): lon, lat of 1st point,
            in decimal degrees.
        d_location (Tuple[float,float]): lon, lat of 2nd point,
            in decimal degrees.

    Returns:
        float: trip time, in seconds.
    """
    # distance (m) divided by speed (m/s) gives the duration in seconds
    distance = self.haversine(o_location[0], o_location[1], d_location[0], d_location[1])
    return distance / self.displacement_speed

create_agent_activities_min_duration(user_id, agent_stay_type_sequence, home_location, work_location, date, start_of_date, end_of_date)

Generate activities of the minimum duration following the specified agent activity sequence for this agent and date.

Parameters:

Name Type Description Default
user_id int

agent identifier.

required
agent_stay_type_sequence List[str]

list of generated stay types, each represented by a string indicating the stay type.

required
home_location Tuple[float, float]

coordinates of home location.

required
work_location Tuple[float, float]

coordinates of work location.

required
date date

date for activity sequence generation, used for timestamps and random seed generation.

required
start_of_date datetime

timestamp of current date at 00:00:00.

required
end_of_date datetime

timestamp of current date at 23:59:59.

required

Returns:

Type Description
List[Row]

List[Row]: list of generated activities and trips, each represented by a spark row object with all its information.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def create_agent_activities_min_duration(
    self,
    user_id: int,
    agent_stay_type_sequence: List[str],
    home_location: Tuple[float, float],
    work_location: Tuple[float, float],
    date: datetime.date,
    start_of_date: datetime.datetime,
    end_of_date: datetime.datetime,
) -> List[Row]:
    """
    Build the diary of an agent for a date using the minimum duration of
    every stay in the specified stay type sequence.

    Stays are laid out back to back from "start_of_date". Every stay but the
    first is preceded by a "move" row whose end time is given by
    calculate_trip_final_time. If the sequence is empty, or the last stay
    ends after "end_of_date", the diary is replaced by a single "home" stay
    that spans from "start_of_date" to "end_of_date".

    Args:
        user_id (int): agent identifier.
        agent_stay_type_sequence (List[str]): list of generated stay types,
            each represented by a string indicating the stay type.
        home_location (Tuple[float,float]): coordinates of home location.
        work_location (Tuple[float,float]): coordinates of work location.
        date (datetime.date): date for activity sequence generation, used for
            timestamps and random seed generation.
        start_of_date (datetime.datetime): timestamp of current date at 00:00:00.
        end_of_date (datetime.datetime): timestamp of current date at 23:59:59.

    Returns:
        List[Row]: list of generated activities and trips, each represented by a
            spark row object with all its information.
    """
    diary = []
    last_location = None
    clock = start_of_date  # end time of the latest activity placed so far

    for position, stay_type in enumerate(agent_stay_type_sequence):
        location = self.generate_stay_location(
            stay_type, home_location, work_location, last_location, user_id, date, position
        )

        if position != 0:
            # move from the previous stay location to this one
            arrival = self.calculate_trip_final_time(last_location, location, clock)
            diary.append(
                Row(
                    user_id=user_id,
                    activity_type="move",
                    stay_type="move",
                    longitude=float("nan"),
                    latitude=float("nan"),
                    initial_timestamp=clock,
                    final_timestamp=arrival,
                    year=date.year,
                    month=date.month,
                    day=date.day,
                )
            )
            clock = arrival

        # stay of minimum duration (hours), starting when the agent arrives
        min_hours = self.generate_min_stay_duration(stay_type)
        departure = clock + datetime.timedelta(hours=min_hours)
        diary.append(
            Row(
                user_id=user_id,
                activity_type="stay",
                stay_type=stay_type,
                longitude=location[0],
                latitude=location[1],
                initial_timestamp=clock,
                final_timestamp=departure,
                year=date.year,
                month=date.month,
                day=date.day,
            )
        )
        clock = departure
        last_location = location

    # No stays at all, or too many stays to fit in the date:
    # fall back to a simple "only home" diary.
    if not diary or clock > end_of_date:
        return [
            Row(
                user_id=user_id,
                activity_type="stay",
                stay_type="home",
                longitude=home_location[0],
                latitude=home_location[1],
                initial_timestamp=start_of_date,
                final_timestamp=end_of_date,
                year=date.year,
                month=date.month,
                day=date.day,
            )
        ]

    return diary  # actual generated diary

generate_activities()

Generate activity and trip rows according to parameters.

Returns:

Type Description
List[Row]

List[Row]: list of generated activities and trips for the agent for all of the specified dates. Each activity/trip is a spark row object.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
478
479
480
481
482
483
484
485
486
487
488
489
def generate_activities(self) -> List[Row]:
    """
    Generate the activity and trip rows of every date in the date range.

    Returns:
        List[Row]: list of generated activities and trips for the agent for all
            of the specified dates. Each activity/trip is a spark row object.
    """
    collected_rows = []
    for day in self.date_range:
        # each call extends the shared list with the rows of one date
        self.add_date_activities(day, collected_rows)
    return collected_rows

generate_home_location(agent_id)

Generate random home location based on bounding box limits.

Parameters:

Name Type Description Default
agent_id int

identifier of agent, used for random seed generation.

required

Returns:

Type Description
Tuple[float, float]

Tuple[float,float]: coordinates of generated home location.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
def generate_home_location(self, agent_id: int) -> Tuple[float, float]:
    """
    Draw the home location of an agent uniformly inside the bounding box.

    Longitude and latitude are drawn from separately seeded generators, so
    the same agent always gets the same home location.

    Args:
        agent_id (int): identifier of agent, used for random seed generation.

    Returns:
        Tuple[float,float]: coordinates of generated home location.
    """
    lon_rng = Random(self.random_seed_number_generator(1, agent_id))
    lat_rng = Random(self.random_seed_number_generator(2, agent_id))
    home_longitude = lon_rng.uniform(self.longitude_min, self.longitude_max)
    home_latitude = lat_rng.uniform(self.latitude_min, self.latitude_max)
    return (home_longitude, home_latitude)

generate_lonlat_at_distance(lon1, lat1, d, seed)

Given a point (lon, lat) and a distance, in meters, calculate a new random point that is exactly at the specified distance of the provided lon, lat.

Parameters:

Name Type Description Default
lon1 float

longitude of point, specified in decimal degrees.

required
lat1 float

latitude of point, specified in decimal degrees.

required
d float

distance, in meters.

required
seed int

random seed integer.

required

Returns:

Type Description
Tuple[float, float]

Tuple[float, float]: coordinates of randomly generated point.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
def generate_lonlat_at_distance(self, lon1: float, lat1: float, d: float, seed: int) -> Tuple[float, float]:
    """
    Given a point (lon, lat) and a distance, in meters, calculate a new random
    point that is at the specified distance of the provided lon, lat.

    The displacement is split into an east-west and a north-south component,
    each converted from meters to degrees, and each given an independent
    random sign so that the point can fall in any of the four quadrants
    around the origin.

    Args:
        lon1 (float): longitude of point, specified in decimal degrees.
        lat1 (float): latitude of point, specified in decimal degrees.
        d (float): distance, in meters.
        seed (int): random seed integer.

    Returns:
        Tuple[float, float]: coordinates of randomly generated point.
    """
    r = 6_371_000  # Radius of earth in meters.

    # A single generator is used for every draw. Creating a fresh
    # Random(seed) per draw replays the same first value each time, which
    # made both signs below always identical (only two quadrants reachable).
    rng = Random(seed)

    d_x = rng.uniform(0, d)
    d_y = sqrt(d**2 - d_x**2)

    # firstly, convert lat to radians for later
    lat1_radians = lat1 * pi / 180.0

    # how many meters correspond to one degree of latitude?
    meters_per_lat_degree = r * pi / 180  # aprox. 111111 meters
    # thus, the northwards displacement, in degrees of latitude is:
    north_delta = d_y / meters_per_lat_degree

    # but one degree of longitude does not always correspond to the
    # same distance... depends on the latitude at where you are!
    parallel_radius = abs(r * cos(lat1_radians))
    meters_per_lon_degree = parallel_radius * pi / 180  # variable
    # thus, the eastwards displacement, in degrees of longitude is:
    east_delta = d_x / meters_per_lon_degree

    # independent signs for the east-west and north-south components
    lon_sign = rng.choice([-1, 1])
    lat_sign = rng.choice([-1, 1])

    final_lon = lon1 + east_delta * lon_sign
    final_lat = lat1 + north_delta * lat_sign

    return (final_lon, final_lat)

generate_min_stay_duration(stay_type)

Generate minimum stay duration based on stay type specifications.

Parameters:

Name Type Description Default
stay_type str

type of stay. Shall be "home", "work" or "other".

required

Returns:

Name Type Description
float float

minimum stay duration.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
def generate_min_stay_duration(self, stay_type: str) -> float:
    """
    Return the configured minimum stay duration for the given stay type.

    Args:
        stay_type (str): type of stay. Shall be "home", "work" or "other".

    Returns:
        float: minimum stay duration configured for that stay type.

    Raises:
        ValueError: if the stay type is not "home", "work" or "other".
    """
    if stay_type == "home":
        return self.home_duration_min
    if stay_type == "work":
        return self.work_duration_min
    if stay_type == "other":
        return self.other_duration_min
    # Name the offending value so a misconfigured stay sequence can be
    # diagnosed from the traceback alone.
    raise ValueError(f"Unknown stay type {stay_type!r}: expected 'home', 'work' or 'other'.")

generate_other_location(agent_id, date, activity_number, home_location, previous_location, seed=6)

Generate other activity location based on previous location and maximum distance to previous location. If there is no previous location (this is the first activity of the day), then the home location is considered as previous location. If the location falls outside of bounding box limits, try again.

Parameters:

Name Type Description Default
agent_id int

identifier of agent, used for random seed generation.

required
date date

date, used for random seed generation.

required
activity_number int

act position, used for random seed generation.

required
home_location Tuple[float, float]

coordinates of home location.

required
previous_location Tuple[float, float]

coordinates of previous location.

required
seed int

random seed integer. Defaults to 6.

6

Returns:

Type Description
Tuple[float, float]

Tuple[float,float]: coordinates of generated location.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
def generate_other_location(
    self,
    agent_id: int,
    date: datetime.date,
    activity_number: int,
    home_location: Tuple[float, float],
    previous_location: Tuple[float, float],
    seed: int = 6,
) -> Tuple[float, float]:
    """
    Generate other activity location based on previous location and maximum distance
    to previous location. If there is no previous location (this is the first
    activity of the day), then the home location is considered as previous location.
    If the location falls outside of bounding box limits, try again with the
    next seed value.

    Args:
        agent_id (int): identifier of agent, used for random seed generation.
        date (datetime.date): date, used for random seed generation.
        activity_number (int): act position, used for random seed generation.
        home_location (Tuple[float,float]): coordinates of home location.
        previous_location (Tuple[float,float]): coordinates of previous location.
        seed (int, optional): random seed integer. Defaults to 6.

    Returns:
        Tuple[float,float]: coordinates of generated location.
    """
    # The date and the activity position take part in both seeds. Seeding
    # with the agent alone gave an agent the very same distance and
    # direction for every "other" stay of every date.
    seed_distance = self.random_seed_number_generator(seed - 1, agent_id, date, activity_number)
    random_distance = Random(seed_distance).uniform(self.other_distance_min, self.other_distance_max)

    # first activity of the day: measure the distance from home
    if previous_location is None:
        plon, plat = home_location
    else:
        plon, plat = previous_location

    seed_coords = self.random_seed_number_generator(seed, agent_id, date, activity_number)
    olon, olat = self.generate_lonlat_at_distance(plon, plat, random_distance, seed_coords)

    inside_longitude = self.longitude_min < olon < self.longitude_max
    inside_latitude = self.latitude_min < olat < self.latitude_max
    if not inside_longitude or not inside_latitude:  # outside limits
        # retry with the next base seed until the point falls inside the box
        olon, olat = self.generate_other_location(
            agent_id, date, activity_number, home_location, previous_location, seed=seed + 1
        )

    return (olon, olat)

generate_stay_duration(agent_id, date, i, stay_type, remaining_time)

Generate stay duration probabilistically based on activity type and remaining time.

Parameters:

Name Type Description Default
agent_id int

identifier of agent, used for random seed generation.

required
date date

date, used for random seed generation.

required
i int

activity position, used for random seed generation.

required
stay_type str

type of stay. Shall be "home", "work" or "other".

required
remaining_time float

same units as durations.

required

Returns:

Name Type Description
float float

generated activity duration.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
def generate_stay_duration(
    self, agent_id: int, date: datetime.date, i: int, stay_type: str, remaining_time: float
) -> float:
    """
    Generate stay duration probabilistically based on activity type
    and remaining time.

    The duration is drawn uniformly between the minimum duration of the stay
    type and the smaller of its maximum duration and the minimum duration
    plus the remaining time.

    Args:
        agent_id (int): identifier of agent, used for random seed generation.
        date (datetime.date): date, used for random seed generation.
        i (int): activity position, used for random seed generation.
        stay_type (str): type of stay. Shall be "home", "work" or "other".
        remaining_time (float): same units as durations.

    Returns:
        float: generated activity duration.

    Raises:
        ValueError: if the stay type is not "home", "work" or "other".
    """
    if stay_type == "home":
        min_duration = self.home_duration_min
        max_duration = self.home_duration_max
    elif stay_type == "work":
        min_duration = self.work_duration_min
        max_duration = self.work_duration_max
    elif stay_type == "other":
        min_duration = self.other_duration_min
        max_duration = self.other_duration_max
    else:
        # Name the offending value so a misconfigured stay sequence can be
        # diagnosed from the traceback alone.
        raise ValueError(f"Unknown stay type {stay_type!r}: expected 'home', 'work' or 'other'.")
    seed = self.random_seed_number_generator(7, agent_id, date, i)
    # never hand out more extra time than what is left of the day
    max_value = min(max_duration, min_duration + remaining_time)
    return Random(seed).uniform(min_duration, max_value)

generate_stay_location(stay_type, home_location, work_location, previous_location, user_id, date, i)

Generate a random activity location within the bounding box limits based on the activity type and previous activity locations.

Parameters:

Name Type Description Default
stay_type str

type of stay ("home", "work" or "other").

required
home_location Tuple[float, float]

coordinates of home location.

required
work_location Tuple[float, float]

coordinates of work location.

required
previous_location Tuple[float, float]

coordinates of previous activity location.

required
user_id int

agent identifier, used for random seed generation.

required
date date

date, used for random seed generation.

required
i int

activity position, used for random seed generation.

required

Returns:

Type Description
Tuple[float, float]

Tuple[float,float]: randomly generated activity location coordinates.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def generate_stay_location(
    self,
    stay_type: str,
    home_location: Tuple[float, float],
    work_location: Tuple[float, float],
    previous_location: Tuple[float, float],
    user_id: int,
    date: datetime.date,
    i: int,
) -> Tuple[float, float]:
    """
    Resolve the coordinates at which a stay of the given type takes place.

    "home" and "work" stays reuse the agent's fixed locations unchanged. Any
    other stay type is delegated to ``generate_other_location``.

    Args:
        stay_type (str): type of stay ("home", "work" or "other"). Values
            other than "home" and "work" are handled like "other".
        home_location (Tuple[float,float]): coordinates of home location.
        work_location (Tuple[float,float]): coordinates of work location.
        previous_location (Tuple[float,float]): coordinates of previous
            activity location; only forwarded for non-home/work stays.
        user_id (int): agent identifier, forwarded for random seed generation.
        date (datetime.date): date, forwarded for random seed generation.
        i (int): activity position, forwarded for random seed generation.

    Returns:
        Tuple[float,float]: coordinates of the stay location.
    """
    # Fixed locations: returned as-is, no randomness involved.
    if stay_type == "home":
        return home_location
    if stay_type == "work":
        return work_location
    # Everything else is generated from the home and previous locations.
    return self.generate_other_location(user_id, date, i, home_location, previous_location)

generate_stay_type_sequence(agent_id, date)

Generate the sequence of stay types for an agent for a specific date probabilistically based on the superset sequence and specified probabilities. Replace 'home'-'home' and 'work'-'work' sequences by just 'home' or 'work'.

Parameters:

Name Type Description Default
agent_id int

identifier of agent, used for random seed generation.

required
date date

date for activity sequence generation, used for random seed generation.

required

Returns:

Type Description
List[str]

List[str]: list of generated stay types, each represented by a string indicating the stay type (e.g. "home", "work", "other").

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
def generate_stay_type_sequence(self, agent_id: int, date: datetime.date) -> List[str]:
    """
    Build the stay type sequence of one agent for one date.

    Each entry of the superset sequence is kept or dropped independently:
    a value is drawn from a generator seeded with the agent, date and
    position, and the entry is kept when that value is below the probability
    configured for the same position. Consecutive repetitions of "home" or
    of "work" in the result are then collapsed into a single stay.

    Args:
        agent_id (int): identifier of agent, used for random seed generation.
        date (datetime.date): date for activity sequence generation, used for
            random seed generation.

    Returns:
        List[str]: list of generated stay types, each represented by a string
            indicating the stay type (e.g. "home", "work", "other").
    """

    def _is_selected(position: int) -> bool:
        # The probability is read before seeding, matching the positional
        # pairing between the superset and its probabilities.
        threshold = self.stay_sequence_probabilities[position]
        draw_seed = self.random_seed_number_generator(0, agent_id, date, position)
        return Random(draw_seed).random() < threshold

    selected_stays = [
        candidate for position, candidate in enumerate(self.stay_sequence_superset) if _is_selected(position)
    ]
    return self.remove_consecutive_stay_types(selected_stays, {"home", "work"})

generate_work_location(agent_id, home_location, seed=4)

Generate random work location based on home location and maximum distance to home. If the work location falls outside of bounding box limits, try again.

Parameters:

Name Type Description Default
agent_id int

identifier of agent, used for random seed generation.

required
home_location Tuple[float, float]

coordinates of home location.

required
seed int

random seed integer. Defaults to 4.

4

Returns:

Type Description
Tuple[float, float]

Tuple[float,float]: coordinates of generated work location.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
def generate_work_location(
    self, agent_id: int, home_location: Tuple[float, float], seed: int = 4
) -> Tuple[float, float]:
    """
    Pick a work location at a random distance from the agent's home.

    The distance is drawn uniformly between the configured minimum and
    maximum home-work distances, and a point at that distance from home is
    requested from ``generate_lonlat_at_distance``. When the point does not
    lie strictly inside the configured longitude/latitude limits, the
    method calls itself again with ``seed + 1`` until a valid point is found.

    Args:
        agent_id (int): identifier of agent, used for random seed generation.
        home_location (Tuple[float,float]): coordinates of home location,
            as (longitude, latitude).
        seed (int, optional): random seed integer. Defaults to 4.

    Returns:
        Tuple[float,float]: coordinates of generated work location.
    """
    # Distance and direction use different base seeds (seed - 1 and seed).
    distance_rng = Random(self.random_seed_number_generator(seed - 1, agent_id))
    distance_to_home = distance_rng.uniform(self.home_work_distance_min, self.home_work_distance_max)

    home_lon, home_lat = home_location
    coords_seed = self.random_seed_number_generator(seed, agent_id)
    work_lon, work_lat = self.generate_lonlat_at_distance(home_lon, home_lat, distance_to_home, coords_seed)

    inside_limits = (
        self.longitude_min < work_lon < self.longitude_max
        and self.latitude_min < work_lat < self.latitude_max
    )
    if inside_limits:
        return (work_lon, work_lat)

    # Outside limits: retry with the next seed.
    retry_lon, retry_lat = self.generate_work_location(agent_id, home_location, seed=seed + 1)
    return (retry_lon, retry_lat)

haversine(lon1, lat1, lon2, lat2)

Calculate the haversine distance in meters between two points.

Parameters:

Name Type Description Default
lon1 float

longitude of first point, in decimal degrees.

required
lat1 float

latitude of first point, in decimal degrees.

required
lon2 float

longitude of second point, in decimal degrees.

required
lat2 float

latitude of second point, in decimal degrees.

required

Returns:

Name Type Description
float float

distance between both points, in meters.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def haversine(self, lon1: float, lat1: float, lon2: float, lat2: float) -> float:
    """
    Compute the great-circle distance between two points with the
    haversine formula, using a spherical earth of radius 6,371,000 m.

    Args:
        lon1 (float): longitude of first point, in decimal degrees.
        lat1 (float): latitude of first point, in decimal degrees.
        lon2 (float): longitude of second point, in decimal degrees.
        lat2 (float): latitude of second point, in decimal degrees.

    Returns:
        float: distance between both points, in meters.
    """
    earth_radius_m = 6_371_000

    # The trigonometric functions expect radians.
    lon1_rad = radians(lon1)
    lat1_rad = radians(lat1)
    lon2_rad = radians(lon2)
    lat2_rad = radians(lat2)

    delta_lon = lon2_rad - lon1_rad
    delta_lat = lat2_rad - lat1_rad

    # Haversine of the central angle, split into its two addends.
    lat_term = sin(delta_lat / 2) ** 2
    lon_term = cos(lat1_rad) * cos(lat2_rad) * sin(delta_lon / 2) ** 2
    central_angle = 2 * asin(sqrt(lat_term + lon_term))
    return central_angle * earth_radius_m

random_seed_number_generator(base_seed, agent_id=None, date=None, i=None)

Generate random seed integer based on provided arguments.

Parameters:

Name Type Description Default
base_seed int

base integer for operations.

required
agent_id int

agent identifier. Defaults to None.

None
date date

date. Defaults to None.

None
i int

position integer. Defaults to None.

None

Returns:

Name Type Description
int int

generated random seed integer.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def random_seed_number_generator(
    self, base_seed: int, agent_id: int = None, date: datetime.date = None, i: int = None
) -> int:
    """
    Derive a seed by adding an offset to the base seed for every optional
    argument that is provided.

    NOTE: the date offset is the POSIX timestamp of a naive midnight
    datetime, which ``timestamp()`` interprets in the system's local
    timezone, so the seed depends on the timezone of the host.

    Args:
        base_seed (int): base integer for operations.
        agent_id (int, optional): agent identifier; contributes
            ``int(agent_id) * 100``. Defaults to None.
        date (datetime.date, optional): date; contributes the integer
            timestamp of its midnight. Defaults to None.
        i (int, optional): position integer; added as-is. Defaults to None.

    Returns:
        int: generated random seed integer.
    """
    offsets = []
    if agent_id is not None:
        offsets.append(int(agent_id) * 100)
    if date is not None:
        midnight = datetime.datetime.combine(date, datetime.time.min)
        offsets.append(int(midnight.timestamp()))
    if i is not None:
        offsets.append(i)
    # Fold the offsets onto the base seed in agent, date, position order.
    return sum(offsets, base_seed)

remove_consecutive_stay_types(stay_sequence_list, stay_types_to_group)

Generate a new list in which consecutive stays of the same type are replaced by a single stay, provided that the stay type is contained in the "stay_types_to_group" set.

Parameters:

Name Type Description Default
stay_sequence_list List[str]

input stay type list.

required
stay_types_to_group Set[str]

stay types to group.

required

Returns:

Type Description
List[str]

List[str]: output stay sequence list.

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
def remove_consecutive_stay_types(self, stay_sequence_list: List[str], stay_types_to_group: Set[str]) -> List[str]:
    """
    Collapse runs of identical consecutive stays into a single stay, but
    only for stay types listed in "stay_types_to_group". Repetitions of any
    other stay type are kept untouched. The input list is not modified.

    Args:
        stay_sequence_list (List[str]): input stay type list.
        stay_types_to_group (Set[str]): stay types to group.

    Returns:
        List[str]: output stay sequence list.
    """
    stays = list(stay_sequence_list)
    # Pair every stay with its predecessor; the first stay is paired with None.
    predecessors = [None, *stays]
    return [
        current
        for preceding, current in zip(predecessors, stays)
        if not (current == preceding and current in stay_types_to_group)
    ]

update_spark_row(row, column_name, new_value)

Return an updated spark row object, changing the value of a column.

Parameters:

Name Type Description Default
row Row

input spark row.

required
column_name str

name of column to modify.

required
new_value Any

new value to assign.

required

Returns:

Name Type Description
Row Row

modified spark row

Source code in multimno/components/ingestion/synthetic/synthetic_diaries.py
325
326
327
328
329
330
331
332
333
334
335
336
337
def update_spark_row(self, row: Row, column_name: str, new_value: Any) -> Row:
    """
    Build a new spark row that is a copy of the input row with one column
    set to a new value. The column is overwritten when it already exists
    in the row and added otherwise.

    Args:
        row (Row): input spark row.
        column_name (str): name of column to modify.
        new_value (Any): new value to assign.

    Returns:
        Row: modified spark row
    """
    # Work on the row's dict form, then rebuild a Row from the result.
    fields = row.asDict()
    fields[column_name] = new_value
    return Row(**fields)