Skip to content

Commit

Permalink
address PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
atvaccaro committed Jan 31, 2023
1 parent 59f352f commit cccd699
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ int_gtfs_rt__service_alerts_trip_summaries AS (
trip_start_date,
COUNT(DISTINCT id) AS num_distinct_message_ids,
ARRAY_AGG(DISTINCT service_alert_message_key) AS service_alert_message_keys,
MIN(_extract_ts) AS min_extract_ts,
MAX(_extract_ts) AS max_extract_ts,
MIN(header_timestamp) AS min_header_timestamp,
MAX(header_timestamp) AS max_header_timestamp,
FROM service_alerts
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ int_gtfs_rt__trip_updates_summaries AS (
trip_start_time,
trip_start_date,
COUNT(DISTINCT id) AS num_distinct_message_ids,
MIN(_extract_ts) AS min_extract_ts,
MAX(_extract_ts) AS max_extract_ts,
MIN(header_timestamp) AS min_header_timestamp,
MAX(header_timestamp) AS max_header_timestamp,
MIN(trip_update_timestamp) AS min_trip_update_timestamp,
MAX(trip_update_timestamp) AS max_trip_update_timestamp,
MAX(trip_update_delay) AS max_delay,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,14 @@ int_gtfs_rt__vehicle_positions_trip_summaries AS (
trip_start_time,
trip_start_date,
COUNT(DISTINCT id) AS num_distinct_message_ids,
MIN(vehicle_timestamp) AS min_trip_update_timestamp,
MAX(vehicle_timestamp) AS max_trip_update_timestamp,
MIN(_extract_ts) AS min_extract_ts,
MAX(_extract_ts) AS max_extract_ts,
MIN(header_timestamp) AS min_header_timestamp,
MAX(header_timestamp) AS max_header_timestamp,
MIN(vehicle_timestamp) AS min_vehicle_timestamp,
MAX(vehicle_timestamp) AS max_vehicle_timestamp,
ARRAY_AGG(position_latitude ORDER BY _extract_ts)[OFFSET(0)] AS first_position_latitude,
ARRAY_AGG(position_longitude ORDER BY _extract_ts)[OFFSET(0)] AS first_position_longitude,
FROM vehicle_positions
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8
)
Expand Down
80 changes: 58 additions & 22 deletions warehouse/models/mart/gtfs/_mart_gtfs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2005,33 +2005,69 @@ models:
columns:
- name: key
tests: *primary_key_tests
- name: dt
- name: schedule_to_use_for_rt_validation_gtfs_dataset_key
description: Schedule feed used to group RT feeds likely representing the same underlying schedule.
- name: trip_identifier
description: Hashed representation of a TripDescriptor.
- name: trip_id
description: Part of a TripDescriptor.
- name: trip_route_id
description: Part of a TripDescriptor.
- name: trip_direction_id
description: Part of a TripDescriptor.
- name: trip_start_time
description: Part of a TripDescriptor.
- name: trip_start_date
description: Part of a TripDescriptor.
# trip update facts
- name: tu_num_distinct_message_ids
description: |
Count of distinct trip update message IDs referencing this trip.
description: Count of distinct trip update message IDs referencing this trip.
- name: tu_min_extract_ts
description: Timestamp of first trip update extract referencing this trip.
- name: tu_max_extract_ts
description: Timestamp of last trip update extract referencing this trip.
- name: tu_min_header_timestamp
description: Timestamp of first trip update header referencing this trip.
- name: tu_max_header_timestamp
description: Timestamp of last trip update header referencing this trip.
- name: tu_min_trip_update_timestamp
description: |
Timestamp of first trip update referencing this trip.
description: Timestamp of first trip update referencing this trip.
- name: tu_max_trip_update_timestamp
description: |
Timestamp of last trip update referencing this trip.
description: Timestamp of last trip update referencing this trip.
- name: tu_max_delay
description: |
Maximum observed delay for this trip.
description: Maximum observed delay for this trip.
- name: tu_num_skipped_stops
description: |
Number of skipped stops for this trip.
description: Number of skipped stops for this trip.
# vehicle position facts
- name: vp_num_distinct_message_ids
description: |
Count of distinct vehicle position message IDs referencing this trip.
- name: vp_min_trip_update_timestamp
description: |
Timestamp of first vehicle position referencing this trip.
- name: vp_max_trip_update_timestamp
description: |
Timestamp of last vehicle position referencing this trip.
description: Count of distinct vehicle position message IDs referencing this trip.
- name: vp_min_extract_ts
description: Timestamp of first vehicle position extract referencing this trip.
- name: vp_max_extract_ts
description: Timestamp of last vehicle position extract referencing this trip.
- name: vp_min_header_timestamp
description: Timestamp of first vehicle position header referencing this trip.
- name: vp_max_header_timestamp
description: Timestamp of last vehicle position header referencing this trip.
- name: vp_min_vehicle_timestamp
description: Timestamp of first vehicle position referencing this trip.
- name: vp_max_vehicle_timestamp
description: Timestamp of last vehicle position referencing this trip.
- name: vp_first_position_latitude
description: Latitude of first vehicle position referencing this trip.
- name: vp_first_position_longitude
description: Longitude of first vehicle position referencing this trip.
# service alert facts
- name: sa_num_distinct_message_ids
description: |
Count of distinct service alert message IDs referencing this trip.
description: Count of distinct service alert message IDs referencing this trip.
- name: sa_service_alert_message_keys
description:
Array of keys of service alert messages referencing this trip.
description: Array of keys of service alert messages referencing this trip.
- name: sa_min_extract_ts
description: Timestamp of first service alert extract referencing this trip.
- name: sa_max_extract_ts
description: Timestamp of last service alert extract referencing this trip.
- name: sa_min_header_timestamp
description: Timestamp of first service alert header referencing this trip.
- name: sa_max_header_timestamp
description: Timestamp of last service alert header referencing this trip.
26 changes: 24 additions & 2 deletions warehouse/models/mart/gtfs/fct_observed_trips.sql
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ service_alerts_with_associated_schedule AS (

fct_observed_trips AS (
SELECT
-- keys/identifiers
{{ dbt_utils.surrogate_key([
'dt',
'schedule_to_use_for_rt_validation_gtfs_dataset_key',
Expand All @@ -108,16 +109,37 @@ fct_observed_trips AS (
COALESCE(tu.trip_direction_id, vp.trip_direction_id, sa.trip_direction_id) AS trip_direction_id,
COALESCE(tu.trip_start_time, vp.trip_start_time, sa.trip_start_time) AS trip_start_time,
COALESCE(tu.trip_start_date, vp.trip_start_date, sa.trip_start_date) AS trip_start_date,

-- trip updates facts
tu.num_distinct_message_ids AS tu_num_distinct_message_ids,
tu.min_extract_ts AS tu_min_extract_ts,
tu.max_extract_ts AS tu_max_extract_ts,
tu.min_header_timestamp AS tu_min_header_timestamp,
tu.max_header_timestamp AS tu_max_header_timestamp,
tu.min_trip_update_timestamp AS tu_min_trip_update_timestamp,
tu.max_trip_update_timestamp AS tu_max_trip_update_timestamp,
tu.max_delay AS tu_max_delay,
tu.num_skipped_stops AS tu_num_skipped_stops,

-- vehicle positions facts
vp.num_distinct_message_ids AS vp_num_distinct_message_ids,
vp.min_trip_update_timestamp AS vp_min_trip_update_timestamp,
vp.max_trip_update_timestamp AS vp_max_trip_update_timestamp,
vp.min_extract_ts AS vp_min_extract_ts,
vp.max_extract_ts AS vp_max_extract_ts,
vp.min_header_timestamp AS vp_min_header_timestamp,
vp.max_header_timestamp AS vp_max_header_timestamp,
vp.min_vehicle_timestamp AS vp_min_vehicle_timestamp,
vp.max_vehicle_timestamp AS vp_max_vehicle_timestamp,
vp.first_position_latitude AS vp_first_position_latitude,
vp.first_position_longitude AS vp_first_position_longitude,

-- service alerts facts
sa.num_distinct_message_ids AS sa_num_distinct_message_ids,
sa.service_alert_message_keys AS sa_service_alert_message_keys,
sa.min_extract_ts AS sa_min_extract_ts,
sa.max_extract_ts AS sa_max_extract_ts,
sa.min_header_timestamp AS sa_min_header_timestamp,
sa.max_header_timestamp AS sa_max_header_timestamp,

FROM trip_updates_with_associated_schedule AS tu
FULL OUTER JOIN vehicle_positions_with_associated_schedule AS vp
USING (dt, schedule_to_use_for_rt_validation_gtfs_dataset_key, trip_identifier)
Expand Down

0 comments on commit cccd699

Please sign in to comment.