Skip to content

Commit

Permalink
Make providers configurable, add config file for easier configuration (
Browse files Browse the repository at this point in the history
…#15)

* Making providers configurable
* Adding config file
  • Loading branch information
arnasbr authored Sep 30, 2024
1 parent 589f579 commit 6c64c07
Show file tree
Hide file tree
Showing 10 changed files with 317 additions and 297 deletions.
75 changes: 21 additions & 54 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,44 +37,26 @@ pip install traveltime-google-comparison
```

## Setup
Provide credentials for the APIs via environment variables.

For Google Maps API:

```bash
export GOOGLE_API_KEY=[Your Google Maps API Key]
```

For TomTom API:

```bash
export TOMTOM_API_KEY=[Your TomTom API Key]
```

For HERE API:

```bash
export HERE_API_KEY=[Your HERE API Key]
```

For Mapbox API:

```bash
export MAPBOX_API_KEY=[Your Mapbox API Key]
```

For OpenRoutes API:

```bash
export OPENROUTES_API_KEY=[Your OpenRoutes API Key]
```

For OSRM API: OSRM does not require a key.

For TravelTime API:
```bash
export TRAVELTIME_APP_ID=[Your TravelTime App ID]
export TRAVELTIME_API_KEY=[Your TravelTime API Key]
Provide credentials and the desired maximum number of requests per minute (`max-rpm`) for each API in the `config.json` file.
To disable an API you do not want to use, set its `enabled` value to `false`.

```json
{
"traveltime": {
"app-id": "<your-app-id>",
"api-key": "<your-api-key>",
"max-rpm": "60"
},
"api-providers": [
{
"name": "google",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
...other providers
]
}
```

## Usage
Expand Down Expand Up @@ -104,23 +86,8 @@ Required arguments:
- `--time-zone-id [Time zone ID]`: non-abbreviated time zone identifier in which the time values are specified.
For example: `Europe/London`. For more information, see [here](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).



Optional arguments:
- `--google-max-rpm [int]`: Set max number of parallel requests sent to Google API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--tomtom-max-rpm [int]`: Set max number of parallel requests sent to TomTom API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--mapbox-max-rpm [int]`: Set max number of parallel requests sent to Mapbox API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--here-max-rpm [int]`: Set max number of parallel requests sent to HERE API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--osrm-max-rpm [int]`: Set max number of parallel requests sent to OSRM API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--openroutes-max-rpm [int]`: Set max number of parallel requests sent to OpenRoutes API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--traveltime-max-rpm [int]`: Set max number of parallel requests sent to TravelTime API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--config [Config file path]`: Path to the config file. Default is `./config.json`.

Example:

Expand Down
45 changes: 45 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"traveltime": {
"app-id": "<your-app-id>",
"api-key": "<your-api-key>",
"max-rpm": "60"
},
"api-providers": [
{
"name": "google",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "tomtom",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "here",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "mapbox",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "osrm",
"enabled": true,
"api-key": "not-needed!",
"max-rpm": "60"
},
{
"name": "openroutes",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "20"
}
]
}
45 changes: 23 additions & 22 deletions src/traveltime_google_comparison/analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
from dataclasses import dataclass
from typing import List

from pandas import DataFrame

Expand All @@ -9,6 +8,7 @@
TRAVELTIME_API,
get_capitalized_provider_name,
)
from traveltime_google_comparison.config import Providers


def absolute_error(api_provider: str) -> str:
Expand All @@ -26,31 +26,31 @@ class QuantileErrorResult:


def log_results(
results_with_differences: DataFrame, quantile: float, api_providers: List[str]
results_with_differences: DataFrame, quantile: float, api_providers: Providers
):
for provider in api_providers:
capitalized_provider = get_capitalized_provider_name(provider)
for provider in api_providers.competitors:
name = provider.name
capitalized_provider = get_capitalized_provider_name(name)
logging.info(
f"Mean relative error compared to {capitalized_provider} "
f"API: {results_with_differences[relative_error(provider)].mean():.2f}%"
)
quantile_errors = calculate_quantiles(
results_with_differences, quantile, provider
f"API: {results_with_differences[relative_error(name)].mean():.2f}%"
)
quantile_errors = calculate_quantiles(results_with_differences, quantile, name)
logging.info(
f"{int(quantile * 100)}% of TravelTime results differ from {capitalized_provider} API "
f"by less than {int(quantile_errors.relative_error)}%"
)


def format_results_for_csv(
results_with_differences: DataFrame, api_providers: List[str]
results_with_differences: DataFrame, api_providers: Providers
) -> DataFrame:
formatted_results = results_with_differences.copy()

for provider in api_providers:
formatted_results = formatted_results.drop(columns=[absolute_error(provider)])
relative_error_col = relative_error(provider)
for provider in api_providers.competitors:
name = provider.name
formatted_results = formatted_results.drop(columns=[absolute_error(name)])
relative_error_col = relative_error(name)
formatted_results[relative_error_col] = formatted_results[
relative_error_col
].astype(int)
Expand All @@ -59,7 +59,7 @@ def format_results_for_csv(


def run_analysis(
results: DataFrame, output_file: str, quantile: float, api_providers: List[str]
results: DataFrame, output_file: str, quantile: float, api_providers: Providers
):
results_with_differences = calculate_differences(results, api_providers)
log_results(results_with_differences, quantile, api_providers)
Expand All @@ -71,21 +71,22 @@ def run_analysis(
formatted_results.to_csv(output_file, index=False)


def calculate_differences(results: DataFrame, api_providers: List[str]) -> DataFrame:
def calculate_differences(results: DataFrame, api_providers: Providers) -> DataFrame:
results_with_differences = results.copy()

for provider in api_providers:
absolute_error_col = absolute_error(provider)
relative_error_col = relative_error(provider)
for provider in api_providers.competitors:
name = provider.name
absolute_error_col = absolute_error(name)
relative_error_col = relative_error(name)

results_with_differences[absolute_error_col] = abs(
results[Fields.TRAVEL_TIME[provider]]
results[Fields.TRAVEL_TIME[name]]
- results[Fields.TRAVEL_TIME[TRAVELTIME_API]]
)

results_with_differences[relative_error_col] = (
results_with_differences[absolute_error_col]
/ results_with_differences[Fields.TRAVEL_TIME[provider]]
/ results_with_differences[Fields.TRAVEL_TIME[name]]
* 100
)

Expand All @@ -95,13 +96,13 @@ def calculate_differences(results: DataFrame, api_providers: List[str]) -> DataF
def calculate_quantiles(
results_with_differences: DataFrame,
quantile: float,
api_provider: str,
api_provider_name: str,
) -> QuantileErrorResult:
quantile_absolute_error = results_with_differences[
absolute_error(api_provider)
absolute_error(api_provider_name)
].quantile(quantile, "higher")
quantile_relative_error = results_with_differences[
relative_error(api_provider)
relative_error(api_provider_name)
].quantile(quantile, "higher")
return QuantileErrorResult(
int(quantile_absolute_error), int(quantile_relative_error)
Expand Down
24 changes: 8 additions & 16 deletions src/traveltime_google_comparison/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from traveltime_google_comparison.config import Mode
from traveltime_google_comparison.requests.base_handler import BaseRequestHandler


GOOGLE_API = "google"
TOMTOM_API = "tomtom"
HERE_API = "here"
Expand Down Expand Up @@ -132,7 +133,10 @@ def generate_tasks(


async def collect_travel_times(
args, data, request_handlers: Dict[str, BaseRequestHandler], providers: List[str]
args,
data,
request_handlers: Dict[str, BaseRequestHandler],
provider_names: List[str],
) -> DataFrame:
timezone = pytz.timezone(args.time_zone_id)
localized_start_datetime = localize_datetime(args.date, args.start_time, timezone)
Expand All @@ -144,28 +148,16 @@ async def collect_travel_times(
tasks = generate_tasks(data, time_instants, request_handlers, mode=Mode.DRIVING)

capitalized_providers_str = ", ".join(
[get_capitalized_provider_name(provider) for provider in providers]
)
logger.info(
f"Sending {len(tasks)} requests to {capitalized_providers_str} and TravelTime APIs"
[get_capitalized_provider_name(provider) for provider in provider_names]
)
logger.info(f"Sending {len(tasks)} requests to {capitalized_providers_str} APIs")

results = await asyncio.gather(*tasks)

results_df = pd.DataFrame(results)
deduplicated = results_df.groupby(
[Fields.ORIGIN, Fields.DESTINATION, Fields.DEPARTURE_TIME], as_index=False
).agg(
{
Fields.TRAVEL_TIME[GOOGLE_API]: "first",
Fields.TRAVEL_TIME[TOMTOM_API]: "first",
Fields.TRAVEL_TIME[HERE_API]: "first",
Fields.TRAVEL_TIME[OSRM_API]: "first",
Fields.TRAVEL_TIME[OPENROUTES_API]: "first",
Fields.TRAVEL_TIME[MAPBOX_API]: "first",
Fields.TRAVEL_TIME[TRAVELTIME_API]: "first",
}
)
).agg({Fields.TRAVEL_TIME[provider]: "first" for provider in provider_names})
deduplicated.to_csv(args.output, index=False)
return deduplicated

Expand Down
Loading

0 comments on commit 6c64c07

Please sign in to comment.