Skip to content

Commit

Permalink
Move label and description accessors to wikidata_value
Browse files Browse the repository at this point in the history
  • Loading branch information
dseomn committed Nov 9, 2023
1 parent b9cbb22 commit 0797f75
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 72 deletions.
30 changes: 4 additions & 26 deletions rock_paper_sand/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""Code that uses Wikidata's APIs."""

import collections
from collections.abc import Generator, Iterable, Mapping, Sequence, Set
from collections.abc import Generator, Iterable, Sequence, Set
import contextlib
import dataclasses
import datetime
Expand Down Expand Up @@ -71,28 +71,6 @@ def requests_session() -> Generator[requests.Session, None, None]:
yield session


def _language_keyed_string(
mapping: Mapping[str, Any],
languages: Sequence[str],
) -> str | None:
# https://doc.wikimedia.org/Wikibase/master/php/docs_topics_json.html#json_fingerprint
for language in languages:
if language in mapping:
return mapping[language]["value"]
for other_language, record in mapping.items():
if other_language.startswith(f"{language}-"):
return record["value"]
return None


def _label(item: Any, languages: Sequence[str]) -> str | None:
    """Returns the item's label in the first matching language, or None.

    Args:
        item: Entity JSON that has a "labels" section.
        languages: Language codes to try, in order of preference.
    """
    return _language_keyed_string(item["labels"], languages)


def _description(item: Any, languages: Sequence[str]) -> str | None:
    """Returns the item's description in the first matching language, or None.

    Args:
        item: Entity JSON that has a "descriptions" section.
        languages: Language codes to try, in order of preference.
    """
    return _language_keyed_string(item["descriptions"], languages)


def _truthy_statements(
item: Any, prop: wikidata_value.PropertyRef
) -> Sequence[Any]:
Expand Down Expand Up @@ -686,12 +664,12 @@ def _related_item_result_extra(
category: str,
item: wikidata_value.ItemRef,
) -> media_filter.ResultExtra:
item_data = self._api.entity(item).json_full
entity = self._api.entity(item)
item_description_parts = []
if (label := _label(item_data, self._config.languages)) is not None:
if (label := entity.label(self._config.languages)) is not None:
item_description_parts.append(label)
if (
description := _description(item_data, self._config.languages)
description := entity.description(self._config.languages)
) is not None:
item_description_parts.append(f"({description})")
item_description_parts.append(f"<{item}>")
Expand Down
46 changes: 1 addition & 45 deletions rock_paper_sand/wikidata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

# pylint: disable=missing-module-docstring

from collections.abc import Callable, Collection, Mapping, Sequence, Set
from collections.abc import Collection, Mapping, Sequence, Set
import datetime
from typing import Any
from unittest import mock
Expand Down Expand Up @@ -318,50 +318,6 @@ def test_related_media_error(self) -> None:

class WikidataUtilsTest(parameterized.TestCase):
# pylint: disable=protected-access
@parameterized.product(
    # Each accessor is paired with the section of the item JSON it reads.
    (
        dict(function=wikidata._label, section="labels"),
        dict(function=wikidata._description, section="descriptions"),
    ),
    (
        # Nothing in the section at all.
        dict(
            mapping={},
            languages=("en",),
            expected_value=None,
        ),
        # No languages requested.
        dict(
            mapping={"en": {"value": "foo"}},
            languages=(),
            expected_value=None,
        ),
        # An unmatched language is skipped, and an exact match is used even
        # though a variant of the same language is also present.
        dict(
            mapping={
                "en": {"value": "foo"},
                "en-us": {"value": "bar"},
            },
            languages=("qa", "en"),
            expected_value="foo",
        ),
        # A variant is used when there is no exact match.
        dict(
            mapping={"en-us": {"value": "foo"}},
            languages=("en",),
            expected_value="foo",
        ),
    ),
)
def test_language_keyed_string(
    self,
    *,
    function: Callable[[Any, Sequence[str]], str | None],
    section: str,
    mapping: Mapping[str, Any],
    languages: Sequence[str],
    expected_value: str | None,
) -> None:
    """Checks language selection for every accessor and mapping case."""
    self.assertEqual(
        expected_value,
        function({section: mapping}, languages),
    )

@parameterized.named_parameters(
dict(
Expand Down
24 changes: 23 additions & 1 deletion rock_paper_sand/wikidata_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"""

import abc
from collections.abc import Collection
from collections.abc import Collection, Mapping, Sequence
import dataclasses
import re
from typing import Any, Self
Expand Down Expand Up @@ -195,6 +195,20 @@ def human_readable_url_prefix(cls) -> str:
del _p


def _language_keyed_string(
mapping: Mapping[str, Any],
languages: Sequence[str],
) -> str | None:
# https://doc.wikimedia.org/Wikibase/master/php/docs_topics_json.html#json_fingerprint
for language in languages:
if language in mapping:
return mapping[language]["value"]
for other_language, record in mapping.items():
if other_language.startswith(f"{language}-"):
return record["value"]
return None


@dataclasses.dataclass(frozen=True, kw_only=True)
class Entity:
"""Data about an entity.
Expand All @@ -208,3 +222,11 @@ class Entity:
"""

json_full: Any

def label(self, languages: Sequence[str]) -> str | None:
    """Returns a label in the first matching language, or None.

    Args:
        languages: Language codes to try, in order of preference.
    """
    return _language_keyed_string(self.json_full["labels"], languages)

def description(self, languages: Sequence[str]) -> str | None:
    """Returns a description in the first matching language, or None.

    Args:
        languages: Language codes to try, in order of preference.
    """
    return _language_keyed_string(self.json_full["descriptions"], languages)
54 changes: 54 additions & 0 deletions rock_paper_sand/wikidata_value_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

# pylint: disable=missing-module-docstring

from collections.abc import Callable, Mapping, Sequence
from typing import Any

from absl.testing import absltest
from absl.testing import parameterized

Expand Down Expand Up @@ -95,6 +98,57 @@ def test_entity_ref_from_uri_valid(self) -> None:
).id,
)

@parameterized.product(
    # Each Entity accessor is paired with the JSON section it reads.
    (
        dict(function=wikidata_value.Entity.label, section="labels"),
        dict(
            function=wikidata_value.Entity.description,
            section="descriptions",
        ),
    ),
    (
        # Nothing in the section at all.
        dict(
            mapping={},
            languages=("en",),
            expected_value=None,
        ),
        # No languages requested.
        dict(
            mapping={"en": {"value": "foo"}},
            languages=(),
            expected_value=None,
        ),
        # An unmatched language is skipped, and an exact match is used even
        # though a variant of the same language is also present.
        dict(
            mapping={
                "en": {"value": "foo"},
                "en-us": {"value": "bar"},
            },
            languages=("qa", "en"),
            expected_value="foo",
        ),
        # A variant is used when there is no exact match.
        dict(
            mapping={"en-us": {"value": "foo"}},
            languages=("en",),
            expected_value="foo",
        ),
    ),
)
def test_language_keyed_string(
    self,
    *,
    function: Callable[[wikidata_value.Entity, Sequence[str]], str | None],
    section: str,
    mapping: Mapping[str, Any],
    languages: Sequence[str],
    expected_value: str | None,
) -> None:
    """Checks language selection for every accessor and mapping case."""
    self.assertEqual(
        expected_value,
        function(
            wikidata_value.Entity(json_full={section: mapping}),
            languages,
        ),
    )


if __name__ == "__main__":
absltest.main()

0 comments on commit 0797f75

Please sign in to comment.