Skip to content

Commit

Permalink
Try to handle TV pilots better in related media
Browse files Browse the repository at this point in the history
  • Loading branch information
dseomn committed Nov 10, 2023
1 parent 7ef5efb commit da60442
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 0 deletions.
35 changes: 35 additions & 0 deletions rock_paper_sand/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,14 @@ def related_media(self, item_ref: wikidata_value.ItemRef) -> RelatedMedia:
return self._related_media[item_ref]


def _is_positive_integer(value: str) -> bool:
try:
int_value = int(value)
except ValueError:
return False
return int_value > 0


def _release_status(
item: wikidata_value.Entity,
*,
Expand Down Expand Up @@ -405,6 +413,12 @@ def _tv_episode_parent_classes(self) -> Set[wikidata_value.ItemRef]:
*self._tv_season_part_classes,
}

@functools.cached_property
def _tv_pilot_classes(self) -> Set[wikidata_value.ItemRef]:
    """Classes the API reports as transitive subclasses of TV pilot.

    Computed on first access and then cached on the instance by
    functools.cached_property.
    """
    pilot_root = wikidata_value.Q_TELEVISION_PILOT
    return self._api.transitive_subclasses(pilot_root)

@functools.cached_property
def _possible_tv_special_classes(self) -> Set[wikidata_value.ItemRef]:
return {
Expand Down Expand Up @@ -505,6 +519,27 @@ def _is_integral_child(
and child_classes & child_classes_to_check
):
return True
if (
child_classes & self._tv_pilot_classes
and parent_classes & self._tv_episode_parent_classes
):
# Some pilots are regular episodes, some aren't. This code assumes
# that if all of the pilot's ordinals (e.g., episode number and
# season number) are positive integers (like in the common case of
# S1E1), it's a regular episode.
pilot = self._api.entity(child)
has_ordinals = False
for statement in (
*pilot.truthy_statements(wikidata_value.P_SEASON),
*pilot.truthy_statements(wikidata_value.P_PART_OF_THE_SERIES),
):
for snak in statement.qualifiers(
wikidata_value.P_SERIES_ORDINAL
):
has_ordinals = True
if not _is_positive_integer(snak.string_value()):
return False
return has_ordinals
if (
child_classes & self._tv_episode_classes
and not child_classes & self._possible_tv_special_classes
Expand Down
122 changes: 122 additions & 0 deletions rock_paper_sand/wikidata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ def _snak_item(item_id: str) -> Any:
}


def _snak_string(value: str) -> Any:
return {
"snaktype": "value",
"datatype": "string",
"datavalue": {"type": "string", "value": value},
}


def _snak_time(time: str) -> Any:
return {
"snaktype": "value",
Expand Down Expand Up @@ -740,6 +748,20 @@ def setUp(self) -> None:
api_entities={
"Q2": {"labels": {}, "descriptions": {}},
"Q22": {"labels": {}, "descriptions": {}},
"Q24": {
"claims": {
wikidata_value.P_PART_OF_THE_SERIES.id: [
{
"rank": "normal",
"qualifiers": {
wikidata_value.P_SERIES_ORDINAL.id: [
_snak_string("1"),
],
},
},
],
},
},
"Q3": {"labels": {}, "descriptions": {}},
"Q4": {"labels": {}, "descriptions": {}},
},
Expand All @@ -752,6 +774,7 @@ def setUp(self) -> None:
wikidata_value.Q_TELEVISION_SPECIAL,
},
"Q23": {wikidata_value.Q_TELEVISION_SERIES_SEASON},
"Q24": {wikidata_value.Q_TELEVISION_PILOT},
"Q31": {wikidata_value.Q_LITERARY_WORK},
"Q3": {wikidata_value.Q_LITERARY_WORK},
"Q4": {wikidata_value.Q_FILM},
Expand All @@ -775,6 +798,7 @@ def setUp(self) -> None:
wikidata_value.ItemRef("Q21"),
wikidata_value.ItemRef("Q22"),
wikidata_value.ItemRef("Q23"),
wikidata_value.ItemRef("Q24"),
},
loose=set(),
),
Expand All @@ -796,6 +820,12 @@ def setUp(self) -> None:
children=set(),
loose=set(),
),
"Q24": wikidata.RelatedMedia(
parents=set(),
siblings=set(),
children=set(),
loose=set(),
),
"Q31": wikidata.RelatedMedia(
parents={wikidata_value.ItemRef("Q3")},
siblings=set(),
Expand Down Expand Up @@ -832,6 +862,7 @@ def setUp(self) -> None:
"related item: <https://www.wikidata.org/wiki/Q22>"
),
# Q23 is an integral child of Q2.
# Q24 is an integral child of Q2.
# Q31 is an integral child of Q3.
media_filter.ResultExtraString(
"related item: <https://www.wikidata.org/wiki/Q3>"
Expand All @@ -843,6 +874,97 @@ def setUp(self) -> None:
},
),
),
dict(
testcase_name="related_media_does_not_ignore_special_tv_pilots",
filter_config={"relatedMedia": {}},
item={"name": "foo", "wikidata": "Q1"},
api_entities={
"Q2": {"labels": {}, "descriptions": {}, "claims": {}},
"Q3": {
"labels": {},
"descriptions": {},
"claims": {
wikidata_value.P_PART_OF_THE_SERIES.id: [
{
"rank": "normal",
"qualifiers": {
wikidata_value.P_SERIES_ORDINAL.id: [
_snak_string("0"),
_snak_string("1"),
],
},
},
],
},
},
"Q4": {
"labels": {},
"descriptions": {},
"claims": {
wikidata_value.P_PART_OF_THE_SERIES.id: [
{
"rank": "normal",
"qualifiers": {
wikidata_value.P_SERIES_ORDINAL.id: [
_snak_string("1.5"),
],
},
},
],
},
},
},
api_entity_classes={
"Q1": {wikidata_value.Q_TELEVISION_SERIES},
"Q2": {wikidata_value.Q_TELEVISION_PILOT},
"Q3": {wikidata_value.Q_TELEVISION_PILOT},
"Q4": {wikidata_value.Q_TELEVISION_PILOT},
},
api_related_media={
"Q1": wikidata.RelatedMedia(
parents=set(),
siblings=set(),
children={
wikidata_value.ItemRef("Q2"),
wikidata_value.ItemRef("Q3"),
wikidata_value.ItemRef("Q4"),
},
loose=set(),
),
"Q2": wikidata.RelatedMedia(
parents=set(),
siblings=set(),
children=set(),
loose=set(),
),
"Q3": wikidata.RelatedMedia(
parents=set(),
siblings=set(),
children=set(),
loose=set(),
),
"Q4": wikidata.RelatedMedia(
parents=set(),
siblings=set(),
children=set(),
loose=set(),
),
},
expected_result=media_filter.FilterResult(
True,
extra={
media_filter.ResultExtraString(
"related item: <https://www.wikidata.org/wiki/Q2>"
),
media_filter.ResultExtraString(
"related item: <https://www.wikidata.org/wiki/Q3>"
),
media_filter.ResultExtraString(
"related item: <https://www.wikidata.org/wiki/Q4>"
),
},
),
),
dict(
testcase_name="related_media_does_not_traverse_collections",
filter_config={"relatedMedia": {}},
Expand Down
3 changes: 3 additions & 0 deletions rock_paper_sand/wikidata_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def human_readable_url_prefix(cls) -> str:
Q_PROLEPTIC_GREGORIAN_CALENDAR = _i("https://www.wikidata.org/wiki/Q1985727")
Q_RELEASE_GROUP = _i("https://www.wikidata.org/wiki/Q108346082")
Q_TELEVISION_FILM = _i("https://www.wikidata.org/wiki/Q506240")
Q_TELEVISION_PILOT = _i("https://www.wikidata.org/wiki/Q653916")
Q_TELEVISION_SERIES = _i("https://www.wikidata.org/wiki/Q5398426")
Q_TELEVISION_SERIES_EPISODE = _i("https://www.wikidata.org/wiki/Q21191270")
Q_TELEVISION_SERIES_SEASON = _i("https://www.wikidata.org/wiki/Q3464665")
Expand Down Expand Up @@ -189,6 +190,8 @@ def human_readable_url_prefix(cls) -> str:
P_PART_OF_THE_SERIES = _p("https://www.wikidata.org/wiki/Property:P179")
P_PLOT_EXPANDED_IN = _p("https://www.wikidata.org/wiki/Property:P5940")
P_PUBLICATION_DATE = _p("https://www.wikidata.org/wiki/Property:P577")
P_SEASON = _p("https://www.wikidata.org/wiki/Property:P4908")
P_SERIES_ORDINAL = _p("https://www.wikidata.org/wiki/Property:P1545")
P_START_TIME = _p("https://www.wikidata.org/wiki/Property:P580")
P_SUBCLASS_OF = _p("https://www.wikidata.org/wiki/Property:P279")
P_SUPPLEMENT_TO = _p("https://www.wikidata.org/wiki/Property:P9234")
Expand Down

0 comments on commit da60442

Please sign in to comment.