From 942db488291dfec9f91c7f3365dd92d7d3fbbd9a Mon Sep 17 00:00:00 2001 From: David Mandelberg Date: Sat, 29 Jun 2024 14:15:54 -0400 Subject: [PATCH] Don't ignore children when the parent-child border isn't crossed If the parent is being processed, this shouldn't change anything because the child wouldn't be processed either way, whether because it's integral or because the border isn't crossed. If the child is being processed, the old behavior was that the parent wasn't processed because of the border, but the child was still ignored for being integral. With this change, the child is no longer ignored in that case. --- rock_paper_sand/wikidata.py | 56 +++++++++++++++++--------------- rock_paper_sand/wikidata_test.py | 42 ++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/rock_paper_sand/wikidata.py b/rock_paper_sand/wikidata.py index 6455943..572cd42 100644 --- a/rock_paper_sand/wikidata.py +++ b/rock_paper_sand/wikidata.py @@ -685,6 +685,33 @@ def _is_ignored( or item_classes & ignored_classes_from_request ) + def _should_cross_parent_child_border( + self, parent: wikidata_value.ItemRef, child: wikidata_value.ItemRef + ) -> bool: + """Returns whether to cross the parent-child border for related media. + + Some parent-child pairs cross the border between generally unrelated + sets of media. E.g., somebody interested in watching a series of + anthology films might want to know all the films in the series. But + there could be many items related to stories in the individual + anthologies, and those items don't have much of a connection to the + series of anthologies that the user is interested in. Or from the other + side, if the user is interested in a book that was adapted into a part + of an anthology movie, they might be interested in that part of the + anthology movie, but not necessarily in the entire anthology series. + + Args: + parent: Parent. + child: Child. + """ + del child # Unused. + parent_classes = self._api.entity_classes(parent) + parent_forms = self._api.forms_of_creative_work(parent) + return ( + not parent_classes & self._anthology_classes + and not parent_forms & self._anthology_classes + ) + def _integral_child_classes( self, ) -> Iterable[ @@ -727,6 +754,8 @@ def _is_integral_child( parent: Parent. child: Child. """ + if not self._should_cross_parent_child_border(parent, child): + return False parent_classes = self._api.entity_classes(parent) parent_classes_and_forms = ( parent_classes | self._api.forms_of_creative_work(parent) @@ -794,33 +823,6 @@ def _integral_children( if self._is_integral_child(item_ref, child) ) - def _should_cross_parent_child_border( - self, parent: wikidata_value.ItemRef, child: wikidata_value.ItemRef - ) -> bool: - """Returns whether to cross the parent-child border for related media. - - Some parent-child pairs cross the border between generally unrelated - sets of media. E.g., somebody interested in watching a series of - anthology films might want to know all the films in the series. But - there could be many items related to stories in the individual - anthologies, and those items don't have much of a connection to the - series of anthologies that the user is interested in. Or from the other - side, if the user is interested in a book that was adapted into a part - of an anthology movie, they might be interested in that part of the - anthology movie, but not necessarily in the entire anthology series. - - Args: - parent: Parent. - child: Child. - """ - del child # Unused. - parent_classes = self._api.entity_classes(parent) - parent_forms = self._api.forms_of_creative_work(parent) - return ( - not parent_classes & self._anthology_classes - and not parent_forms & self._anthology_classes - ) - def _related_item_priority( self, item_ref: wikidata_value.ItemRef ) -> _RelatedMediaPriority: diff --git a/rock_paper_sand/wikidata_test.py b/rock_paper_sand/wikidata_test.py index 9138a1c..8bc2000 100644 --- a/rock_paper_sand/wikidata_test.py +++ b/rock_paper_sand/wikidata_test.py @@ -1149,6 +1149,48 @@ def setUp(self) -> None: }, ), ), + dict( + testcase_name="related_media_includes_integral_child_of_collection", + filter_config={"relatedMedia": {}}, + item={"name": "foo", "wikidata": "Q1"}, + api_entities={ + "Q21": {"labels": {}, "descriptions": {}}, + }, + api_entity_classes={ + "Q1": set(), + "Q2": {wikidata_value.Q_FILM, wikidata_value.Q_ANTHOLOGY_FILM}, + "Q21": {wikidata_value.Q_FILM}, + }, + api_forms_of_creative_work={ + "Q1": set(), + "Q2": set(), + "Q21": set(), + }, + api_related_media={ + "Q1": wikidata.RelatedMedia( + parents=set(), + siblings={wikidata_value.ItemRef("Q21")}, + children=set(), + loose=set(), + ), + "Q21": wikidata.RelatedMedia( + parents={wikidata_value.ItemRef("Q2")}, + siblings=set(), + children=set(), + loose=set(), + ), + }, + expected_result=media_filter.FilterResult( + True, + extra={ + media_filter.ResultExtra( + human_readable=( + "related item: " + ), + ), + }, + ), + ), dict( testcase_name="related_media_includes_label_and_description", filter_config={"languages": ["en"], "relatedMedia": {}},