Skip to content

Commit

Permalink
Add filter-level config for related media
Browse files Browse the repository at this point in the history
  • Loading branch information
dseomn committed Nov 16, 2023
1 parent e068964 commit e1811cf
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 4 deletions.
13 changes: 11 additions & 2 deletions rock_paper_sand/proto/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,17 @@ message WikidataFilter {
// If specified, only items with one of these release statuses will match.
repeated ReleaseStatus release_statuses = 1;

// Placeholder for future options for related_media.
message RelatedMedia {}
// Options for related_media.
message RelatedMedia {
// Wikidata items to ignore when processing related media. Each value is an
// item reference string (e.g., "Q7"); these are added to the set of items
// that are always ignored.
repeated string ignore = 1;

// Classes whose instances should be ignored. Each class listed here is
// expanded to its transitive subclasses, and the union of those expansions
// forms the classes_ignore set.
repeated string classes_ignore = 2;

// Classes to carve out of the classes_ignore set. The transitive subclasses
// of each class listed here are subtracted from the expanded classes_ignore
// set. This only affects classes ignored because of classes_ignore; it does
// not remove classes that are ignored for other reasons.
repeated string classes_ignore_excluded = 3;
}

// Filters for top-level media to find potential additional parts that aren't
// present in the config file, and existing parts that aren't related to the
Expand Down
38 changes: 38 additions & 0 deletions rock_paper_sand/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,25 @@ def __init__(
self._config = filter_config
self._api = api

self._config_ignore = tuple(
map(
wikidata_value.ItemRef.from_string,
self._config.related_media.ignore,
)
)
self._config_classes_ignore = tuple(
map(
wikidata_value.ItemRef.from_string,
self._config.related_media.classes_ignore,
)
)
self._config_classes_ignore_excluded = tuple(
map(
wikidata_value.ItemRef.from_string,
self._config.related_media.classes_ignore_excluded,
)
)

@functools.cached_property
def _fictional_entity_classes(self) -> Set[wikidata_value.ItemRef]:
# Fictional entities (other than fictional universes) can be part of
Expand All @@ -401,6 +420,7 @@ def _fictional_entity_classes(self) -> Set[wikidata_value.ItemRef]:
@functools.cached_property
def _ignored_items(self) -> Set[wikidata_value.ItemRef]:
return {
*self._config_ignore,
# Sometimes these classes are linked to media, instead of instances
# of those classes being linked to the media.
*self._fictional_entity_classes,
Expand All @@ -421,6 +441,24 @@ def _ignored_items(self) -> Set[wikidata_value.ItemRef]:
@functools.cached_property
def _ignored_classes(self) -> Set[wikidata_value.ItemRef]:
return {
*(
frozenset(
itertools.chain.from_iterable(
map(
self._api.transitive_subclasses,
self._config_classes_ignore,
)
)
)
- frozenset(
itertools.chain.from_iterable(
map(
self._api.transitive_subclasses,
self._config_classes_ignore_excluded,
)
)
)
),
*self._api.transitive_subclasses(wikidata_value.Q_BOX_OFFICE),
*self._fictional_entity_classes,
*self._api.transitive_subclasses(wikidata_value.Q_LIST),
Expand Down
18 changes: 16 additions & 2 deletions rock_paper_sand/wikidata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,13 @@ def setUp(self) -> None:
),
dict(
testcase_name="related_media_excludes_ignored_items",
filter_config={"relatedMedia": {}},
filter_config={
"relatedMedia": {
"ignore": ["Q7"],
"classesIgnore": ["Q81"],
"classesIgnoreExcluded": ["Q811"],
},
},
item={
"name": "foo",
"wikidata": "Q1",
Expand All @@ -783,18 +789,26 @@ def setUp(self) -> None:
"Q3": set(),
"Q4": set(),
"Q6": {wikidata_value.ItemRef("Q61")},
"Q7": set(),
"Q8": {wikidata_value.ItemRef("Q81")},
},
api_forms_of_creative_work={
"Q1": set(),
"Q2": set(),
"Q3": set(),
"Q4": set(),
"Q6": set(),
"Q7": set(),
"Q8": set(),
},
api_related_media={
"Q1": wikidata.RelatedMedia(
parents={wikidata_value.ItemRef("Q4")},
siblings={wikidata_value.Q_PARATEXT},
siblings={
wikidata_value.Q_PARATEXT,
wikidata_value.ItemRef("Q7"),
wikidata_value.ItemRef("Q8"),
},
children={wikidata_value.ItemRef("Q6")},
loose={
wikidata_value.ItemRef("Q2"),
Expand Down

0 comments on commit e1811cf

Please sign in to comment.