From 6722524a1b49e7596a28c111655bc867b5303f6f Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 24 Sep 2024 13:47:29 +0800 Subject: [PATCH 1/2] Add top-level folder for QuickSight assets --- metaphor/quick_sight/extractor.py | 11 +++++- metaphor/quick_sight/folder.py | 26 ++++++++++++++ poetry.lock | 8 ++--- pyproject.toml | 4 +-- tests/quick_sight/expected.json | 59 +++++++++++++++++++++++++++---- 5 files changed, 94 insertions(+), 14 deletions(-) create mode 100644 metaphor/quick_sight/folder.py diff --git a/metaphor/quick_sight/extractor.py b/metaphor/quick_sight/extractor.py index 538ca7ae..97580922 100644 --- a/metaphor/quick_sight/extractor.py +++ b/metaphor/quick_sight/extractor.py @@ -24,6 +24,11 @@ ) from metaphor.quick_sight.client import Client from metaphor.quick_sight.config import QuickSightRunConfig +from metaphor.quick_sight.folder import ( + DASHBOARD_DIRECTORIES, + DATA_SET_DIRECTORIES, + create_top_level_folders, +) from metaphor.quick_sight.lineage import ( extract_virtual_view_schema, extract_virtual_view_upstream, @@ -103,6 +108,7 @@ def _make_entities_list(self) -> Collection[ENTITY_TYPES]: entities: List[ENTITY_TYPES] = [] entities.extend(self._virtual_views.values()) entities.extend(self._dashboards.values()) + entities.extend(create_top_level_folders()) return entities def _init_virtual_view(self, arn: str, data_set: DataSet) -> VirtualView: @@ -111,7 +117,9 @@ def _init_virtual_view(self, arn: str, data_set: DataSet) -> VirtualView: name=arn, type=VirtualViewType.QUICK_SIGHT, ), - structure=AssetStructure(name=data_set.Name), + structure=AssetStructure( + name=data_set.Name, directories=DATA_SET_DIRECTORIES + ), source_info=SourceInfo( created_at_source=data_set.CreatedTime, last_updated=data_set.LastUpdatedTime, @@ -136,6 +144,7 @@ def _init_dashboard(self, arn: str, dashboard: Dashboard) -> MetaphorDashboard: ), structure=AssetStructure( name=dashboard.Name, + directories=DASHBOARD_DIRECTORIES, ), ) diff --git a/metaphor/quick_sight/folder.py b/metaphor/quick_sight/folder.py new file mode 100644 index 00000000..e2bf0356 --- /dev/null +++ b/metaphor/quick_sight/folder.py @@ -0,0 +1,26 @@ +from typing import List + +from metaphor.models.metadata_change_event import ( + DashboardPlatform, + Hierarchy, + HierarchyInfo, + HierarchyLogicalID, + HierarchyType, +) + +DASHBOARD_DIRECTORIES = ["DASHBOARD"] +DATA_SET_DIRECTORIES = ["DATA_SET"] + + +def _create_virtual_hierarchy(name: str, path: List[str]) -> Hierarchy: + return Hierarchy( + logical_id=HierarchyLogicalID(path=[DashboardPlatform.QUICK_SIGHT.name] + path), + hierarchy_info=HierarchyInfo(name=name, type=HierarchyType.VIRTUAL_HIERARCHY), + ) + + +def create_top_level_folders() -> List[Hierarchy]: + return [ + _create_virtual_hierarchy("Dashboards", DASHBOARD_DIRECTORIES), + _create_virtual_hierarchy("DataSets", DATA_SET_DIRECTORIES), + ] diff --git a/poetry.lock b/poetry.lock index 6caa3d92..e9114a72 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3258,13 +3258,13 @@ files = [ [[package]] name = "metaphor-models" -version = "0.38.3" +version = "0.39.1" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "metaphor_models-0.38.3-py3-none-any.whl", hash = "sha256:e33810d5445aa4080d8ec204818272a7aea2d1deb44a7866335132cb35bcd0a3"}, - {file = "metaphor_models-0.38.3.tar.gz", hash = "sha256:79e8710e08e9448ee2607526fb9c54b1d75008509f79c0c141af487c15f7fea2"}, + {file = "metaphor_models-0.39.1-py3-none-any.whl", hash = "sha256:fbf4434aaf5c9a0b62c348567ac42e81225ea5863c6f5d201f9bba66e9a590d9"}, + {file = "metaphor_models-0.39.1.tar.gz", hash = "sha256:b1e4692d555a079c309227a72b4a8ff13421d53308ad6100dd021950107f118b"}, ] [[package]] @@ -6666,4 +6666,4 @@ unity-catalog = ["databricks-sdk", "databricks-sql-connector", "sqlglot"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.12" -content-hash = "0f6b045d0cc3d5203c7d5488cfdb16bdfcc3ddcf9bf103ff2f20099417b1dfda" +content-hash = "180a1e347f9c53e49e9198199f6fefb3e6efcdd7552a1f3496f1b977b88e97f5" diff --git a/pyproject.toml b/pyproject.toml index f582b347..94d0ba3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.107" +version = "0.14.108" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -42,7 +42,7 @@ llama-index-readers-confluence = { version = "^0.1.4", optional = true } llama-index-readers-notion = { version = "^0.1.6", optional = true } looker-sdk = { version = "^24.2.0", optional = true } lxml = { version = "~=5.0.0", optional = true } -metaphor-models = "0.38.3" +metaphor-models = "0.39.1" more-itertools = { version = "^10.1.0", optional = true } msal = { version = "^1.28.0", optional = true } msgraph-beta-sdk = { version = "~1.4.0", optional = true } diff --git a/tests/quick_sight/expected.json b/tests/quick_sight/expected.json index f2557a61..d2a73448 100644 --- a/tests/quick_sight/expected.json +++ b/tests/quick_sight/expected.json @@ -162,7 +162,10 @@ "lastUpdated": "2024-09-19T17:30:15.605000+08:00" }, "structure": { - "name": "locations" + "name": "locations", + "directories": [ + "DATA_SET" + ] } }, { @@ -382,7 +385,10 @@ "lastUpdated": "2024-09-19T12:20:38.550000+08:00" }, "structure": { - "name": "sample_sales_records" + "name": "sample_sales_records", + "directories": [ + "DATA_SET" + ] } }, { @@ -452,7 +458,10 @@ "lastUpdated": "2024-09-18T16:42:39.096000+08:00" }, "structure": { - "name": "Bike data" + "name": "Bike data", + "directories": [ + "DATA_SET" + ] } }, { @@ -622,7 +631,10 @@ "lastUpdated": "2024-09-18T16:09:07.953000+08:00" }, "structure": { - "name": "orders" + "name": "orders", + "directories": [ + "DATA_SET" + ] } }, { @@ -649,7 +661,10 @@ "lastUpdated": "2024-09-12T21:01:13.244000+08:00" }, "structure": { - "name": "Sales" + "name": "Sales", + "directories": [ + "DASHBOARD" + ] } }, { @@ -676,7 +691,10 @@ "lastUpdated": "2024-09-18T16:24:00.923000+08:00" }, "structure": { - "name": "Bike rides" + "name": "Bike rides", + "directories": [ + "DASHBOARD" + ] } }, { @@ -703,7 +721,34 @@ "lastUpdated": "2024-09-18T16:09:35.228000+08:00" }, "structure": { - "name": "Jaffle Shop Orders" + "name": "Jaffle Shop Orders", + "directories": [ + "DASHBOARD" + ] + } + }, + { + "hierarchyInfo": { + "name": "Dashboards", + "type": "VIRTUAL_HIERARCHY" + }, + "logicalId": { + "path": [ + "QUICK_SIGHT", + "DASHBOARD" + ] + } + }, + { + "hierarchyInfo": { + "name": "DataSets", + "type": "VIRTUAL_HIERARCHY" + }, + "logicalId": { + "path": [ + "QUICK_SIGHT", + "DATA_SET" + ] } } ] From 4a4f47816897e5848db2a7eb23ff85053852ba6e Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 24 Sep 2024 14:20:45 +0800 Subject: [PATCH 2/2] Refactor create_hierarchy --- metaphor/common/hierarchy.py | 33 ++++++++++++++++++++++++ metaphor/looker/folder.py | 19 +++++--------- metaphor/quick_sight/folder.py | 22 +++++----------- metaphor/thought_spot/extractor.py | 41 ++++++++++++++++-------------- metaphor/thought_spot/utils.py | 16 ------------ 5 files changed, 68 insertions(+), 63 deletions(-) create mode 100644 metaphor/common/hierarchy.py diff --git a/metaphor/common/hierarchy.py b/metaphor/common/hierarchy.py new file mode 100644 index 00000000..4409e915 --- /dev/null +++ b/metaphor/common/hierarchy.py @@ -0,0 +1,33 @@ +from typing import List + +from metaphor.models.metadata_change_event import ( + AssetPlatform, + Hierarchy, + HierarchyInfo, + HierarchyLogicalID, + HierarchyType, +) + + +def create_hierarchy( + platform: AssetPlatform, + path: List[str], + name: str = "", + hierarchy_type: HierarchyType = HierarchyType.VIRTUAL_HIERARCHY, +) -> Hierarchy: + """ + Create a hierarchy with name + """ + return Hierarchy( + logical_id=HierarchyLogicalID( + path=[platform.value] + path, + ), + hierarchy_info=( + HierarchyInfo( + name=name, + type=hierarchy_type, + ) + if name + else None + ), + ) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index 09d9064a..329ae326 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -1,12 +1,11 @@ from dataclasses import dataclass from typing import Dict, List, Optional +from metaphor.common.hierarchy import create_hierarchy from metaphor.common.logger import get_logger from metaphor.models.metadata_change_event import ( - DashboardPlatform, + AssetPlatform, Hierarchy, - HierarchyInfo, - HierarchyLogicalID, HierarchyType, ) @@ -58,13 +57,9 @@ def _build_hierarchies( if folder_id in folder_hierarchies or folder is None: continue - hierarchy = Hierarchy( - logical_id=HierarchyLogicalID( - path=[DashboardPlatform.LOOKER.value] + directories[: i + 1] - ), - hierarchy_info=HierarchyInfo( - type=HierarchyType.LOOKER_FOLDER, name=folder.name - ), + folder_hierarchies[folder_id] = create_hierarchy( + platform=AssetPlatform.LOOKER, + name=folder.name, + path=directories[: i + 1], + hierarchy_type=HierarchyType.LOOKER_FOLDER, ) - - folder_hierarchies[folder_id] = hierarchy diff --git a/metaphor/quick_sight/folder.py b/metaphor/quick_sight/folder.py index e2bf0356..ac3d7daf 100644 --- a/metaphor/quick_sight/folder.py +++ b/metaphor/quick_sight/folder.py @@ -1,26 +1,16 @@ from typing import List -from metaphor.models.metadata_change_event import ( - DashboardPlatform, - Hierarchy, - HierarchyInfo, - HierarchyLogicalID, - HierarchyType, -) +from metaphor.common.hierarchy import create_hierarchy +from metaphor.models.metadata_change_event import AssetPlatform, Hierarchy DASHBOARD_DIRECTORIES = ["DASHBOARD"] DATA_SET_DIRECTORIES = ["DATA_SET"] -def _create_virtual_hierarchy(name: str, path: List[str]) -> Hierarchy: - return Hierarchy( - logical_id=HierarchyLogicalID(path=[DashboardPlatform.QUICK_SIGHT.name] + path), - hierarchy_info=HierarchyInfo(name=name, type=HierarchyType.VIRTUAL_HIERARCHY), - ) - - def create_top_level_folders() -> List[Hierarchy]: + platform = AssetPlatform.QUICK_SIGHT + return [ - _create_virtual_hierarchy("Dashboards", DASHBOARD_DIRECTORIES), - _create_virtual_hierarchy("DataSets", DATA_SET_DIRECTORIES), + create_hierarchy(platform, DASHBOARD_DIRECTORIES, "Dashboards"), + create_hierarchy(platform, DATA_SET_DIRECTORIES, "DataSets"), ] diff --git a/metaphor/thought_spot/extractor.py b/metaphor/thought_spot/extractor.py index dfc76a73..12dee876 100644 --- a/metaphor/thought_spot/extractor.py +++ b/metaphor/thought_spot/extractor.py @@ -16,10 +16,12 @@ to_virtual_view_entity_id, ) from metaphor.common.event_util import ENTITY_TYPES +from metaphor.common.hierarchy import create_hierarchy from metaphor.common.logger import get_logger from metaphor.common.utils import unique_list from metaphor.models.crawler_run_metadata import Platform from metaphor.models.metadata_change_event import ( + AssetPlatform, AssetStructure, Chart, Dashboard, @@ -30,6 +32,7 @@ EntityType, EntityUpstream, FieldMapping, + HierarchyType, SourceField, SourceInfo, SystemTag, @@ -59,7 +62,6 @@ ) from metaphor.thought_spot.utils import ( ThoughtSpot, - create_virtual_hierarchy, from_list, getColumnTransformation, mapping_chart_type, @@ -102,24 +104,7 @@ async def extract(self) -> Collection[ENTITY_TYPES]: self.fetch_virtual_views() self.fetch_dashboards() - - virtual_hierarchies = [ - create_virtual_hierarchy( - name="Answer", path=[ThoughtSpotDashboardType.ANSWER.name] - ), - create_virtual_hierarchy( - name="Liveboard", path=[ThoughtSpotDashboardType.LIVEBOARD.name] - ), - create_virtual_hierarchy( - name="Table", path=[ThoughtSpotDataObjectType.TABLE.name] - ), - create_virtual_hierarchy( - name="View", path=[ThoughtSpotDataObjectType.VIEW.name] - ), - create_virtual_hierarchy( - name="Worksheet", path=[ThoughtSpotDataObjectType.WORKSHEET.name] - ), - ] + virtual_hierarchies = self._create_virtual_hierarchies() return list( chain( @@ -246,6 +231,24 @@ def populate_virtual_views( ) self._virtual_views[table_id] = view + @staticmethod + def _create_virtual_hierarchies(): + return [ + create_hierarchy( + name=name, + path=[enum_value.value], + platform=AssetPlatform.THOUGHT_SPOT, + hierarchy_type=HierarchyType.THOUGHT_SPOT_VIRTUAL_HIERARCHY, + ) + for name, enum_value in [ + ("Answer", ThoughtSpotDashboardType.ANSWER), + ("Liveboard", ThoughtSpotDashboardType.LIVEBOARD), + ("Table", ThoughtSpotDataObjectType.TABLE), + ("View", ThoughtSpotDataObjectType.VIEW), + ("Worksheet", ThoughtSpotDataObjectType.WORKSHEET), + ] + ] + @staticmethod def build_column_expr_map(tml: TMLObject): def build_formula_map(tml_table): diff --git a/metaphor/thought_spot/utils.py b/metaphor/thought_spot/utils.py index 8d91588e..01e5eba0 100644 --- a/metaphor/thought_spot/utils.py +++ b/metaphor/thought_spot/utils.py @@ -8,12 +8,7 @@ from metaphor.common.utils import chunks from metaphor.models.metadata_change_event import ( ChartType, - DashboardPlatform, DataPlatform, - Hierarchy, - HierarchyInfo, - HierarchyLogicalID, - HierarchyType, ThoughtSpotDataObjectType, ) from metaphor.thought_spot.config import ThoughtSpotRunConfig @@ -278,14 +273,3 @@ def getColumnTransformation(target_column: Column) -> Optional[str]: if target_column.expression is None or target_column.expression.token is None: return None return str(target_column.expression.token) - - -def create_virtual_hierarchy(name: str, path: List[str]) -> Hierarchy: - return Hierarchy( - logical_id=HierarchyLogicalID( - path=[DashboardPlatform.THOUGHT_SPOT.name] + path - ), - hierarchy_info=HierarchyInfo( - name=name, type=HierarchyType.THOUGHT_SPOT_VIRTUAL_HIERARCHY - ), - )