diff --git a/docs/datahub_lite.md b/docs/datahub_lite.md index 3b82448492707..d5ec1f617a7ba 100644 --- a/docs/datahub_lite.md +++ b/docs/datahub_lite.md @@ -35,10 +35,10 @@ The following features are **NOT** supported: ## Prerequisites -There are no pre-requisites for DataHub Lite other than having a Python 3.7+ environment and a [`acryl-datahub`](https://pypi.org/project/acryl-datahub/) > 0.9.6. Install the `datahub` Python cli using the [instructions](./cli.md#using-pip). +To use `datahub lite` commands, you need to install [`acryl-datahub`](https://pypi.org/project/acryl-datahub/) > 0.9.6 ([install instructions](./cli.md#using-pip)) and the `datahub-lite` plugin. ```shell -pip install acryl-datahub +pip install acryl-datahub[datahub-lite] ``` ## Importing Metadata diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 92a10f9281c87..e9bffae22d865 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -241,7 +241,10 @@ def get_long_description(): # Sink plugins. "datahub-kafka": kafka_common, "datahub-rest": rest_common, - "datahub-lite": set(), + "datahub-lite": { + "fastapi", + "uvicorn", + }, # Integrations. "airflow": { "apache-airflow >= 2.0.2", diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py index 0532110ea2f23..2f32edbf2b3e8 100644 --- a/metadata-ingestion/src/datahub/cli/lite_cli.py +++ b/metadata-ingestion/src/datahub/cli/lite_cli.py @@ -211,11 +211,10 @@ def serve(port: int) -> None: uvicorn.run(app, port=port) -@lite.command(context_settings=dict(allow_extra_args=True)) +@lite.command() @click.argument("path", required=False, type=CompleteablePath()) -@click.pass_context @telemetry.with_telemetry -def ls(ctx: click.Context, path: Optional[str]) -> None: +def ls(path: Optional[str]) -> None: """List at a path""" start_time = time.time() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 8a63da437fe24..e798d3c997fa9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -245,7 +245,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase): ) profile_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Regex patterns to filter tables for profiling during ingestion. Allowed by the `table_pattern`.", + description="Regex patterns to filter tables (or specific columns) for profiling during ingestion. Note that only tables allowed by the `table_pattern` will be considered.", ) domain: Dict[str, AllowDenyPattern] = Field( default=dict(), diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 8fa31231d7fe8..30d5616b84617 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -346,7 +346,10 @@ def _populate_usage_stat_registry(self): if self.server is None: return + view: TSC.ViewItem for view in TSC.Pager(self.server.views, usage=True): + if not view.id: + continue self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views) logger.debug("Tableau stats %s", self.tableau_stat_registry) diff --git a/metadata-ingestion/src/datahub/lite/lite_server.py b/metadata-ingestion/src/datahub/lite/lite_server.py index d5cfa08abcf36..27960a7a17a0f 100644 --- a/metadata-ingestion/src/datahub/lite/lite_server.py +++ b/metadata-ingestion/src/datahub/lite/lite_server.py @@ -1,7 +1,7 @@ import logging -from typing import Dict, Iterable, List, Optional, Union +from typing import Dict, List, Optional, Union -from fastapi import Depends, FastAPI, Query +from fastapi import Depends, FastAPI, HTTPException, Query from fastapi.responses import RedirectResponse from datahub.lite.lite_local import ( @@ -10,7 +10,6 @@ Searchable, SearchFlavor, ) -from datahub.metadata.schema_classes import SystemMetadataClass, _Aspect app = FastAPI() logger = logging.getLogger(__name__) @@ -22,8 +21,8 @@ def redirect_to_docs(): return RedirectResponse(app.docs_url) -@app.get("/ping") # type: ignore -def ping() -> dict: # type: ignore +@app.get("/ping") +def ping() -> dict: return {"ping": "pong"} @@ -34,44 +33,45 @@ def lite() -> DataHubLiteLocal: return lite -@app.get("/entities") # type: ignore -def entities_list(lite: DataHubLiteLocal = Depends(lite)) -> Iterable[str]: # type: ignore +@app.get("/entities") +def entities_list(lite: DataHubLiteLocal = Depends(lite)) -> List[str]: # TODO add some filtering capabilities - return lite.list_ids() + return list(lite.list_ids()) -@app.get("/entities/{id}") # type: ignore -def entities_get( # type: ignore +@app.get("/entities/{id}") +def entities_get( id: str, aspects: Optional[List[str]] = Query(None), lite: DataHubLiteLocal = Depends(lite), -) -> Optional[ - Dict[str, Union[str, Dict[str, Union[dict, _Aspect, SystemMetadataClass]]]] -]: +) -> Dict[str, Union[str, Dict[str, dict]]]: # Queried as GET /entities/?aspects=aspect1&aspects=aspect2&... logger.warning(f"get {id} aspects={aspects}") - return lite.get(id, aspects=aspects) + entities = lite.get(id, aspects=aspects, typed=False) + if not entities: + raise HTTPException(status_code=404, detail="Entity not found") + return entities # type: ignore -@app.get("/browse") # type: ignore -def browse( # type: ignore +@app.get("/browse") +def browse( path: str = Query("/"), catalog: DataHubLiteLocal = Depends(lite), -) -> Iterable[Browseable]: +) -> List[Browseable]: # Queried as GET /browse/?path= logger.info(f"browse {path}") - return catalog.ls(path) + return list(catalog.ls(path)) -@app.get("/search") # type: ignore -def search( # type: ignore +@app.get("/search") +def search( query: str = Query("*"), flavor: SearchFlavor = Query(SearchFlavor.FREE_TEXT), lite: DataHubLiteLocal = Depends(lite), -) -> Iterable[Searchable]: +) -> List[Searchable]: # Queried as GET /search/?query= logger.info(f"search {query}") - return lite.search(query=query, flavor=flavor) + return list(lite.search(query=query, flavor=flavor)) # TODO put command