From 0d06a613655fbec2311161c89cb4f333e0d74c31 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 14 Oct 2024 20:38:23 -0700 Subject: [PATCH] docs: add docs on term suggestion (#11606) --- docs-website/sidebars.js | 12 ++++ docs/automations/ai-docs.md | 36 ++++++++++ docs/automations/ai-term-suggestion.md | 72 +++++++++++++++++++ docs/automations/snowflake-tag-propagation.md | 33 +++++---- 4 files changed, 136 insertions(+), 17 deletions(-) create mode 100644 docs/automations/ai-docs.md create mode 100644 docs/automations/ai-term-suggestion.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 06838f2e686bc..8f3da2050a9b7 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -113,6 +113,18 @@ module.exports = { id: "docs/automations/snowflake-tag-propagation", className: "saasOnly", }, + { + label: "AI Classification", + type: "doc", + id: "docs/automations/ai-term-suggestion", + className: "saasOnly", + }, + { + label: "AI Documentation", + type: "doc", + id: "docs/automations/ai-docs", + className: "saasOnly", + }, ], }, { diff --git a/docs/automations/ai-docs.md b/docs/automations/ai-docs.md new file mode 100644 index 0000000000000..bbec33f3bcae6 --- /dev/null +++ b/docs/automations/ai-docs.md @@ -0,0 +1,36 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# AI Documentation + + + +:::info + +This feature is currently in closed beta. Reach out to your Acryl representative to get access. + +::: + +With AI-powered documentation, you can automatically generate documentation for tables and columns. + +

+ +

+ +## Configuring + +No configuration is required - just hit "Generate" on any table or column in the UI. + +## How it works + +Generating good documentation requires a holistic understanding of the data. Information we take into account includes, but is not limited to: + +- Dataset name and any existing documentation +- Column name, type, description, and sample values +- Lineage relationships to upstream and downstream assets +- Metadata about other related assets + +Data privacy: Your metadata is not sent to any third-party LLMs. We use AWS Bedrock internally, which means all metadata remains within the Acryl AWS account. We do not fine-tune on customer data. + +## Limitations + +- This feature is powered by an LLM, which can produce inaccurate results. While we've taken steps to reduce the likelihood of hallucinations, they can still occur. diff --git a/docs/automations/ai-term-suggestion.md b/docs/automations/ai-term-suggestion.md new file mode 100644 index 0000000000000..27d1716cfc372 --- /dev/null +++ b/docs/automations/ai-term-suggestion.md @@ -0,0 +1,72 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# AI Glossary Term Suggestions + + + +:::info + +This feature is currently in closed beta. Reach out to your Acryl representative to get access. + +::: + +The AI Glossary Term Suggestion automation uses LLMs to suggest [Glossary Terms](../glossary/business-glossary.md) for tables and columns in your data. + +This is useful for improving coverage of glossary terms across your organization, which is important for compliance and governance efforts. + +This automation can: + +- Automatically suggests glossary terms for tables and columns. +- Goes beyond a predefined set of terms and works with your business glossary. +- Generates [proposals](../managed-datahub/approval-workflows.md) for owners to review, or can automatically add terms to tables/columns. +- Automatically adjusts to human-provided feedback and curation (coming soon). + +## Prerequisites + +- A business glossary with terms defined. Additional metadata, like documentation and existing term assignments, will improve the accuracy of our suggestions. + +## Configuring + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

+ +

+ +2. **Create the Automation**: Click on 'Create' and select 'AI Glossary Term Suggestions'. + +

+ +

+ +3. **Configure the Automation**: Fill in the required fields to configure the automation. + The main fields to configure are (1) what terms to use for suggestions and (2) what entities to generate suggestions for. + +

+ +

+ +4. Once it's enabled, that's it! You'll start to see terms show up in the UI, either on assets or in the proposals page. + +

+ +

+ +## How it works + +The automation will scan through all the datasets matched by the configured filters. For each one, it will generate suggestions. +If new entities are added that match the configured filters, those will also be classified within 24 hours. + +We take into account the following metadata when generating suggestions: + +- Dataset name and description +- Column name, type, description, and sample values +- Glossary term name, documentation, and hierarchy +- Feedback loop: existing assignments and accepted/rejected proposals (coming soon) + +Data privacy: Your metadata is not sent to any third-party LLMs. We use AWS Bedrock internally, which means all metadata remains within the Acryl AWS account. We do not fine-tune on customer data. + +## Limitations + +- A single configured automation can classify at most 10k entities. +- We cannot do partial reclassification. If you add a new column to an existing table, we won't regenerate suggestions for that table. diff --git a/docs/automations/snowflake-tag-propagation.md b/docs/automations/snowflake-tag-propagation.md index bdc80376dfb48..c708e40cbdd81 100644 --- a/docs/automations/snowflake-tag-propagation.md +++ b/docs/automations/snowflake-tag-propagation.md @@ -1,4 +1,3 @@ - import FeatureAvailability from '@site/src/components/FeatureAvailability'; # Snowflake Tag Propagation Automation @@ -20,22 +19,22 @@ both columns and tables back to Snowflake. This automation is available in DataH 1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. -

- +

+

2. **Create An Automation**: Click on 'Create' and select 'Snowflake Tag Propagation'. -

- +

+

-3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. -Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically -propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation +3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. + Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically + propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation -

- +

+

## Propagating for Existing Assets @@ -46,13 +45,13 @@ Note that it may take some time to complete the initial back-filling process, de To do so, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu

- +

and then click "Initialize".

- +

This one-time step will kick off the back-filling process for existing descriptions. If you only want to begin propagating @@ -68,21 +67,21 @@ that you no longer want propagated descriptions to be visible. To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu

- +

and then click "Rollback".

- +

This one-time step will remove all propagated tags and glossary terms from Snowflake. To simply stop propagating new tags, you can disable the automation. ## Viewing Propagated Tags -You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. +You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. -

- +

+