From 9592fee9cfa4064f3108181f5efc7c99aa58ccfa Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Tue, 19 Jul 2022 13:26:52 -0400 Subject: [PATCH 1/7] test From 9e50a3d9773e7ad02c6af1e252599d09fe0b193b Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Tue, 19 Jul 2022 13:26:52 -0400 Subject: [PATCH 2/7] test From 26bfa052df7228ab4bf926824a0e815953a6e6c4 Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Tue, 19 Jul 2022 13:26:52 -0400 Subject: [PATCH 3/7] test From 8860c61f965fdf749487d75aa9ec18f5df42f81b Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Tue, 19 Jul 2022 13:26:52 -0400 Subject: [PATCH 4/7] test From b5b67f6d8de76ac2c07082c5f267661283708970 Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Tue, 19 Jul 2022 13:26:52 -0400 Subject: [PATCH 5/7] test From 78ec93bee4d293f9019fad4684c874110fc95723 Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Tue, 19 Jul 2022 13:26:52 -0400 Subject: [PATCH 6/7] test From 5fa499c0ab43a3cb1b7cdbfa84322f717e18434d Mon Sep 17 00:00:00 2001 From: taylorfturner Date: Mon, 25 Jul 2022 09:27:41 -0400 Subject: [PATCH 7/7] add notebook example --- examples/merge_profile_list_example.ipynb | 126 ++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 examples/merge_profile_list_example.ipynb diff --git a/examples/merge_profile_list_example.ipynb b/examples/merge_profile_list_example.ipynb new file mode 100644 index 000000000..5457f4799 --- /dev/null +++ b/examples/merge_profile_list_example.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7eee37ff", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0d27009", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import json\n", + "\n", + "import pandas as pd\n", + "import tensorflow as tf\n", + "\n", + "try:\n", + " sys.path.insert(0, '..')\n", + " import dataprofiler as dp\n", + " from dataprofiler.profilers.utils import 
merge_profile_list\n", + "except ImportError:\n", + " import dataprofiler as dp\n", + " from dataprofiler.profilers.utils import merge_profile_list\n", + "\n", + "# remove extra tf logging\n", + "tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)" + ] + }, + { + "cell_type": "markdown", + "id": "b4369e64", + "metadata": {}, + "source": [ + "## Set Up the Data and Profiler" + ] + }, + { + "cell_type": "markdown", + "id": "410c3c4d", + "metadata": {}, + "source": [ + "This section builds a small pandas DataFrame in memory and profiles it twice with the Data Profiler, producing the list of profiles that is merged in the next section." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3567c82", + "metadata": {}, + "outputs": [], + "source": [ + "d = {'col1': [1, 2], 'col2': [3, 4]}\n", + "df = pd.DataFrame(data=d)\n", + "\n", + "list_of_profiles = [dp.Profiler(df), dp.Profiler(df)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b649db32", + "metadata": {}, + "outputs": [], + "source": [ + "list_of_profiles" + ] + }, + { + "cell_type": "markdown", + "id": "4ed4fc12", + "metadata": {}, + "source": [ + "## Run Merge on List of Profiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a636047", + "metadata": {}, + "outputs": [], + "source": [ + "single_profile = merge_profile_list(list_of_profiles=list_of_profiles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34059c21", + "metadata": {}, + "outputs": [], + "source": [ + "single_profile.report()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dataprofiler", + "language": "python", + "name": "dataprofiler" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" 
+ } + }, + "nbformat": 4, + "nbformat_minor": 5 +}