capitalone · micdavis · Jul 25, 2022 · Jul 19, 2022 · Jul 19, 2022 · Jul 19, 2022
@@ -0,0 +1,126 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7eee37ff",
+   "metadata": {},
+   "source": [
+    "# Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0d27009",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import json\n",
+    "\n",
+    "import pandas as pd\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "try:\n",
+    "    sys.path.insert(0, '..')\n",
+    "    import dataprofiler as dp\n",
+    "    from dataprofiler.profilers.utils import merge_profile_list\n",
+    "except ImportError:\n",
+    "    import dataprofiler as dp\n",
+    "    from dataprofiler.profilers.utils import merge_profile_list\n",
+    "\n",
+    "# remove extra tf logging\n",
+    "tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b4369e64",
+   "metadata": {},
+   "source": [
+    "## Set Up the Data and Profiler"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "410c3c4d",
+   "metadata": {},
+   "source": [
+    "This section shows a basic example of preparing profiles to merge. A small pandas DataFrame is built in memory, then the DataFrame is given to the Data Profiler twice to detect sensitive data and obtain the statistics, producing a list of two profiles."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3567c82",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Two short integer columns serve as the example data.\n",
+    "d = dict(col1=[1, 2], col2=[3, 4])\n",
+    "df = pd.DataFrame(d)\n",
+    "\n",
+    "# Profile the same frame twice so the list holds two separate profiles.\n",
+    "list_of_profiles = [dp.Profiler(df) for _ in range(2)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b649db32",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list_of_profiles"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4ed4fc12",
+   "metadata": {},
+   "source": [
+    "## Run Merge on List of Profiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a636047",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "single_profile = merge_profile_list(list_of_profiles=list_of_profiles)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "34059c21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "single_profile.report()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "dataprofiler",
+   "language": "python",
+   "name": "dataprofiler"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}