Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

[#557] MSSQL discovery script #581

Merged
merged 9 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,7 @@ envfiles/
fides_uploads

# Prevent SaaS configs from being committed
saas_config.toml
saas_config.toml

# Script secrets
scripts/secrets.py
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ The types of changes are:
### Changed
* Use the `RuleResponse` schema within the `PrivacyRequestResponse` schema [#580](https://github.com/ethyca/fidesops/pull/580)

### Developer Experience
* Adds a script for MSSQL schema exploration [#581](https://github.com/ethyca/fidesops/pull/581)

## [1.5.1](https://github.com/ethyca/fidesops/compare/1.5.0...1.5.1) - 2022-05-27

### Added
Expand Down
79 changes: 79 additions & 0 deletions scripts/mssql_discover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from urllib.parse import quote

import sqlalchemy

# This file is not committed to the repo, please create secrets.py with the required
# variables in the same dir as this file before running this script
from secrets import (
    USER,
    PASS,
    IP,
    PORT,
    DB,
)

# SQLAlchemy connection URL for the SQL Server instance to inspect.
# USER and PASS are percent-encoded (with no "safe" characters) so that
# credentials containing "@", ":", "/" or "%" cannot be mistaken for URL
# delimiters when the URL is parsed.
MASTER_MSSQL_URL = (
    f"mssql+pyodbc://{quote(str(USER), safe='')}:{quote(str(PASS), safe='')}"
    f"@{IP}:{PORT}/{DB}?driver=ODBC+Driver+17+for+SQL+Server"
)


# MSSQL column data types that are treated as supported; any column whose
# DATA_TYPE is not listed here gets flagged by the discovery run.
SUPPORTED_DATA_TYPES = {
    # character data
    "varchar",
    "nvarchar",
    "char",
    "nchar",
    "ntext",
    "text",
    # numbers
    "int",
    "bigint",
    "smallint",
    "tinyint",
    "money",
    "float",
    "decimal",
    # dates and timestamps
    "date",
    "datetime",
    "datetime2",
    "smalldatetime",
    # everything else
    "bit",
}


def _quote_mssql_identifier(name):
    """Return *name* wrapped as a bracket-delimited T-SQL identifier."""
    # Inside [...] the closing bracket is escaped by doubling it.
    return "[" + name.replace("]", "]]") + "]"


def _fetch_all(engine, statement):
    """Run *statement* on its own connection and return every result row."""
    # The connection goes back to the pool as soon as the rows are read, so a
    # failed query for one database cannot affect the next one.
    with engine.connect() as connection:
        return connection.exec_driver_sql(statement).all()


def mssql_discover():
    """
    Report the columns of an MSSQL instance whose data types are not supported.

    Select all databases from the instance
    Select the schema data for each database
    Check if there are any fields in the schema that Fidesops does not yet support

    Connects using MASTER_MSSQL_URL and compares every column's DATA_TYPE
    against SUPPORTED_DATA_TYPES. The totals, the flagged datatypes and the
    flagged columns are printed to stdout; nothing is returned. Databases
    whose column metadata cannot be read are skipped, and each skip is
    printed together with the error that caused it.
    """
    engine = sqlalchemy.create_engine(MASTER_MSSQL_URL)
    all_columns = []
    flagged_columns = []
    flagged_datatypes = set()
    try:
        all_dbs = _fetch_all(engine, "SELECT name FROM sys.databases;")
        for (db_name,) in all_dbs:
            # Quote the name so databases containing spaces, hyphens or other
            # special characters are queried instead of failing to parse.
            quoted_db = _quote_mssql_identifier(db_name)
            try:
                columns = _fetch_all(
                    engine,
                    f"SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE FROM {quoted_db}.INFORMATION_SCHEMA.COLUMNS;",
                )
            except Exception as exc:
                # Best effort: keep scanning the remaining databases, but say
                # which one was left out so the report is not silently partial.
                print(f"Skipping database {db_name}: {exc}")
                continue

            all_columns.extend(columns)
            for table, column, data_type in columns:
                if data_type not in SUPPORTED_DATA_TYPES:
                    flagged_datatypes.add(data_type)
                    flagged_columns.append(f"{db_name}.{table}.{column}: {data_type}")
    finally:
        # Release pooled connections even when the initial query fails.
        engine.dispose()

    print(f"{len(all_columns)} columns found")
    print(f"{len(flagged_columns)} columns flagged")
    print("Flagged datatypes:")
    # Sorted so the report is stable between runs (set order is arbitrary).
    print(",\n".join(sorted(flagged_datatypes)))
    print("Flagged columns:")
    print(",\n".join(flagged_columns))


# Run the discovery only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    mssql_discover()