-
Notifications
You must be signed in to change notification settings - Fork 16
[#557] MSSQL discovery script #581
Changes from 5 commits
79a873f
9026d24
3542ee1
a8a9883
5919821
e990558
a1c277f
ccc802d
6dd566b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import sqlalchemy | ||
|
||
|
||
# NB. These are connection secrets, never ever commit these | ||
USER = "" | ||
PASS = "" | ||
IP = "" | ||
PORT = "1433" | ||
DB = "" | ||
|
||
MASTER_MSSQL_URL = f"mssql+pyodbc://{USER}:{PASS}@{IP}:{PORT}/{DB}?driver=ODBC+Driver+17+for+SQL+Server" | ||
|
||
|
||
SUPPORTED_DATA_TYPES = set( | ||
[ | ||
# char types | ||
"varchar", | ||
"nvarchar", | ||
"char", | ||
"nchar", | ||
"ntext", | ||
"text", | ||
# numeric types | ||
"int", | ||
"bigint", | ||
"smallint", | ||
"tinyint", | ||
"money", | ||
"float", | ||
"decimal", | ||
# date types | ||
"date", | ||
"datetime", | ||
"datetime2", | ||
"smalldatetime", | ||
# other types | ||
"bit", | ||
] | ||
) | ||
|
||
|
||
def mssql_discover(): | ||
""" | ||
Select all databases from the instance | ||
Select the schema data for each data base | ||
Check if there are any fields in the schema that Fidesops does not yet support | ||
""" | ||
engine = sqlalchemy.create_engine(MASTER_MSSQL_URL) | ||
all_dbs = engine.execute("SELECT name FROM sys.databases;").all() | ||
all_columns = [] | ||
flagged_columns = [] | ||
flagged_datatypes = set() | ||
for db_name in all_dbs: | ||
db_name = db_name[0] | ||
try: | ||
columns = engine.execute( | ||
f"SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE FROM {db_name}.INFORMATION_SCHEMA.COLUMNS;" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can’t think of any way this this would be set outside this function, but if it was it seems vulnerable to SQL injection. I don’t see a way to get to it from the outside so I’m probably being over paranoid. |
||
).all() | ||
except Exception: | ||
# print(f"Access to {db_name}'s tables denied.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this commented out line left intentionally? |
||
continue | ||
|
||
all_columns.extend(columns) | ||
for table, column, data_type, nullable in columns: | ||
if data_type not in SUPPORTED_DATA_TYPES: | ||
flagged_datatypes.add(data_type) | ||
flagged_columns.append(f"{table}.{column}: {data_type}") | ||
|
||
print(f"{len(set(all_columns))} columns found") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you want a count of all columns why use a set? This would be all unique columns rather than all columns. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was the de-dupe the list, since all columns should be unique There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They might not be unique across tables. What if there is table1.unsupported_field and table2.unsupported_field? Then won't the count be 1, but really there are 2 from two different tables? |
||
print(f"{len(set(flagged_columns))} columns flagged") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, why a set? Should this say unique columns? |
||
print(f"Flagged datatypes:") | ||
print(",\n".join(flagged_datatypes)) | ||
print(f"Flagged columns:") | ||
print(",\n".join(flagged_columns)) | ||
|
||
|
||
if __name__ == "__main__": | ||
mssql_discover() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add an option to read these from environment variables to make it less likely to commit them.