Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[auto_schema] import schema given connection to db #1223

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion internal/db/db_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func getSearchConfigFromIndex(fullText *input.FullText) string {
if fullText.LanguageColumn == "" {
return fmt.Sprintf("'%s'", fullText.Language)
} else {
return fmt.Sprintf("%s::reconfig", fullText.LanguageColumn)
return fmt.Sprintf("%s::regconfig", fullText.LanguageColumn)
}
}

Expand Down Expand Up @@ -1104,6 +1104,7 @@ func (s *dbSchema) addUniqueConstraint(nodeData *schema.NodeData, inputConstrain
return nil
}

// same logic as parse_db._default_index
func (s *dbSchema) getDefaultIndexType(f *field.Field) input.IndexType {
// default index type for lists|jsonb when not specified is gin type
typ := f.GetFieldType()
Expand Down
2 changes: 1 addition & 1 deletion internal/db/db_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1824,7 +1824,7 @@ func TestFullTextIndexMultipleColsLangColumn(t *testing.T) {
strconv.Quote("users_name_idx"),
getKVDict([]string{
getKVPair("postgresql_using", strconv.Quote("gin")),
getKVPair("postgresql_using_internals", strconv.Quote("to_tsvector(language::reconfig, coalesce(first_name, '') || ' ' || coalesce(last_name, ''))")),
getKVPair("postgresql_using_internals", strconv.Quote("to_tsvector(language::regconfig, coalesce(first_name, '') || ' ' || coalesce(last_name, ''))")),
getKVPair("columns",
fmt.Sprintf("[%s, %s]",
strconv.Quote("first_name"),
Expand Down
1 change: 1 addition & 0 deletions python/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sqlalchemy = "==1.4.35"
psycopg2 = "==2.9.3"
autopep8 = "==1.5.4"
python-dateutil= "==2.8.2"
inflect= "==6.0.2"

[requires]
python_version = "3.8"
5 changes: 4 additions & 1 deletion python/auto_schema/auto_schema/clause_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

clause_regex = re.compile("(.+)'::(.+)")
date_regex = re.compile(
'([0-9]{4})-([0-9]{2})-([0-9]{2})[T| ]([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]{3})?(.+)?')
r"([0-9]{4})-([0-9]{2})-([0-9]{2})[T| ]([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]{3})?(.+)?")


valid_suffixes = {
Expand Down Expand Up @@ -69,6 +69,9 @@ def normalize(arg):
# return the underlying string instead of quoted
arg = str(arg).strip("'")

# condition `price > 0` ends up as `price > (0)::numeric` so we're trying to fix that
arg = re.sub(r"\(([0-9]+)\)::numeric", r'\1', arg)

# strip the extra text padding added so we can compare effectively
m = clause_regex.match(arg)
if m is None:
Expand Down
10 changes: 9 additions & 1 deletion python/auto_schema/auto_schema/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import warnings
import alembic

import sqlalchemy

# if env variable is set, manipulate the path to put local
# current directory over possibly installed auto_schema so that we
Expand All @@ -17,6 +16,7 @@
# run from auto_schema root. conflicts with pip-installed auto_schema when that exists so can't have
# that installed when runnning this...
from auto_schema.runner import Runner
from auto_schema.parse_db import ParseDB

from importlib import import_module

Expand Down Expand Up @@ -53,6 +53,8 @@
'--changes', help='get changes in schema', action='store_true')
parser.add_argument(
'--debug', help='if debug flag passed', action='store_true')
parser.add_argument(
'--import_db', help='import given a schema uri', action='store_true')

# see https://alembic.sqlalchemy.org/en/latest/offline.html
# if true, passed to u
Expand Down Expand Up @@ -89,6 +91,12 @@ def main():

try:
args = parser.parse_args()

if args.import_db is True:
p = ParseDB(args.engine)
p.parse_and_print()
return

sys.path.append(os.path.relpath(args.schema))

schema = import_module('schema')
Expand Down
58 changes: 3 additions & 55 deletions python/auto_schema/auto_schema/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from alembic.autogenerate.api import AutogenContext

from auto_schema.schema_item import FullTextIndex
from auto_schema.introspection import get_raw_db_indexes
from . import ops
from alembic.operations import Operations, MigrateOperation
import sqlalchemy as sa
Expand Down Expand Up @@ -444,8 +445,8 @@ def _compare_indexes(autogen_context: AutogenContext,
metadata_table: sa.Table,
):

raw_db_indexes = _get_raw_db_indexes(
autogen_context, conn_table)
raw_db_indexes = get_raw_db_indexes(
autogen_context.connection, conn_table)
missing_conn_indexes = raw_db_indexes.get('missing')
all_conn_indexes = raw_db_indexes.get('all')
conn_indexes = {}
Expand Down Expand Up @@ -524,56 +525,3 @@ def _compare_indexes(autogen_context: AutogenContext,
unique=index.unique,
info=index.info,
)


index_regex = re.compile('CREATE INDEX (.+) USING (gin|btree)(.+)')


# sqlalchemy doesn't reflect postgres indexes that have expressions in them so have to manually
# fetch these indices from pg_indices to find them
# warning: "Skipped unsupported reflection of expression-based index accounts_full_text_idx"
def _get_raw_db_indexes(autogen_context: AutogenContext, conn_table: Optional[sa.Table]):
if conn_table is None or _dialect_name(autogen_context) != 'postgresql':
return {'missing': {}, 'all': {}}

missing = {}
all = {}
# we cache the db hit but the table seems to change across the same call and so we're
# just paying the CPU price. can probably be fixed in some way...
names = set([index.name for index in conn_table.indexes] +
[constraint.name for constraint in conn_table.constraints])
res = get_db_indexes_for_table(autogen_context.connection, conn_table.name)

for row in res.fetchall():
(
name,
details
) = row
m = index_regex.match(details)
if m is None:
continue
r = m.groups()

all[name] = {
'postgresql_using': r[1],
'postgresql_using_internals': r[2],
# TODO don't have columns|column to pass to FullTextIndex
}

# missing!
if name not in names:
missing[name] = {
'postgresql_using': r[1],
'postgresql_using_internals': r[2],
# TODO don't have columns|column to pass to FullTextIndex
}

return {'missing': missing, 'all': all}


# use a cache so we only hit the db once for each table
# @functools.lru_cache()
def get_db_indexes_for_table(connection: sa.engine.Connection, tname: str):
res = connection.execute(
"SELECT indexname, indexdef from pg_indexes where tablename = '%s'" % tname)
return res
84 changes: 84 additions & 0 deletions python/auto_schema/auto_schema/introspection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
import re
from typing import Optional


def get_sorted_enum_values(connection: sa.engine.Connection, enum_type: str):
    """Return the labels of the postgres enum *enum_type* in db-declared order.

    Enum sort order in postgres follows declaration order, which isn't visible
    from the python side, so we have to go to the db and check the order.
    https://www.postgresql.org/docs/9.5/functions-enum.html

    NOTE(review): *enum_type* is interpolated directly into the SQL — type
    names can't be bound parameters. It must come from trusted schema
    metadata, never from user input.
    """
    query = "select unnest(enum_range(enum_first(null::%s)));" % enum_type
    return [dict(row)['unnest'] for row in connection.execute(query)]


index_regex = re.compile('CREATE INDEX (.+) USING (gin|btree|gist)(.+)')


def _dialect_name(conn: sa.engine.Connection) -> str:
    """Return the SQLAlchemy dialect name for *conn* (e.g. 'postgresql')."""
    dialect = conn.dialect
    return dialect.name


# sqlalchemy doesn't reflect postgres indexes that have expressions in them so have to manually
# fetch these indices from pg_indices to find them
# warning: "Skipped unsupported reflection of expression-based index accounts_full_text_idx"

# this only returns those that match a using...
# TODO check what happens when this is not all-caps
# sqlalchemy doesn't reflect postgres indexes that have expressions in them so have to manually
# fetch these indices from pg_indices to find them
# warning: "Skipped unsupported reflection of expression-based index accounts_full_text_idx"

# this only returns those that match a using...
# TODO check what happens when this is not all-caps
def get_raw_db_indexes(connection: sa.engine.Connection, table: Optional[sa.Table]):
    """Read index definitions for *table* directly from pg_indexes.

    Returns a dict with two keys:
      'all'     -> every index whose definition matches ``index_regex``
      'missing' -> the subset of 'all' that SQLAlchemy reflection did not
                   surface on *table* (not among its indexes/constraints)
    Each value maps index name -> {'postgresql_using', 'postgresql_using_internals'}.
    Both dicts are empty when *table* is None or the dialect isn't postgres.
    """
    if table is None or _dialect_name(connection) != 'postgresql':
        return {'missing': {}, 'all': {}}

    missing = {}
    all_indexes = {}  # `all_indexes` rather than `all` to avoid shadowing the builtin
    # NOTE: the db hit below used to be cached but the table seems to change
    # across the same call so we're just paying the price. can probably be fixed.
    known_names = set([index.name for index in table.indexes] +
                      [constraint.name for constraint in table.constraints])
    res = _get_db_indexes_for_table(connection, table.name)

    for name, details in res.fetchall():
        m = index_regex.match(details)
        if m is None:
            continue

        entry = {
            'postgresql_using': m.group(2),
            'postgresql_using_internals': m.group(3),
            # TODO don't have columns|column to pass to FullTextIndex
        }
        all_indexes[name] = entry

        # in the db but unknown to SQLAlchemy's reflection -> missing
        if name not in known_names:
            # copy so callers mutating one dict don't affect the other
            missing[name] = dict(entry)

    return {'missing': missing, 'all': all_indexes}


# use a cache so we only hit the db once for each table
# @functools.lru_cache()
# use a cache so we only hit the db once for each table
# @functools.lru_cache()
def _get_db_indexes_for_table(connection: sa.engine.Connection, tname: str):
    """Fetch (indexname, indexdef) rows from pg_indexes for table *tname*.

    Uses a bound parameter instead of %-interpolation so table names
    containing quotes can't break the query or inject SQL.
    """
    res = connection.execute(
        sa.text(
            "SELECT indexname, indexdef from pg_indexes where tablename = :tname"),
        {"tname": tname})
    return res


def default_index(table: sa.Table, col_name: str):
    """Return the default index type for a column: 'gin' for jsonb/json/array
    column types, 'btree' for everything else."""
    column_type = table.columns[col_name].type
    gin_types = (postgresql.JSONB, postgresql.JSON, postgresql.ARRAY)
    return 'gin' if isinstance(column_type, gin_types) else 'btree'
Loading