Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[auto_schema] import schema given connection to db #1223

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion internal/db/db_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func getSearchConfigFromIndex(fullText *input.FullText) string {
if fullText.LanguageColumn == "" {
return fmt.Sprintf("'%s'", fullText.Language)
} else {
return fmt.Sprintf("%s::reconfig", fullText.LanguageColumn)
return fmt.Sprintf("%s::regconfig", fullText.LanguageColumn)
}
}

Expand Down Expand Up @@ -1104,6 +1104,7 @@ func (s *dbSchema) addUniqueConstraint(nodeData *schema.NodeData, inputConstrain
return nil
}

// same logic as parse_db._default_index
func (s *dbSchema) getDefaultIndexType(f *field.Field) input.IndexType {
// default index type for lists|jsonb when not specified is gin type
typ := f.GetFieldType()
Expand Down
2 changes: 1 addition & 1 deletion internal/db/db_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1824,7 +1824,7 @@ func TestFullTextIndexMultipleColsLangColumn(t *testing.T) {
strconv.Quote("users_name_idx"),
getKVDict([]string{
getKVPair("postgresql_using", strconv.Quote("gin")),
getKVPair("postgresql_using_internals", strconv.Quote("to_tsvector(language::reconfig, coalesce(first_name, '') || ' ' || coalesce(last_name, ''))")),
getKVPair("postgresql_using_internals", strconv.Quote("to_tsvector(language::regconfig, coalesce(first_name, '') || ' ' || coalesce(last_name, ''))")),
getKVPair("columns",
fmt.Sprintf("[%s, %s]",
strconv.Quote("first_name"),
Expand Down
1 change: 1 addition & 0 deletions python/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sqlalchemy = "==1.4.35"
psycopg2 = "==2.9.3"
autopep8 = "==1.5.4"
python-dateutil= "==2.8.2"
inflect= "==6.0.2"

[requires]
python_version = "3.8"
5 changes: 4 additions & 1 deletion python/auto_schema/auto_schema/clause_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

clause_regex = re.compile("(.+)'::(.+)")
date_regex = re.compile(
'([0-9]{4})-([0-9]{2})-([0-9]{2})[T| ]([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]{3})?(.+)?')
r"([0-9]{4})-([0-9]{2})-([0-9]{2})[T| ]([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]{3})?(.+)?")


valid_suffixes = {
Expand Down Expand Up @@ -69,6 +69,9 @@ def normalize(arg):
# return the underlying string instead of quoted
arg = str(arg).strip("'")

# condition `price > 0` ends up as `price > (0)::numeric` so we're trying to fix that
arg = re.sub(r"\(([0-9]+)\)::numeric", r'\1', arg)

# strip the extra text padding added so we can compare effectively
m = clause_regex.match(arg)
if m is None:
Expand Down
10 changes: 9 additions & 1 deletion python/auto_schema/auto_schema/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import warnings
import alembic

import sqlalchemy

# if env variable is set, manipulate the path to put local
# current directory over possibly installed auto_schema so that we
Expand All @@ -17,6 +16,7 @@
# run from auto_schema root. conflicts with pip-installed auto_schema when that exists so can't have
# that installed when runnning this...
from auto_schema.runner import Runner
from auto_schema.parse_db import ParseDB

from importlib import import_module

Expand Down Expand Up @@ -53,6 +53,8 @@
'--changes', help='get changes in schema', action='store_true')
parser.add_argument(
'--debug', help='if debug flag passed', action='store_true')
parser.add_argument(
'--import_db', help='import given a schema uri', action='store_true')

# see https://alembic.sqlalchemy.org/en/latest/offline.html
# if true, passed to u
Expand Down Expand Up @@ -89,6 +91,12 @@ def main():

try:
args = parser.parse_args()

if args.import_db is True:
p = ParseDB(args.engine)
p.parse_and_print()
return

sys.path.append(os.path.relpath(args.schema))

schema = import_module('schema')
Expand Down
58 changes: 3 additions & 55 deletions python/auto_schema/auto_schema/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from alembic.autogenerate.api import AutogenContext

from auto_schema.schema_item import FullTextIndex
from auto_schema.introspection import get_raw_db_indexes
from . import ops
from alembic.operations import Operations, MigrateOperation
import sqlalchemy as sa
Expand Down Expand Up @@ -444,8 +445,8 @@ def _compare_indexes(autogen_context: AutogenContext,
metadata_table: sa.Table,
):

raw_db_indexes = _get_raw_db_indexes(
autogen_context, conn_table)
raw_db_indexes = get_raw_db_indexes(
autogen_context.connection, conn_table)
missing_conn_indexes = raw_db_indexes.get('missing')
all_conn_indexes = raw_db_indexes.get('all')
conn_indexes = {}
Expand Down Expand Up @@ -524,56 +525,3 @@ def _compare_indexes(autogen_context: AutogenContext,
unique=index.unique,
info=index.info,
)


index_regex = re.compile('CREATE INDEX (.+) USING (gin|btree)(.+)')


# sqlalchemy doesn't reflect postgres indexes that have expressions in them so have to manually
# fetch these indices from pg_indices to find them
# warning: "Skipped unsupported reflection of expression-based index accounts_full_text_idx"
def _get_raw_db_indexes(autogen_context: AutogenContext, conn_table: Optional[sa.Table]):
if conn_table is None or _dialect_name(autogen_context) != 'postgresql':
return {'missing': {}, 'all': {}}

missing = {}
all = {}
# we cache the db hit but the table seems to change across the same call and so we're
# just paying the CPU price. can probably be fixed in some way...
names = set([index.name for index in conn_table.indexes] +
[constraint.name for constraint in conn_table.constraints])
res = get_db_indexes_for_table(autogen_context.connection, conn_table.name)

for row in res.fetchall():
(
name,
details
) = row
m = index_regex.match(details)
if m is None:
continue
r = m.groups()

all[name] = {
'postgresql_using': r[1],
'postgresql_using_internals': r[2],
# TODO don't have columns|column to pass to FullTextIndex
}

# missing!
if name not in names:
missing[name] = {
'postgresql_using': r[1],
'postgresql_using_internals': r[2],
# TODO don't have columns|column to pass to FullTextIndex
}

return {'missing': missing, 'all': all}


# use a cache so we only hit the db once for each table
# @functools.lru_cache()
def get_db_indexes_for_table(connection: sa.engine.Connection, tname: str):
res = connection.execute(
"SELECT indexname, indexdef from pg_indexes where tablename = '%s'" % tname)
return res
84 changes: 84 additions & 0 deletions python/auto_schema/auto_schema/introspection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
import re
from typing import Optional


def get_sorted_enum_values(connection: sa.engine.Connection, enum_type: str):
    """Return the labels of the postgres enum *enum_type* in db-declared order.

    Enum sort order in postgres follows declaration order, which isn't visible
    from the python side, so we have to go to the db and check the order.
    https://www.postgresql.org/docs/9.5/functions-enum.html

    NOTE(review): *enum_type* is interpolated directly into the SQL — type
    names can't be bound parameters. It must come from trusted schema
    metadata, never from user input.
    """
    query = "select unnest(enum_range(enum_first(null::%s)));" % enum_type
    return [dict(row)['unnest'] for row in connection.execute(query)]


index_regex = re.compile('CREATE INDEX (.+) USING (gin|btree|gist)(.+)')


def _dialect_name(conn: sa.engine.Connection) -> str:
    """Return the SQLAlchemy dialect name for *conn* (e.g. 'postgresql')."""
    dialect = conn.dialect
    return dialect.name


# sqlalchemy doesn't reflect postgres indexes that have expressions in them so have to manually
# fetch these indices from pg_indices to find them
# warning: "Skipped unsupported reflection of expression-based index accounts_full_text_idx"

# this only returns those that match a using...
# TODO check what happens when this is not all-caps
# sqlalchemy doesn't reflect postgres indexes that have expressions in them so have to manually
# fetch these indices from pg_indices to find them
# warning: "Skipped unsupported reflection of expression-based index accounts_full_text_idx"

# this only returns those that match a using...
# TODO check what happens when this is not all-caps
def get_raw_db_indexes(connection: sa.engine.Connection, table: Optional[sa.Table]):
    """Read index definitions for *table* directly from pg_indexes.

    Returns a dict with two keys:
      'all'     -> every index whose definition matches ``index_regex``
      'missing' -> the subset of 'all' that SQLAlchemy reflection did not
                   surface on *table* (not among its indexes/constraints)
    Each value maps index name -> {'postgresql_using', 'postgresql_using_internals'}.
    Both dicts are empty when *table* is None or the dialect isn't postgres.
    """
    if table is None or _dialect_name(connection) != 'postgresql':
        return {'missing': {}, 'all': {}}

    missing = {}
    all_indexes = {}  # `all_indexes` rather than `all` to avoid shadowing the builtin
    # NOTE: the db hit below used to be cached but the table seems to change
    # across the same call so we're just paying the price. can probably be fixed.
    known_names = set([index.name for index in table.indexes] +
                      [constraint.name for constraint in table.constraints])
    res = _get_db_indexes_for_table(connection, table.name)

    for name, details in res.fetchall():
        m = index_regex.match(details)
        if m is None:
            continue

        entry = {
            'postgresql_using': m.group(2),
            'postgresql_using_internals': m.group(3),
            # TODO don't have columns|column to pass to FullTextIndex
        }
        all_indexes[name] = entry

        # in the db but unknown to SQLAlchemy's reflection -> missing
        if name not in known_names:
            # copy so callers mutating one dict don't affect the other
            missing[name] = dict(entry)

    return {'missing': missing, 'all': all_indexes}


# use a cache so we only hit the db once for each table
# @functools.lru_cache()
# use a cache so we only hit the db once for each table
# @functools.lru_cache()
def _get_db_indexes_for_table(connection: sa.engine.Connection, tname: str):
    """Fetch (indexname, indexdef) rows from pg_indexes for table *tname*.

    Uses a bound parameter instead of %-interpolation so table names
    containing quotes can't break the query or inject SQL.
    """
    res = connection.execute(
        sa.text(
            "SELECT indexname, indexdef from pg_indexes where tablename = :tname"),
        {"tname": tname})
    return res


def default_index(table: sa.Table, col_name: str):
    """Return the default index type for a column: 'gin' for jsonb/json/array
    column types, 'btree' for everything else."""
    column_type = table.columns[col_name].type
    gin_types = (postgresql.JSONB, postgresql.JSON, postgresql.ARRAY)
    return 'gin' if isinstance(column_type, gin_types) else 'btree'
Loading