diff --git a/src/common/constants.ts b/src/common/constants.ts index dddd1da7..00865777 100644 --- a/src/common/constants.ts +++ b/src/common/constants.ts @@ -1,4 +1,6 @@ export const tokenClassConsts = { + BINARY: 'binary', + BINARY_ESCAPE: 'binary.escape', COMMENT: 'comment', COMMENT_QUOTE: 'comment.quote', DELIMITER: 'delimiter', diff --git a/src/sparksql/sparksql.ts b/src/sparksql/sparksql.ts index 1624ca8a..0246a5f5 100644 --- a/src/sparksql/sparksql.ts +++ b/src/sparksql/sparksql.ts @@ -4,6 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import type { languages } from '../fillers/monaco-editor-core'; +import { tokenClassConsts } from '../common/constants'; export const conf: languages.LanguageConfiguration = { comments: { @@ -11,23 +12,22 @@ export const conf: languages.LanguageConfiguration = { blockComment: ['/*', '*/'] }, brackets: [ - ['{', '}'], ['[', ']'], ['(', ')'] ], autoClosingPairs: [ - { open: '{', close: '}' }, { open: '[', close: ']' }, { open: '(', close: ')' }, { open: '"', close: '"' }, - { open: "'", close: "'" } + { open: "'", close: "'" }, + { open: '`', close: '`' } ], surroundingPairs: [ - { open: '{', close: '}' }, { open: '[', close: ']' }, { open: '(', close: ')' }, { open: '"', close: '"' }, - { open: "'", close: "'" } + { open: "'", close: "'" }, + { open: '`', close: '`' } ] }; @@ -35,1369 +35,912 @@ export const language = { defaultToken: '', tokenPostfix: '.sql', ignoreCase: true, - brackets: [ - { open: '[', close: ']', token: 'delimiter.square' }, - { open: '(', close: ')', token: 'delimiter.parenthesis' } + { open: '[', close: ']', token: tokenClassConsts.DELIMITER_SQUARE }, + { open: '(', close: ')', token: tokenClassConsts.DELIMITER_PAREN } ], - keywords: [ - 'ABORT_AFTER_WAIT', - 'ABSENT', - 'ABSOLUTE', - 'ACCENT_SENSITIVITY', - 'ACTION', - 'ACTIVATION', - 'ACTIVE', + // https://github.com/apache/spark/blob/v3.5.0/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4#L90C1-L90C1 'ADD', - 'ADDRESS', - 'ADMIN', - 'AES', - 'AES_128', - 'AES_192', - 'AES_256', - 'AFFINITY', 'AFTER', - 'AGGREGATE', - 'ALGORITHM', - 'ALL_CONSTRAINTS', - 'ALL_ERRORMSGS', - 'ALL_INDEXES', - 'ALL_LEVELS', - 'ALL_SPARSE_COLUMNS', - 'ALLOW_CONNECTIONS', - 'ALLOW_MULTIPLE_EVENT_LOSS', - 'ALLOW_PAGE_LOCKS', - 'ALLOW_ROW_LOCKS', - 'ALLOW_SINGLE_EVENT_LOSS', - 'ALLOW_SNAPSHOT_ISOLATION', - 'ALLOWED', + 'ALL', 'ALTER', - 'ANONYMOUS', - 'ANSI_DEFAULTS', - 'ANSI_NULL_DEFAULT', - 'ANSI_NULL_DFLT_OFF', - 'ANSI_NULL_DFLT_ON', - 'ANSI_NULLS', - 'ANSI_PADDING', - 'ANSI_WARNINGS', - 'APPEND', - 'APPLICATION', - 'APPLICATION_LOG', - 'ARITHABORT', - 'ARITHIGNORE', + 'ALWAYS', + 'ANALYZE', + 'AND', + 'ANTI', + 'ANY', + 'ANY_VALUE', + 'ARCHIVE', + 'ARRAY', 'AS', 'ASC', - 'ASSEMBLY', - 'ASYMMETRIC', - 'ASYNCHRONOUS_COMMIT', 'AT', - 'ATOMIC', - 'ATTACH', - 'ATTACH_REBUILD_LOG', - 'AUDIT', - 'AUDIT_GUID', - 'AUTHENTICATION', 'AUTHORIZATION', - 'AUTO', - 'AUTO_CLEANUP', - 'AUTO_CLOSE', - 'AUTO_CREATE_STATISTICS', - 'AUTO_SHRINK', - 'AUTO_UPDATE_STATISTICS', - 'AUTO_UPDATE_STATISTICS_ASYNC', - 'AUTOMATED_BACKUP_PREFERENCE', - 'AUTOMATIC', - 'AVAILABILITY', - 'AVAILABILITY_MODE', - 'BACKUP', - 'BACKUP_PRIORITY', - 'BASE64', - 'BATCHSIZE', - 'BEGIN', - 'BEGIN_DIALOG', + 'BETWEEN', 'BIGINT', 'BINARY', - 'BINDING', - 'BIT', - 'BLOCKERS', - 'BLOCKSIZE', - 'BOUNDING_BOX', - 'BREAK', - 'BROKER', - 'BROKER_INSTANCE', - 'BROWSE', - 'BUCKET_COUNT', - 'BUFFER', - 'BUFFERCOUNT', - 'BULK', - 'BULK_LOGGED', + 'BOOLEAN', + 'BOTH', + 'BUCKET', + 'BUCKETS', 'BY', + 'BYTE', 'CACHE', - 'CALL', - 'CALLED', - 'CALLER', - 'CAP_CPU_PERCENT', 'CASCADE', 'CASE', + 'CAST', 'CATALOG', - 'CATCH', - 'CELLS_PER_OBJECT', - 'CERTIFICATE', - 'CHANGE_RETENTION', - 'CHANGE_TRACKING', - 'CHANGES', + 'CATALOGS', + 'CHANGE', 'CHAR', 'CHARACTER', 'CHECK', - 'CHECK_CONSTRAINTS', - 'CHECK_EXPIRATION', - 'CHECK_POLICY', - 'CHECKALLOC', - 'CHECKCATALOG', - 'CHECKCONSTRAINTS', - 'CHECKDB', - 'CHECKFILEGROUP', - 'CHECKIDENT', - 'CHECKPOINT', - 'CHECKTABLE', - 'CLASSIFIER_FUNCTION', - 'CLEANTABLE', - 'CLEANUP', 'CLEAR', - 'CLOSE', 'CLUSTER', 'CLUSTERED', - 'CODEPAGE', + 'CODEGEN', 'COLLATE', 'COLLECTION', 'COLUMN', - 'COLUMN_SET', 'COLUMNS', - 'COLUMNSTORE', - 'COLUMNSTORE_ARCHIVE', + 'COMMENT', 'COMMIT', - 'COMMITTED', - 'COMPATIBILITY_LEVEL', - 'COMPRESSION', + 'COMPACT', + 'COMPACTIONS', 'COMPUTE', - 'CONCAT', - 'CONCAT_NULL_YIELDS_NULL', - 'CONFIGURATION', - 'CONNECT', + 'CONCATENATE', 'CONSTRAINT', - 'CONTAINMENT', - 'CONTENT', - 'CONTEXT', - 'CONTINUE', - 'CONTINUE_AFTER_ERROR', - 'CONTRACT', - 'CONTRACT_NAME', - 'CONTROL', - 'CONVERSATION', - 'COOKIE', - 'COPY_ONLY', - 'COUNTER', - 'CPU', + 'COST', 'CREATE', - 'CREATE_NEW', - 'CREATION_DISPOSITION', - 'CREDENTIAL', - 'CRYPTOGRAPHIC', + 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', - 'CURSOR', - 'CURSOR_CLOSE_ON_COMMIT', - 'CURSOR_DEFAULT', - 'CYCLE', - 'DATA', - 'DATA_COMPRESSION', - 'DATA_PURITY', - 'DATABASE', - 'DATABASE_DEFAULT', - 'DATABASE_MIRRORING', - 'DATABASE_SNAPSHOT', - 'DATAFILETYPE', - 'DATE', - 'DATE_CORRELATION_OPTIMIZATION', - 'DATEFIRST', - 'DATEFORMAT', - 'DATETIME', - 'DATETIME2', - 'DATETIMEOFFSET', + 'CURRENT_TIME', + 'CURRENT_TIMESTAMP', + 'CURRENT_USER', 'DAY', - 'DAYOFYEAR', 'DAYS', - 'DB_CHAINING', - 'DBCC', - 'DBREINDEX', - 'DDL_DATABASE_LEVEL_EVENTS', - 'DEADLOCK_PRIORITY', - 'DEALLOCATE', + 'DAYOFYEAR', + 'DATA', + 'DATE', + 'DATABASE', + 'DATABASES', + 'DATEADD', + 'DATE_ADD', + 'DATEDIFF', + 'DATE_DIFF', + 'DBPROPERTIES', 'DEC', 'DECIMAL', 'DECLARE', - 'DECRYPTION', 'DEFAULT', - 'DEFAULT_DATABASE', - 'DEFAULT_FULLTEXT_LANGUAGE', - 'DEFAULT_LANGUAGE', - 'DEFAULT_SCHEMA', - 'DEFINITION', - 'DELAY', - 'DELAYED_DURABILITY', + 'DEFINED', 'DELETE', - 'DELETED', - 'DENSITY_VECTOR', - 'DENY', - 'DEPENDENTS', - 'DES', + 'DELIMITED', 'DESC', - 'DESCRIPTION', - 'DESX', - 'DHCP', - 'DIAGNOSTICS', - 'DIALOG', - 'DIFFERENTIAL', - 'DIRECTORY_NAME', - 'DISABLE', - 'DISABLE_BROKER', - 'DISABLED', - 'DISK', + 'DESCRIBE', + 'DFS', + 'DIRECTORIES', + 'DIRECTORY', 'DISTINCT', - 'DISTRIBUTED', - 'DOCUMENT', + 'DISTRIBUTE', + 'DIV', 'DOUBLE', 'DROP', - 'DROP_EXISTING', - 'DROPCLEANBUFFERS', - 'DUMP', - 'DURABILITY', - 'DYNAMIC', - 'EDITION', - 'ELEMENTS', 'ELSE', - 'EMERGENCY', - 'EMPTY', - 'EMPTYFILE', - 'ENABLE', - 'ENABLE_BROKER', - 'ENABLED', - 'ENCRYPTION', 'END', - 'ENDPOINT', - 'ENDPOINT_URL', - 'ERRLVL', - 'ERROR', - 'ERROR_BROKER_CONVERSATIONS', - 'ERRORFILE', 'ESCAPE', - 'ESTIMATEONLY', - 'EVENT', - 'EVENT_RETENTION_MODE', - 'EXEC', - 'EXECUTABLE', - 'EXECUTE', - 'EXIT', - 'EXPAND', - 'EXPIREDATE', - 'EXPIRY_DATE', - 'EXPLICIT', - 'EXTENDED_LOGICAL_CHECKS', - 'EXTENSION', + 'ESCAPED', + 'EXCEPT', + 'EXCHANGE', + 'EXCLUDE', + 'EXISTS', + 'EXPLAIN', + 'EXPORT', + 'EXTENDED', 'EXTERNAL', - 'EXTERNAL_ACCESS', - 'FAIL_OPERATION', - 'FAILOVER', - 'FAILOVER_MODE', - 'FAILURE_CONDITION_LEVEL', + 'EXTRACT', 'FALSE', - 'FAN_IN', - 'FAST', - 'FAST_FORWARD', 'FETCH', - 'FIELDTERMINATOR', - 'FILE', - 'FILEGROUP', - 'FILEGROWTH', - 'FILELISTONLY', - 'FILENAME', - 'FILEPATH', - 'FILESTREAM', - 'FILESTREAM_ON', - 'FILETABLE_COLLATE_FILENAME', - 'FILETABLE_DIRECTORY', - 'FILETABLE_FULLPATH_UNIQUE_CONSTRAINT_NAME', - 'FILETABLE_NAMESPACE', - 'FILETABLE_PRIMARY_KEY_CONSTRAINT_NAME', - 'FILETABLE_STREAMID_UNIQUE_CONSTRAINT_NAME', - 'FILLFACTOR', - 'FILTERING', - 'FIRE_TRIGGERS', + 'FIELDS', + 'FILTER', + 'FILEFORMAT', 'FIRST', - 'FIRSTROW', 'FLOAT', - 'FMTONLY', 'FOLLOWING', 'FOR', - 'FORCE', - 'FORCE_FAILOVER_ALLOW_DATA_LOSS', - 'FORCE_SERVICE_ALLOW_DATA_LOSS', - 'FORCED', - 'FORCEPLAN', - 'FORCESCAN', - 'FORCESEEK', 'FOREIGN', - 'FORMATFILE', - 'FORMSOF', - 'FORWARD_ONLY', - 'FREE', - 'FREEPROCCACHE', - 'FREESESSIONCACHE', - 'FREESYSTEMCACHE', + 'FORMAT', + 'FORMATTED', 'FROM', 'FULL', - 'FULLSCAN', - 'FULLTEXT', 'FUNCTION', - 'GB', - 'GEOGRAPHY_AUTO_GRID', - 'GEOGRAPHY_GRID', - 'GEOMETRY_AUTO_GRID', - 'GEOMETRY_GRID', - 'GET', + 'FUNCTIONS', + 'GENERATED', 'GLOBAL', - 'GO', - 'GOTO', - 'GOVERNOR', 'GRANT', - 'GRIDS', 'GROUP', - 'GROUP_MAX_REQUESTS', - 'HADR', - 'HASH', - 'HASHED', + 'GROUPING', 'HAVING', - 'HEADERONLY', - 'HEALTH_CHECK_TIMEOUT', - 'HELP', - 'HIERARCHYID', - 'HIGH', - 'HINT', - 'HISTOGRAM', - 'HOLDLOCK', - 'HONOR_BROKER_PRIORITY', + 'X', 'HOUR', 'HOURS', - 'IDENTITY', - 'IDENTITY_INSERT', - 'IDENTITY_VALUE', - 'IDENTITYCOL', + 'IDENTIFIER', 'IF', - 'IGNORE_CONSTRAINTS', - 'IGNORE_DUP_KEY', - 'IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX', - 'IGNORE_TRIGGERS', - 'IMAGE', - 'IMMEDIATE', - 'IMPERSONATE', - 'IMPLICIT_TRANSACTIONS', - 'IMPORTANCE', + 'IGNORE', + 'IMPORT', + 'IN', 'INCLUDE', - 'INCREMENT', - 'INCREMENTAL', 'INDEX', - 'INDEXDEFRAG', - 'INFINITE', - 'INFLECTIONAL', - 'INIT', - 'INITIATOR', - 'INPUT', - 'INPUTBUFFER', - 'INSENSITIVE', + 'INDEXES', + 'INNER', + 'INPATH', + 'INPUTFORMAT', 'INSERT', - 'INSERTED', - 'INSTEAD', + 'INTERSECT', + 'INTERVAL', 'INT', 'INTEGER', 'INTO', - 'IO', - 'IP', - 'ISABOUT', - 'ISOLATION', - 'JOB', - 'KB', - 'KEEP', - 'KEEP_CDC', - 'KEEP_NULLS', - 'KEEP_REPLICATION', - 'KEEPDEFAULTS', - 'KEEPFIXED', - 'KEEPIDENTITY', - 'KEEPNULLS', - 'KERBEROS', - 'KEY', - 'KEY_SOURCE', + 'IS', + 'ITEMS', + 'JOIN', 'KEYS', - 'KEYSET', - 'KILL', - 'KILOBYTES_PER_BATCH', - 'LABELONLY', - 'LANGUAGE', 'LAST', - 'LASTROW', - 'LEVEL', - 'LEVEL_1', - 'LEVEL_2', - 'LEVEL_3', - 'LEVEL_4', - 'LIFETIME', + 'LATERAL', + 'LAZY', + 'LEADING', + 'LEFT', + 'LIKE', + 'ILIKE', 'LIMIT', - 'LINENO', + 'LINES', 'LIST', - 'LISTENER', - 'LISTENER_IP', - 'LISTENER_PORT', 'LOAD', - 'LOADHISTORY', - 'LOB_COMPACTION', 'LOCAL', - 'LOCAL_SERVICE_NAME', - 'LOCK_ESCALATION', - 'LOCK_TIMEOUT', - 'LOGIN', - 'LOGSPACE', - 'LOOP', - 'LOW', - 'MANUAL', - 'MARK', - 'MARK_IN_USE_FOR_REMOVAL', - 'MASTER', - 'MAX_CPU_PERCENT', - 'MAX_DISPATCH_LATENCY', - 'MAX_DOP', - 'MAX_DURATION', - 'MAX_EVENT_SIZE', - 'MAX_FILES', - 'MAX_IOPS_PER_VOLUME', - 'MAX_MEMORY', - 'MAX_MEMORY_PERCENT', - 'MAX_QUEUE_READERS', - 'MAX_ROLLOVER_FILES', - 'MAX_SIZE', - 'MAXDOP', - 'MAXERRORS', - 'MAXLENGTH', - 'MAXRECURSION', - 'MAXSIZE', - 'MAXTRANSFERSIZE', - 'MAXVALUE', - 'MB', - 'MEDIADESCRIPTION', - 'MEDIANAME', - 'MEDIAPASSWORD', - 'MEDIUM', - 'MEMBER', - 'MEMORY_OPTIMIZED', - 'MEMORY_OPTIMIZED_DATA', - 'MEMORY_OPTIMIZED_ELEVATE_TO_SNAPSHOT', - 'MEMORY_PARTITION_MODE', + 'LOCATION', + 'LOCK', + 'LOCKS', + 'LOGICAL', + 'LONG', + 'MACRO', + 'MAP', + 'MATCHED', 'MERGE', - 'MESSAGE', - 'MESSAGE_FORWARD_SIZE', - 'MESSAGE_FORWARDING', 'MICROSECOND', + 'MICROSECONDS', 'MILLISECOND', - 'MIN_CPU_PERCENT', - 'MIN_IOPS_PER_VOLUME', - 'MIN_MEMORY_PERCENT', + 'MILLISECONDS', 'MINUTE', 'MINUTES', - 'MINVALUE', - 'MIRROR', - 'MIRROR_ADDRESS', - 'MODIFY', - 'MONEY', 'MONTH', - 'MOVE', - 'MULTI_USER', - 'MUST_CHANGE', + 'MONTHS', + 'MSCK', 'NAME', + 'NAMESPACE', + 'NAMESPACES', 'NANOSECOND', - 'NATIONAL', - 'NATIVE_COMPILATION', - 'NCHAR', - 'NEGOTIATE', - 'NESTED_TRIGGERS', - 'NEW_ACCOUNT', - 'NEW_BROKER', - 'NEW_PASSWORD', - 'NEWNAME', - 'NEXT', + 'NANOSECONDS', + 'NATURAL', 'NO', - 'NO_BROWSETABLE', - 'NO_CHECKSUM', - 'NO_COMPRESSION', - 'NO_EVENT_LOSS', - 'NO_INFOMSGS', - 'NO_TRUNCATE', - 'NO_WAIT', - 'NOCHECK', - 'NOCOUNT', - 'NOEXEC', - 'NOEXPAND', - 'NOFORMAT', - 'NOINDEX', - 'NOINIT', - 'NOLOCK', - 'NON', - 'NON_TRANSACTED_ACCESS', - 'NONCLUSTERED', - 'NONE', - 'NORECOMPUTE', - 'NORECOVERY', - 'NORESEED', - 'NORESET', - 'NOREWIND', - 'NORMAL', - 'NOSKIP', - 'NOTIFICATION', - 'NOTRUNCATE', - 'NOUNLOAD', - 'NOWAIT', - 'NTEXT', - 'NTLM', - 'NUMANODE', + 'NOT', + 'NULL', + 'NULLS', 'NUMERIC', - 'NUMERIC_ROUNDABORT', - 'NVARCHAR', - 'OBJECT', 'OF', - 'OFF', - 'OFFLINE', 'OFFSET', - 'OFFSETS', - 'OLD_ACCOUNT', - 'OLD_PASSWORD', 'ON', - 'ON_FAILURE', - 'ONLINE', 'ONLY', - 'OPEN', - 'OPEN_EXISTING', - 'OPENTRAN', - 'OPTIMISTIC', - 'OPTIMIZE', 'OPTION', + 'OPTIONS', + 'OR', 'ORDER', 'OUT', - 'OUTPUT', - 'OUTPUTBUFFER', + 'OUTER', + 'OUTPUTFORMAT', 'OVER', - 'OVERRIDE', - 'OWNER', - 'OWNERSHIP', - 'PAD_INDEX', - 'PAGE', - 'PAGE_VERIFY', - 'PAGECOUNT', - 'PAGLOCK', - 'PARAMETERIZATION', - 'PARSEONLY', - 'PARTIAL', + 'OVERLAPS', + 'OVERLAY', + 'OVERWRITE', 'PARTITION', + 'PARTITIONED', 'PARTITIONS', - 'PARTNER', - 'PASSWORD', - 'PATH', - 'PER_CPU', - 'PER_NODE', + 'PERCENTILE_CONT', + 'PERCENTILE_DISC', 'PERCENT', - 'PERMISSION_SET', - 'PERSISTED', - 'PHYSICAL_ONLY', - 'PLAN', - 'POISON_MESSAGE_HANDLING', - 'POOL', - 'POPULATION', - 'PORT', + 'PIVOT', + 'PLACING', + 'POSITION', 'PRECEDING', - 'PRECISION', 'PRIMARY', - 'PRIMARY_ROLE', - 'PRINT', - 'PRIOR', - 'PRIORITY', - 'PRIORITY_LEVEL', - 'PRIVATE', - 'PRIVILEGES', - 'PROC', - 'PROCCACHE', - 'PROCEDURE', - 'PROCEDURE_NAME', - 'PROCESS', - 'PROFILE', - 'PROPERTY', - 'PROPERTY_DESCRIPTION', - 'PROPERTY_INT_ID', - 'PROPERTY_SET_GUID', - 'PROVIDER', - 'PROVIDER_KEY_NAME', - 'PUBLIC', - 'PUT', + 'PRINCIPALS', + 'PROPERTIES', + 'PURGE', 'QUARTER', 'QUERY', - 'QUERY_GOVERNOR_COST_LIMIT', - 'QUEUE', - 'QUEUE_DELAY', - 'QUOTED_IDENTIFIER', - 'RAISERROR', 'RANGE', - 'RAW', - 'RC2', - 'RC4', - 'RC4_128', - 'READ', - 'READ_COMMITTED_SNAPSHOT', - 'READ_ONLY', - 'READ_ONLY_ROUTING_LIST', - 'READ_ONLY_ROUTING_URL', - 'READ_WRITE', - 'READ_WRITE_FILEGROUPS', - 'READCOMMITTED', - 'READCOMMITTEDLOCK', - 'READONLY', - 'READPAST', - 'READTEXT', - 'READUNCOMMITTED', - 'READWRITE', 'REAL', - 'REBUILD', - 'RECEIVE', - 'RECOMPILE', - 'RECONFIGURE', - 'RECOVERY', - 'RECURSIVE', - 'RECURSIVE_TRIGGERS', + 'RECORDREADER', + 'RECORDWRITER', + 'RECOVER', + 'REDUCE', 'REFERENCES', - 'REGENERATE', - 'RELATED_CONVERSATION', - 'RELATED_CONVERSATION_GROUP', - 'RELATIVE', - 'REMOTE', - 'REMOTE_PROC_TRANSACTIONS', - 'REMOTE_SERVICE_NAME', - 'REMOVE', - 'REORGANIZE', - 'REPAIR_ALLOW_DATA_LOSS', - 'REPAIR_FAST', - 'REPAIR_REBUILD', + 'REFRESH', + 'RENAME', + 'REPAIR', 'REPEATABLE', - 'REPEATABLEREAD', - 'REPLICA', - 'REPLICATION', - 'REQUEST_MAX_CPU_TIME_SEC', - 'REQUEST_MAX_MEMORY_GRANT_PERCENT', - 'REQUEST_MEMORY_GRANT_TIMEOUT_SEC', - 'REQUIRED', - 'RESAMPLE', - 'RESEED', - 'RESERVE_DISK_SPACE', + 'REPLACE', 'RESET', - 'RESOURCE', - 'RESTART', - 'RESTORE', + 'RESPECT', 'RESTRICT', - 'RESTRICTED_USER', - 'RESULT', - 'RESUME', - 'RETAINDAYS', - 'RETENTION', - 'RETURN', - 'RETURNS', - 'REVERT', 'REVOKE', - 'REWIND', - 'REWINDONLY', - 'ROBUST', + 'RIGHT', + 'RLIKE', + 'REGEXP', 'ROLE', + 'ROLES', 'ROLLBACK', 'ROLLUP', - 'ROOT', - 'ROUTE', 'ROW', - 'ROWCOUNT', - 'ROWGUIDCOL', - 'ROWLOCK', 'ROWS', - 'ROWS_PER_BATCH', - 'ROWTERMINATOR', - 'ROWVERSION', - 'RSA_1024', - 'RSA_2048', - 'RSA_512', - 'RULE', - 'SAFE', - 'SAFETY', - 'SAMPLE', - 'SAVE', - 'SCHEDULER', - 'SCHEMA', - 'SCHEMA_AND_DATA', - 'SCHEMA_ONLY', - 'SCHEMABINDING', - 'SCHEME', - 'SCROLL', - 'SCROLL_LOCKS', - 'SEARCH', 'SECOND', - 'SECONDARY', - 'SECONDARY_ONLY', - 'SECONDARY_ROLE', 'SECONDS', - 'SECRET', - 'SECURITY_LOG', - 'SECURITYAUDIT', + 'SCHEMA', + 'SCHEMAS', 'SELECT', - 'SELECTIVE', - 'SELF', - 'SEND', - 'SENT', - 'SEQUENCE', - 'SERIALIZABLE', - 'SERVER', - 'SERVICE', - 'SERVICE_BROKER', - 'SERVICE_NAME', - 'SESSION', - 'SESSION_TIMEOUT', + 'SEMI', + 'SEPARATED', + 'SERDE', + 'SERDEPROPERTIES', + 'SESSION_USER', 'SET', + 'MINUS', 'SETS', - 'SETUSER', - 'SHOW_STATISTICS', - 'SHOWCONTIG', - 'SHOWPLAN', - 'SHOWPLAN_ALL', - 'SHOWPLAN_TEXT', - 'SHOWPLAN_XML', - 'SHRINKDATABASE', - 'SHRINKFILE', - 'SHUTDOWN', - 'SID', - 'SIGNATURE', - 'SIMPLE', - 'SINGLE_BLOB', - 'SINGLE_CLOB', - 'SINGLE_NCLOB', - 'SINGLE_USER', - 'SINGLETON', - 'SIZE', - 'SKIP', - 'SMALLDATETIME', + 'SHORT', + 'SHOW', + 'SINGLE', + 'SKEWED', 'SMALLINT', - 'SMALLMONEY', - 'SNAPSHOT', - 'SORT_IN_TEMPDB', + 'SOME', + 'SORT', + 'SORTED', 'SOURCE', - 'SPARSE', - 'SPATIAL', - 'SPATIAL_WINDOW_MAX_CELLS', - 'SPECIFICATION', - 'SPLIT', - 'SQL', - 'SQL_VARIANT', - 'SQLPERF', - 'STANDBY', 'START', - 'START_DATE', - 'STARTED', - 'STARTUP_STATE', - 'STAT_HEADER', - 'STATE', - 'STATEMENT', - 'STATIC', - 'STATISTICAL_SEMANTICS', 'STATISTICS', - 'STATISTICS_INCREMENTAL', - 'STATISTICS_NORECOMPUTE', - 'STATS', - 'STATS_STREAM', - 'STATUS', - 'STATUSONLY', - 'STOP', - 'STOP_ON_ERROR', - 'STOPAT', - 'STOPATMARK', - 'STOPBEFOREMARK', - 'STOPLIST', - 'STOPPED', - 'SUBJECT', - 'SUBSCRIPTION', - 'SUPPORTED', - 'SUSPEND', - 'SWITCH', - 'SYMMETRIC', - 'SYNCHRONOUS_COMMIT', - 'SYNONYM', - 'SYSNAME', + 'STORED', + 'STRATIFY', + 'STRING', + 'STRUCT', + 'SUBSTR', + 'SUBSTRING', + 'SYNC', 'SYSTEM', + 'SYSTEM_TIME', + 'SYSTEM_VERSION', 'TABLE', - 'TABLERESULTS', + 'TABLES', 'TABLESAMPLE', - 'TABLOCK', - 'TABLOCKX', - 'TAKE', - 'TAPE', 'TARGET', - 'TARGET_RECOVERY_TIME', - 'TB', - 'TCP', - 'TEXT', - 'TEXTIMAGE_ON', - 'TEXTSIZE', + 'TBLPROPERTIES', + 'TEMPORARY', + 'TERMINATED', 'THEN', - 'THESAURUS', - 'THROW', - 'TIES', 'TIME', - 'TIMEOUT', - 'TIMER', + 'TIMEDIFF', 'TIMESTAMP', + 'TIMESTAMP_LTZ', + 'TIMESTAMP_NTZ', + 'TIMESTAMPADD', + 'TIMESTAMPDIFF', 'TINYINT', 'TO', - 'TOP', - 'TORN_PAGE_DETECTION', - 'TRACEOFF', - 'TRACEON', - 'TRACESTATUS', - 'TRACK_CAUSALITY', - 'TRACK_COLUMNS_UPDATED', - 'TRAN', + 'TOUCH', + 'TRAILING', 'TRANSACTION', - 'TRANSFER', - 'TRANSFORM_NOISE_WORDS', - 'TRIGGER', - 'TRIPLE_DES', - 'TRIPLE_DES_3KEY', + 'TRANSACTIONS', + 'TRANSFORM', + 'TRIM', 'TRUE', 'TRUNCATE', - 'TRUNCATEONLY', - 'TRUSTWORTHY', - 'TRY', - 'TSQL', - 'TWO_DIGIT_YEAR_CUTOFF', + 'TRY_CAST', 'TYPE', - 'TYPE_WARNING', + 'UNARCHIVE', 'UNBOUNDED', - 'UNCHECKED', - 'UNCOMMITTED', - 'UNDEFINED', + 'UNCACHE', + 'UNION', 'UNIQUE', - 'UNIQUEIDENTIFIER', 'UNKNOWN', - 'UNLIMITED', - 'UNLOAD', - 'UNSAFE', + 'UNLOCK', + 'UNPIVOT', + 'UNSET', 'UPDATE', - 'UPDATETEXT', - 'UPDATEUSAGE', - 'UPDLOCK', - 'URL', 'USE', - 'USED', 'USER', - 'USEROPTIONS', 'USING', - 'VALID_XML', - 'VALIDATION', - 'VALUE', 'VALUES', - 'VARBINARY', 'VARCHAR', - 'VARYING', - 'VERIFYONLY', + 'VAR', + 'VARIABLE', 'VERSION', 'VIEW', - 'VIEW_METADATA', 'VIEWS', - 'VISIBILITY', - 'WAIT_AT_LOW_PRIORITY', - 'WAITFOR', + 'VOID', 'WEEK', - 'WEIGHT', - 'WELL_FORMED_XML', + 'WEEKS', 'WHEN', 'WHERE', - 'WHILE', - 'WINDOWS', + 'WINDOW', 'WITH', 'WITHIN', - 'WITHOUT', - 'WITNESS', - 'WORK', - 'WORKLOAD', - 'WRITETEXT', - 'XACT_ABORT', - 'XLOCK', - 'XMAX', - 'XMIN', - 'XML', - 'XMLDATA', - 'XMLNAMESPACES', - 'XMLSCHEMA', - 'XQUERY', - 'XSINIL', 'YEAR', - 'YMAX', - 'YMIN' + 'YEARS', + 'ZONE' ], operators: [ // Logical - 'ALL', + 'OR', 'AND', - 'ANY', - 'BETWEEN', - 'EXISTS', - 'IN', - 'LIKE', 'NOT', - 'OR', - 'SOME', - // Set - 'EXCEPT', + // Mathematical + 'MOD', + 'DIV', + // SET 'INTERSECT', 'UNION', + 'EXCEPT', + 'MINUS', + // null + 'INCLUDE', + 'EXCLUDE', // Join - 'APPLY', - 'CROSS', - 'FULL', 'INNER', - 'JOIN', - 'LEFT', 'OUTER', + 'CROSS', + 'LEFT', 'RIGHT', - // Predicates - 'CONTAINS', - 'FREETEXT', + 'FULL', + 'SEMI', + 'ANTI', + // Predicate 'IS', - 'NULL', + 'BETWEEN', + 'LIKE', + 'ILIKE', + 'RLIKE', + 'REGEXP', + 'IN', + 'DISTINCT', + 'FROM', // Pivoting 'PIVOT', - 'UNPIVOT', - // Merging - 'MATCHED' + 'UNPIVOT' ], builtinFunctions: [ + // https://spark.apache.org/docs/latest/sql-ref-functions-builtin.html // Aggregate + 'ANY', + 'ANY_VALUE', + 'APPROX_COUNT_DISTINCT', + 'APPROX_PERCENTILE', + 'ARRAY_AGG', 'AVG', - 'CHECKSUM_AGG', + 'BIT_AND', + 'BIT_OR', + 'BIT_XOR', + 'BITMAP_CONSTRUCT_AGG', + 'BITMAP_OR_AGG', + 'BOOL_AND', + 'BOOL_OR', + 'COLLECT_LIST', + 'COLLECT_SET', + 'CORR', + 'COUNT', 'COUNT', - 'COUNT_BIG', + 'COUNT', + 'COUNT_IF', + 'COUNT_MIN_SKETCH', + 'COVAR_POP', + 'COVAR_SAMP', + 'EVERY', + 'FIRST', + 'FIRST_VALUE', 'GROUPING', 'GROUPING_ID', + 'HISTOGRAM_NUMERIC', + 'HLL_SKETCH_AGG', + 'HLL_UNION_AGG', + 'KURTOSIS', + 'LAST', + 'LAST_VALUE', 'MAX', + 'MAX_BY', + 'MEAN', + 'MEDIAN', 'MIN', + 'MIN_BY', + 'MODE', + 'PERCENTILE', + 'PERCENTILE', + 'PERCENTILE_APPROX', + 'REGR_AVGX', + 'REGR_AVGY', + 'REGR_COUNT', + 'REGR_INTERCEPT', + 'REGR_R2', + 'REGR_SLOPE', + 'REGR_SXX', + 'REGR_SXY', + 'REGR_SYY', + 'SKEWNESS', + 'SOME', + 'STD', + 'STDDEV', + 'STDDEV_POP', + 'STDDEV_SAMP', 'SUM', - 'STDEV', - 'STDEVP', - 'VAR', - 'VARP', - // Analytic + 'TRY_AVG', + 'TRY_SUM', + 'VAR_POP', + 'VAR_SAMP', + 'VARIANCE', + // Window Functions 'CUME_DIST', - 'FIRST_VALUE', + 'DENSE_RANK', 'LAG', - 'LAST_VALUE', 'LEAD', - 'PERCENTILE_CONT', - 'PERCENTILE_DISC', + 'NTH_VALUE', + 'NTILE', 'PERCENT_RANK', - // Collation - 'COLLATE', - 'COLLATIONPROPERTY', - 'TERTIARY_WEIGHTS', - // Azure - 'FEDERATION_FILTERING_VALUE', - // Conversion - 'CAST', - 'CONVERT', - 'PARSE', - 'TRY_CAST', - 'TRY_CONVERT', - 'TRY_PARSE', - // Cryptographic - 'ASYMKEY_ID', - 'ASYMKEYPROPERTY', - 'CERTPROPERTY', - 'CERT_ID', - 'CRYPT_GEN_RANDOM', - 'DECRYPTBYASYMKEY', - 'DECRYPTBYCERT', - 'DECRYPTBYKEY', - 'DECRYPTBYKEYAUTOASYMKEY', - 'DECRYPTBYKEYAUTOCERT', - 'DECRYPTBYPASSPHRASE', - 'ENCRYPTBYASYMKEY', - 'ENCRYPTBYCERT', - 'ENCRYPTBYKEY', - 'ENCRYPTBYPASSPHRASE', - 'HASHBYTES', - 'IS_OBJECTSIGNED', - 'KEY_GUID', - 'KEY_ID', - 'KEY_NAME', - 'SIGNBYASYMKEY', - 'SIGNBYCERT', - 'SYMKEYPROPERTY', - 'VERIFYSIGNEDBYCERT', - 'VERIFYSIGNEDBYASYMKEY', - // Cursor - 'CURSOR_STATUS', - // Datatype - 'DATALENGTH', - 'IDENT_CURRENT', - 'IDENT_INCR', - 'IDENT_SEED', - 'IDENTITY', - 'SQL_VARIANT_PROPERTY', - // Datetime + 'RANK', + 'ROW_NUMBER', + // Array Functions + 'ARRAY', + 'ARRAY_APPEND', + 'ARRAY_COMPACT', + 'ARRAY_CONTAINS', + 'ARRAY_DISTINCT', + 'ARRAY_EXCEPT', + 'ARRAY_INSERT', + 'ARRAY_INTERSECT', + 'ARRAY_JOIN', + 'ARRAY_MAX', + 'ARRAY_MIN', + 'ARRAY_POSITION', + 'ARRAY_PREPEND', + 'ARRAY_REMOVE', + 'ARRAY_REPEAT', + 'ARRAY_UNION', + 'ARRAYS_OVERLAP', + 'ARRAYS_ZIP', + 'FLATTEN', + 'GET', + 'SEQUENCE', + 'SHUFFLE', + 'SLICE', + 'SORT_ARRAY', + // Map Functions + 'ELEMENT_AT', + 'ELEMENT_AT', + 'MAP', + 'MAP_CONCAT', + 'MAP_CONTAINS_KEY', + 'MAP_ENTRIES', + 'MAP_FROM_ARRAYS', + 'MAP_FROM_ENTRIES', + 'MAP_KEYS', + 'MAP_VALUES', + 'STR_TO_MAP', + 'TRY_ELEMENT_AT', + 'TRY_ELEMENT_AT', + // Date and Timestamp Functions + 'ADD_MONTHS', + 'CONVERT_TIMEZONE', + 'CURDATE', + 'CURRENT_DATE', + 'CURRENT_DAT', 'CURRENT_TIMESTAMP', + 'CURRENT_TIMESTAM', + 'CURRENT_TIMEZONE', + 'DATE_ADD', + 'DATE_DIFF', + 'DATE_FORMAT', + 'DATE_FROM_UNIX_DATE', + 'DATE_PART', + 'DATE_SUB', + 'DATE_TRUNC', 'DATEADD', 'DATEDIFF', - 'DATEFROMPARTS', - 'DATENAME', 'DATEPART', - 'DATETIME2FROMPARTS', - 'DATETIMEFROMPARTS', - 'DATETIMEOFFSETFROMPARTS', 'DAY', - 'EOMONTH', - 'GETDATE', - 'GETUTCDATE', - 'ISDATE', + 'DAYOFMONTH', + 'DAYOFWEEK', + 'DAYOFYEAR', + 'EXTRACT', + 'FROM_UNIXTIME', + 'FROM_UTC_TIMESTAMP', + 'HOUR', + 'LAST_DAY', + 'LOCALTIMESTAMP', + 'LOCALTIMESTAM', + 'MAKE_DATE', + 'MAKE_DT_INTERVAL', + 'MAKE_INTERVAL', + 'MAKE_TIMESTAMP', + 'MAKE_TIMESTAMP_LTZ', + 'MAKE_TIMESTAMP_NTZ', + 'MAKE_YM_INTERVAL', + 'MINUTE', 'MONTH', - 'SMALLDATETIMEFROMPARTS', - 'SWITCHOFFSET', - 'SYSDATETIME', - 'SYSDATETIMEOFFSET', - 'SYSUTCDATETIME', - 'TIMEFROMPARTS', - 'TODATETIMEOFFSET', + 'MONTHS_BETWEEN', + 'NEXT_DAY', + 'NOW', + 'QUARTER', + 'SECOND', + 'SESSION_WINDOW', + 'TIMESTAMP_MICROS', + 'TIMESTAMP_MILLIS', + 'TIMESTAMP_SECONDS', + 'TO_DATE', + 'TO_TIMESTAMP', + 'TO_TIMESTAMP_LTZ', + 'TO_TIMESTAMP_NTZ', + 'TO_UNIX_TIMESTAMP', + 'TO_UTC_TIMESTAMP', + 'TRUNC', + 'TRY_TO_TIMESTAMP', + 'UNIX_DATE', + 'UNIX_MICROS', + 'UNIX_MILLIS', + 'UNIX_SECONDS', + 'UNIX_TIMESTAMP', + 'WEEKDAY', + 'WEEKOFYEAR', + 'WINDOW', + 'WINDOW_TIME', 'YEAR', - // Logical - 'CHOOSE', - 'COALESCE', - 'IIF', - 'NULLIF', - // Mathematical + // JSON Functions + 'FROM_JSON', + 'GET_JSON_OBJECT', + 'JSON_ARRAY_LENGTH', + 'JSON_OBJECT_KEYS', + 'JSON_TUPLE', + 'SCHEMA_OF_JSON', + 'TO_JSON', + // Mathematical Functions 'ABS', 'ACOS', + 'ACOSH', 'ASIN', + 'ASINH', 'ATAN', - 'ATN2', + 'ATAN2', + 'ATANH', + 'BIN', + 'BROUND', + 'CBRT', + 'CEIL', 'CEILING', + 'CONV', 'COS', + 'COSH', 'COT', + 'CSC', 'DEGREES', + 'E', 'EXP', + 'EXPM1', + 'FACTORIAL', 'FLOOR', + 'GREATEST', + 'HEX', + 'HYPOT', + 'LEAST', + 'LN', 'LOG', 'LOG10', + 'LOG1P', + 'LOG2', + 'NEGATIVE', 'PI', + 'PMOD', + 'POSITIVE', + 'POW', 'POWER', 'RADIANS', 'RAND', + 'RANDN', + 'RANDOM', + 'RINT', 'ROUND', + 'SEC', + 'SHIFTLEFT', 'SIGN', + 'SIGNUM', 'SIN', + 'SINH', 'SQRT', - 'SQUARE', 'TAN', - // Metadata - 'APP_NAME', - 'APPLOCK_MODE', - 'APPLOCK_TEST', - 'ASSEMBLYPROPERTY', - 'COL_LENGTH', - 'COL_NAME', - 'COLUMNPROPERTY', - 'DATABASE_PRINCIPAL_ID', - 'DATABASEPROPERTYEX', - 'DB_ID', - 'DB_NAME', - 'FILE_ID', - 'FILE_IDEX', - 'FILE_NAME', - 'FILEGROUP_ID', - 'FILEGROUP_NAME', - 'FILEGROUPPROPERTY', - 'FILEPROPERTY', - 'FULLTEXTCATALOGPROPERTY', - 'FULLTEXTSERVICEPROPERTY', - 'INDEX_COL', - 'INDEXKEY_PROPERTY', - 'INDEXPROPERTY', - 'OBJECT_DEFINITION', - 'OBJECT_ID', - 'OBJECT_NAME', - 'OBJECT_SCHEMA_NAME', - 'OBJECTPROPERTY', - 'OBJECTPROPERTYEX', - 'ORIGINAL_DB_NAME', - 'PARSENAME', - 'SCHEMA_ID', - 'SCHEMA_NAME', - 'SCOPE_IDENTITY', - 'SERVERPROPERTY', - 'STATS_DATE', - 'TYPE_ID', - 'TYPE_NAME', - 'TYPEPROPERTY', - // Ranking - 'DENSE_RANK', - 'NTILE', - 'RANK', - 'ROW_NUMBER', - // Replication - 'PUBLISHINGSERVERNAME', - // Rowset - 'OPENDATASOURCE', - 'OPENQUERY', - 'OPENROWSET', - 'OPENXML', - // Security - 'CERTENCODED', - 'CERTPRIVATEKEY', - 'CURRENT_USER', - 'HAS_DBACCESS', - 'HAS_PERMS_BY_NAME', - 'IS_MEMBER', - 'IS_ROLEMEMBER', - 'IS_SRVROLEMEMBER', - 'LOGINPROPERTY', - 'ORIGINAL_LOGIN', - 'PERMISSIONS', - 'PWDENCRYPT', - 'PWDCOMPARE', - 'SESSION_USER', - 'SESSIONPROPERTY', - 'SUSER_ID', - 'SUSER_NAME', - 'SUSER_SID', - 'SUSER_SNAME', - 'SYSTEM_USER', - 'USER', - 'USER_ID', - 'USER_NAME', - // String + 'TANH', + 'TRY_ADD', + 'TRY_DIVIDE', + 'TRY_MULTIPLY', + 'TRY_SUBTRACT', + 'UNHEX', + 'WIDTH_BUCKET', + // String Functions 'ASCII', + 'BASE64', + 'BIT_LENGTH', + 'BTRIM', + 'BTRIM', 'CHAR', - 'CHARINDEX', - 'CONCAT', - 'DIFFERENCE', - 'FORMAT', + 'CHAR_LENGTH', + 'CHARACTER_LENGTH', + 'CHR', + 'CONCAT_WS', + 'CONTAINS', + 'DECODE', + 'DECODE', + 'ELT', + 'ENCODE', + 'ENDSWITH', + 'FIND_IN_SET', + 'FORMAT_NUMBER', + 'FORMAT_STRING', + 'INITCAP', + 'INSTR', + 'LCASE', 'LEFT', 'LEN', + 'LENGTH', + 'LEVENSHTEIN', + 'LOCATE', 'LOWER', + 'LPAD', 'LTRIM', - 'NCHAR', - 'PATINDEX', - 'QUOTENAME', + 'LUHN_CHECK', + 'MASK', + 'OCTET_LENGTH', + 'OVERLAY', + 'POSITION', + 'PRINTF', + 'REGEXP_COUNT', + 'REGEXP_EXTRACT', + 'REGEXP_EXTRACT_ALL', + 'REGEXP_INSTR', + 'REGEXP_REPLACE', + 'REGEXP_SUBSTR', + 'REPEAT', 'REPLACE', - 'REPLICATE', - 'REVERSE', 'RIGHT', + 'RPAD', 'RTRIM', + 'SENTENCES', 'SOUNDEX', 'SPACE', - 'STR', - 'STUFF', + 'SPLIT', + 'SPLIT_PART', + 'STARTSWITH', + 'SUBSTR', + 'SUBSTR', + 'SUBSTRING', 'SUBSTRING', - 'UNICODE', + 'SUBSTRING_INDEX', + 'TO_BINARY', + 'TO_CHAR', + 'TO_NUMBER', + 'TO_VARCHAR', + 'TRANSLATE', + 'TRIM', + 'TRIM', + 'TRIM', + 'TRIM', + 'TRIM', + 'TRIM', + 'TRIM', + 'TRIM', + 'TRY_TO_BINARY', + 'TRY_TO_NUMBER', + 'UCASE', + 'UNBASE64', 'UPPER', - // System - 'BINARY_CHECKSUM', - 'CHECKSUM', - 'CONNECTIONPROPERTY', - 'CONTEXT_INFO', - 'CURRENT_REQUEST_ID', - 'ERROR_LINE', - 'ERROR_NUMBER', - 'ERROR_MESSAGE', - 'ERROR_PROCEDURE', - 'ERROR_SEVERITY', - 'ERROR_STATE', - 'FORMATMESSAGE', - 'GETANSINULL', - 'GET_FILESTREAM_TRANSACTION_CONTEXT', - 'HOST_ID', - 'HOST_NAME', + // Conditional Functions + 'COALESCE', + 'IFNULL', + 'NANVL', + 'NULLIF', + 'NVL', + 'NVL2', + // Bitwise Functions + 'BIT_COUNT', + 'BIT_GET', + 'GETBIT', + 'SHIFTRIGHT', + 'SHIFTRIGHTUNSIGNED', + // Predicate Functions + 'ISNAN', + 'ISNOTNULL', 'ISNULL', - 'ISNUMERIC', - 'MIN_ACTIVE_ROWVERSION', - 'NEWID', - 'NEWSEQUENTIALID', - 'ROWCOUNT_BIG', - 'XACT_STATE', - // TextImage - 'TEXTPTR', - 'TEXTVALID', - // Trigger - 'COLUMNS_UPDATED', - 'EVENTDATA', - 'TRIGGER_NESTLEVEL', - 'UPDATE', - // ChangeTracking - 'CHANGETABLE', - 'CHANGE_TRACKING_CONTEXT', - 'CHANGE_TRACKING_CURRENT_VERSION', - 'CHANGE_TRACKING_IS_COLUMN_IN_MASK', - 'CHANGE_TRACKING_MIN_VALID_VERSION', - // FullTextSearch - 'CONTAINSTABLE', - 'FREETEXTTABLE', - // SemanticTextSearch - 'SEMANTICKEYPHRASETABLE', - 'SEMANTICSIMILARITYDETAILSTABLE', - 'SEMANTICSIMILARITYTABLE', - // FileStream - 'FILETABLEROOTPATH', - 'GETFILENAMESPACEPATH', - 'GETPATHLOCATOR', - 'PATHNAME', - // ServiceBroker - 'GET_TRANSMISSION_STATUS' + 'REGEXP', + 'REGEXP_LIKE', + 'RLIKE', + // Csv Functions + 'FROM_CSV', + 'SCHEMA_OF_CSV', + 'TO_CSV', + // Misc Functions + 'AES_DECRYPT', + 'AES_ENCRYPT', + 'ASSERT_TRUE', + 'BITMAP_BIT_POSITION', + 'BITMAP_BUCKET_NUMBER', + 'BITMAP_COUNT', + 'CURRENT_CATALOG', + 'CURRENT_DATABASE', + 'CURRENT_SCHEMA', + 'CURRENT_USER', + 'EQUAL_NULL', + 'HLL_SKETCH_ESTIMATE', + 'HLL_UNION', + 'INPUT_FILE_BLOCK_LENGTH', + 'INPUT_FILE_BLOCK_START', + 'INPUT_FILE_NAME', + 'JAVA_METHOD', + 'MONOTONICALLY_INCREASING_ID', + 'REFLECT', + 'SPARK_PARTITION_ID', + 'TRY_AES_DECRYPT', + 'TYPEOF', + 'USER', + 'UUID', + 'VERSION', + // Generator Functions + 'EXPLODE', + 'EXPLODE_OUTER', + 'INLINE', + 'INLINE_OUTER', + 'POSEXPLODE', + 'POSEXPLODE_OUTER', + 'STACK' ], builtinVariables: [ - // Configuration - '@@DATEFIRST', - '@@DBTS', - '@@LANGID', - '@@LANGUAGE', - '@@LOCK_TIMEOUT', - '@@MAX_CONNECTIONS', - '@@MAX_PRECISION', - '@@NESTLEVEL', - '@@OPTIONS', - '@@REMSERVER', - '@@SERVERNAME', - '@@SERVICENAME', - '@@SPID', - '@@TEXTSIZE', - '@@VERSION', - // Cursor - '@@CURSOR_ROWS', - '@@FETCH_STATUS', - // Datetime - '@@DATEFIRST', - // Metadata - '@@PROCID', - // System - '@@ERROR', - '@@IDENTITY', - '@@ROWCOUNT', - '@@TRANCOUNT', - // Stats - '@@CONNECTIONS', - '@@CPU_BUSY', - '@@IDLE', - '@@IO_BUSY', - '@@PACKET_ERRORS', - '@@PACK_RECEIVED', - '@@PACK_SENT', - '@@TIMETICKS', - '@@TOTAL_ERRORS', - '@@TOTAL_READ', - '@@TOTAL_WRITE' + // Not support + ], + typeKeywords: [ + // https://spark.apache.org/docs/latest/sql-ref-datatypes.html + 'BOOLEAN', + 'TINYINT', + 'BYTE', + 'SMALLINT', + 'SHORT', + 'INT', + 'INTEGER', + 'BIGINT', + 'LONG', + 'FLOAT', + 'REAL', + 'DOUBLE', + 'DATE', + 'TIMESTAMP', + 'TIMESTAMP_NTZ', + 'TIMESTAMP_LTZ', + 'STRING', + 'CHARACTER', + 'CHAR', + 'VARCHAR', + 'BINARY', + 'DECIMAL', + 'DEC', + 'NUMERIC', + 'VOID', + 'INTERVAL', + 'ARRAY', + 'STRUCT', + 'MAP' + ], + pseudoColumns: [ + // Not support ], - pseudoColumns: ['$ACTION', '$IDENTITY', '$ROWGUID', '$PARTITION'], tokenizer: { root: [ { include: '@comments' }, { include: '@whitespace' }, { include: '@pseudoColumns' }, { include: '@numbers' }, + { include: '@binaries' }, { include: '@strings' }, { include: '@complexIdentifiers' }, { include: '@scopes' }, - [/[;,.]/, 'delimiter'], - [/[()]/, '@brackets'], + { include: '@complexDataTypes' }, + [/[;,.]/, tokenClassConsts.DELIMITER], + [/[\(\)\[\]\{\}]/, '@brackets'], [ /[\w@#$]+/, { cases: { - '@keywords': 'keyword', - '@operators': 'operator', - '@builtinVariables': 'predefined', - '@builtinFunctions': 'predefined', - '@default': 'identifier' + '@operators': tokenClassConsts.OPERATOR_KEYWORD, + '@typeKeywords': tokenClassConsts.TYPE, + '@builtinVariables': tokenClassConsts.VARIABLE, + '@builtinFunctions': tokenClassConsts.PREDEFINED, + '@keywords': tokenClassConsts.KEYWORD, + '@default': tokenClassConsts.IDENTIFIER } } ], - [/[<>=!%&+\-*/|~^]/, 'operator'] + [/[<>=!%&+\-*/|~^]/, tokenClassConsts.OPERATOR_SYMBOL] ], - whitespace: [[/\s+/, 'white']], + whitespace: [[/[\s\t\r\n]+/, tokenClassConsts.WHITE]], comments: [ - [/--+.*/, 'comment'], - [/\/\*/, { token: 'comment.quote', next: '@comment' }] + [/--+.*/, tokenClassConsts.COMMENT], + [/\/\*/, { token: tokenClassConsts.COMMENT_QUOTE, next: '@comment' }] ], comment: [ - [/[^*/]+/, 'comment'], - // Not supporting nested comments, as nested comments seem to not be standard? - // i.e. http://stackoverflow.com/questions/728172/are-there-multiline-comment-delimiters-in-sql-that-are-vendor-agnostic + [/[^*/]+/, tokenClassConsts.COMMENT], // [/\/\*/, { token: 'comment.quote', next: '@push' }], // nested comment not allowed :-( - [/\*\//, { token: 'comment.quote', next: '@pop' }], - [/./, 'comment'] + [/\*\//, { token: tokenClassConsts.COMMENT_QUOTE, next: '@pop' }], + [/./, tokenClassConsts.COMMENT] ], pseudoColumns: [ [ /[$][A-Za-z_][\w@#$]*/, { cases: { - '@pseudoColumns': 'predefined', - '@default': 'identifier' + '@pseudoColumns': tokenClassConsts.PREDEFINED, + '@default': tokenClassConsts.IDENTIFIER } } ] ], numbers: [ - [/0[xX][0-9a-fA-F]*/, 'number'], - [/[$][+-]*\d*(\.\d*)?/, 'number'], - [/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, 'number'] + // https://spark.apache.org/docs/latest/sql-ref-literals.html#numeric-literal + // TODO: Fractional Literals Syntax + [/0[xX][0-9a-fA-F]*/, tokenClassConsts.NUMBER_HEX], + [/[$][+-]*\d*(\.\d*)?/, tokenClassConsts.NUMBER], + [/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, tokenClassConsts.NUMBER_FLOAT] + ], + binaries: [ + // https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal + [/X'/i, { token: tokenClassConsts.BINARY, next: '@binarySingle' }], + [/X"/i, { token: tokenClassConsts.BINARY, next: '@binaryDouble' }] + ], + binarySingle: [ + [/\d+/, tokenClassConsts.BINARY_ESCAPE], + [/''/, tokenClassConsts.BINARY], + [/'/, { token: tokenClassConsts.BINARY, next: '@pop' }] + ], + binaryDouble: [ + [/\d+/, tokenClassConsts.BINARY_ESCAPE], + [/""/, tokenClassConsts.BINARY], + [/"/, { token: tokenClassConsts.BINARY, next: '@pop' }] ], strings: [ - [/N'/, { token: 'string', next: '@string' }], - [/'/, { token: 'string', next: '@string' }] + // https://spark.apache.org/docs/latest/sql-ref-literals.html#string-literal + [/'/, { token: tokenClassConsts.STRING, next: '@stringSingle' }], + [/R'/i, { token: tokenClassConsts.STRING, next: '@stringSingle' }], + [/"/, { token: tokenClassConsts.STRING, next: '@stringDouble' }], + [/R"/i, { token: tokenClassConsts.STRING, next: '@stringDouble' }] ], - string: [ - [/[^']+/, 'string'], - [/''/, 'string'], - [/'/, { token: 'string', next: '@pop' }] + stringSingle: [ + [/[^']+/, tokenClassConsts.STRING_ESCAPE], + [/''/, tokenClassConsts.STRING], + [/'/, { token: tokenClassConsts.STRING, next: '@pop' }] ], - complexIdentifiers: [ - [/\[/, { token: 'identifier.quote', next: '@bracketedIdentifier' }], - [/"/, { token: 'identifier.quote', next: '@quotedIdentifier' }] + stringDouble: [ + [/[^"]+/, tokenClassConsts.STRING_ESCAPE], + [/""/, tokenClassConsts.STRING], + [/"/, { token: tokenClassConsts.STRING, next: '@pop' }] ], - bracketedIdentifier: [ - [/[^\]]+/, 'identifier'], - [/]]/, 'identifier'], - [/]/, { token: 'identifier.quote', next: '@pop' }] + complexIdentifiers: [ + [/`/, { token: tokenClassConsts.IDENTIFIER_QUOTE, next: '@quotedIdentifier' }] ], quotedIdentifier: [ - [/[^"]+/, 'identifier'], - [/""/, 'identifier'], - [/"/, { token: 'identifier.quote', next: '@pop' }] + [/[^`]+/, tokenClassConsts.IDENTIFIER_QUOTE], + [/``/, tokenClassConsts.IDENTIFIER_QUOTE], + [/`/, { token: tokenClassConsts.IDENTIFIER_QUOTE, next: '@pop' }] ], - scopes: [ - [/BEGIN\s+(DISTRIBUTED\s+)?TRAN(SACTION)?\b/i, 'keyword'], - [/BEGIN\s+TRY\b/i, { token: 'keyword.try' }], - [/END\s+TRY\b/i, { token: 'keyword.try' }], - [/BEGIN\s+CATCH\b/i, { token: 'keyword.catch' }], - [/END\s+CATCH\b/i, { token: 'keyword.catch' }], - [/(BEGIN|CASE)\b/i, { token: 'keyword.block' }], - [/END\b/i, { token: 'keyword.block' }], - [/WHEN\b/i, { token: 'keyword.choice' }], - [/THEN\b/i, { token: 'keyword.choice' }] - ] + scopes: [], + complexDataTypes: [] } };