Skip to content

Commit

Permalink
chore: Refactored sql parser
Browse files Browse the repository at this point in the history
  • Loading branch information
jsumners-nr committed Nov 8, 2024
1 parent 9b6de68 commit b90d315
Show file tree
Hide file tree
Showing 3 changed files with 310 additions and 84 deletions.
226 changes: 196 additions & 30 deletions lib/db/query-parsers/sql.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,17 @@
'use strict'

const logger = require('../../logger').child({ component: 'sql_query_parser' })
const StatementMatcher = require('../statement-matcher')
const stringify = require('json-stringify-safe')

const OPERATIONS = [
new StatementMatcher(
'select',
/^[^\S]*?select\b[\s\S]+?\bfrom[\s\n\r\[\(]+([^\]\s\n\r,)(;]*)/gim
),
new StatementMatcher('update', /^[^\S]*?update[^\S]+?([^\s\n\r,;]+)/gim),
new StatementMatcher(
'insert',
/^[^\S]*?insert(?:[^\S]+ignore)?[^\S]+into[^\S]+([^\s\n\r(,;]+)/gim
),
new StatementMatcher('delete', /^[^\S]*?delete[^\S]+?from[^\S]+([^\s\n\r,(;]+)/gim)
]
const COMMENT_PATTERN = /\/\\*.*?\\*\//g

// This must be called synchronously after the initial db call for backtraces to
// work correctly

/**
* Parses a SQL statement into the parts we want to report as metadata in
* database transactions.
*
* @param {string} sql The statement to parse.
*
* @returns {{query: string, collection: null|string, operation: string}} Parsed
* metadata.
*/
module.exports = function parseSql(sql) {
// Sometimes we get an object here from MySQL. We have been unable to
// reproduce it, so we'll just log what that object is and return a statement
Expand All @@ -38,7 +29,7 @@ module.exports = function parseSql(sql) {
try {
logger.trace('parseSQL got an a non-string sql that looks like: %s', stringify(sql))
} catch (err) {
logger.debug(err, 'Unabler to stringify SQL')
logger.debug(err, 'Unable to stringify SQL')
}
}
return {
Expand All @@ -48,24 +39,199 @@ module.exports = function parseSql(sql) {
}
}

sql = sql.replace(COMMENT_PATTERN, '').trim()
sql = removeMultiLineComments(sql).trim()
let result = {
operation: 'other',
collection: null,
query: sql
}
if (looksLikeValidSql(sql) === false) {
return result
}

const lines = sql.split('\n')
result = { ...result, ...parseLines(lines) }
result.query = sql.trim()

return result
}

/**
 * Walks the lines of an SQL statement looking for the first line that, once
 * lower-cased and trimmed, begins with one of the supported operation
 * keywords (`select`, `update`, `insert`, `delete`). That line and every line
 * after it are joined with single spaces and handed to `parseStatement`.
 *
 * This lives outside of `parseSql` so that `parseSql` stays under the
 * code complexity limit enforced by the linter.
 *
 * @param {string[]} lines The SQL statement split into individual lines.
 *
 * @returns {{operation: string, collection: null|string}} The detected
 * operation merged with whatever `parseStatement` returned. When no line
 * starts with a supported keyword the result is
 * `{ operation: 'other', collection: null }`.
 */
function parseLines(lines) {
  const keywords = ['select', 'update', 'insert', 'delete']
  const fallback = { operation: 'other', collection: null }

  for (const [index, rawLine] of lines.entries()) {
    const normalized = rawLine.toLowerCase().trim()
    const operation = keywords.find((keyword) => normalized.startsWith(keyword))
    if (operation === undefined) {
      continue
    }

    // Only the first matching line matters; everything from that line onward
    // is treated as the statement to inspect.
    const statement = lines.slice(index).join(' ')
    return { ...fallback, operation, ...parseStatement(statement, operation) }
  }

  return fallback
}

/**
 * Removes every terminated multi-line comment (`/* ... *\/`) from the
 * provided string. For each comment removed, the text before and after it is
 * trimmed and rejoined with a single space.
 *
 * An opening `/*` that has no matching `*\/` is left in place, along with
 * everything after it. Stripping stops at that point so that malformed input
 * cannot cause unbounded recursion or corrupt the statement.
 *
 * @param {string} input The string to parse.
 *
 * @returns {string} The input with all terminated multi-line comments removed.
 */
function removeMultiLineComments(input) {
  let cleaned = input
  let startPos = cleaned.indexOf('/*')

  while (startPos !== -1) {
    const endPos = cleaned.indexOf('*/', startPos + 2)
    if (endPos === -1) {
      // Unterminated comment: there is nothing we can safely strip.
      break
    }

    const before = cleaned.slice(0, startPos).trim()
    const after = cleaned.slice(endPos + 2).trim()
    cleaned = `${before} ${after}`
    startPos = cleaned.indexOf('/*')
  }

  return cleaned
}

/**
 * Cheap sanity check on the beginning of a statement: after skipping any
 * leading whitespace, does the text start (case-insensitively) with one of
 * `with`, `select`, `insert`, `update`, or `delete`?
 *
 * Only the prefix is inspected; no word boundary is required after the
 * keyword.
 *
 * @param {string} sql SQL statement with any comments stripped.
 *
 * @returns {boolean} True when the statement starts with a recognized
 * keyword. Otherwise, false.
 */
function looksLikeValidSql(sql) {
  const leading = sql.toLowerCase().trimStart()
  const keywords = ['with', 'select', 'insert', 'update', 'delete']
  return keywords.some((keyword) => leading.startsWith(keyword))
}

/**
* Extracts the collection, database, and table information from an SQL
* statement.
*
* @param {string} statement The SQL statement to parse.
* @param {string} [kind] The type of SQL statement being parsed. This
* dictates how the algorithm will determine where the desired fields are.
* Valid values are: `insert`, `delete`, `select`, and `update`.
*
* @returns {{database: string, collection, table}} The found information.
*/
function parseStatement(statement, kind = 'insert') {
let splitter
switch (kind) {
case 'insert': {
splitter = /\s*?\binto\b\s*?/i
break
}

let parsedStatement
case 'delete':
case 'select': {
splitter = /\s*?\bfrom\b\s*?/i
break
}

for (let i = 0, l = OPERATIONS.length; i < l; i++) {
parsedStatement = OPERATIONS[i].getParsedStatement(sql)
if (parsedStatement) {
case 'update': {
splitter = /\s*?\bupdate\b\s*?/i
break
}
}

if (parsedStatement) {
return parsedStatement
const targetIdentifier = statement
.split(splitter)
.pop()
.trim()
.split(/\s/)
.shift()
.replace(/[`'"]/g, '')

const identifierParts = targetIdentifier.split('.')
let collection
let database
let table
if (identifierParts.length === 1) {
table = normalizeTableName(identifierParts[0])
collection = table
} else {
database = identifierParts[0]
table = normalizeTableName(identifierParts[1])
collection = `${database}.${table}`
}

return {
operation: 'other',
collection: null,
query: sql
return { collection, database, table }
}

/**
 * Cleans up a token that is expected to be a table name. The cross-application
 * test suite includes identifiers that are not valid in any known SQL engine,
 * yet must still be supported, so the checks below are applied in order:
 *
 * 1. A token starting with `(` is treated as a possible subquery. With the
 *    parentheses removed, a single word is returned as the table name (even
 *    though that is not valid SQL); anything else yields `'(subquery)'`.
 * 2. A token containing `(` after its first character, e.g. `foo(x,y)`, is
 *    cut at the parenthesis.
 * 3. A token containing `,`, e.g. `foo,bar`, is cut at the first comma.
 * 4. Anything else is returned untouched.
 *
 * @param {string} tableIdentifier Something that _should_ represent a table
 * name.
 *
 * @returns {string} The normalized table name.
 */
function normalizeTableName(tableIdentifier) {
  if (tableIdentifier.startsWith('(')) {
    const words = tableIdentifier.replace(/[()]/g, '').split(/\s/)
    return words.length === 1 ? words[0] : '(subquery)'
  }

  // `into foo(x,y)` => "foo"
  const parenIndex = tableIdentifier.indexOf('(')
  if (parenIndex > 0) {
    return tableIdentifier.slice(0, parenIndex)
  }

  // `from foo,bar` => "foo"
  const commaIndex = tableIdentifier.indexOf(',')
  return commaIndex === -1 ? tableIdentifier : tableIdentifier.slice(0, commaIndex)
}
54 changes: 0 additions & 54 deletions lib/db/statement-matcher.js

This file was deleted.

Loading

0 comments on commit b90d315

Please sign in to comment.