diff --git a/lib/db/query-parsers/sql.js b/lib/db/query-parsers/sql.js index c728704dbc..bc2974bb84 100644 --- a/lib/db/query-parsers/sql.js +++ b/lib/db/query-parsers/sql.js @@ -6,26 +6,17 @@ 'use strict' const logger = require('../../logger').child({ component: 'sql_query_parser' }) -const StatementMatcher = require('../statement-matcher') const stringify = require('json-stringify-safe') -const OPERATIONS = [ - new StatementMatcher( - 'select', - /^[^\S]*?select\b[\s\S]+?\bfrom[\s\n\r\[\(]+([^\]\s\n\r,)(;]*)/gim - ), - new StatementMatcher('update', /^[^\S]*?update[^\S]+?([^\s\n\r,;]+)/gim), - new StatementMatcher( - 'insert', - /^[^\S]*?insert(?:[^\S]+ignore)?[^\S]+into[^\S]+([^\s\n\r(,;]+)/gim - ), - new StatementMatcher('delete', /^[^\S]*?delete[^\S]+?from[^\S]+([^\s\n\r,(;]+)/gim) -] -const COMMENT_PATTERN = /\/\\*.*?\\*\//g - -// This must be called synchronously after the initial db call for backtraces to -// work correctly - +/** + * Parses a SQL statement into the parts we want to report as metadata in + * database transactions. + * + * @param {string} sql The statement to parse. + * + * @returns {{query: string, collection: null|string, operation: string}} Parsed + * metadata. + */ module.exports = function parseSql(sql) { // Sometimes we get an object here from MySQL. We have been unable to // reproduce it, so we'll just log what that object is and return a statement @@ -38,7 +29,7 @@ module.exports = function parseSql(sql) { try { logger.trace('parseSQL got an a non-string sql that looks like: %s', stringify(sql)) } catch (err) { - logger.debug(err, 'Unabler to stringify SQL') + logger.debug(err, 'Unable to stringify SQL') } } return { @@ -48,24 +39,199 @@ module.exports = function parseSql(sql) { } } - sql = sql.replace(COMMENT_PATTERN, '').trim() + sql = removeMultiLineComments(sql).trim() + let result = { + operation: 'other', + collection: null, + query: sql + } + if (looksLikeValidSql(sql) === false) { + return result + } + + const lines = sql.split('\n') + result = { ...result, ...parseLines(lines) } + result.query = sql.trim() + + return result +} + +/** + * Iterates the lines of an SQL statement, reducing them to the relevant lines, + * and returns the metadata found within. + * + * We do not inline this in `parseSql` because doing so will violate a + * code complexity linting rule. + * + * @param {string[]} lines Set of SQL statement lines. + * + * @returns {{collection: null, operation: string}} SQL statement metadata. + */ +function parseLines(lines) { + let result = { + operation: 'other', + collection: null + } + + parser: for (let i = 0; i < lines.length; i += 1) { + const line = lines[i].toLowerCase().trim() + switch (true) { + case line.startsWith('select'): { + const statement = lines.slice(i).join(' ') + result.operation = 'select' + result = { ...result, ...parseStatement(statement, 'select') } + break parser + } + + case line.startsWith('update'): { + const statement = lines.slice(i).join(' ') + result.operation = 'update' + result = { ...result, ...parseStatement(statement, 'update') } + break parser + } + + case line.startsWith('insert'): { + const statement = lines.slice(i).join(' ') + result.operation = 'insert' + result = { ...result, ...parseStatement(statement, 'insert') } + break parser + } + + case line.startsWith('delete'): { + const statement = lines.slice(i).join(' ') + result.operation = 'delete' + result = { ...result, ...parseStatement(statement, 'delete') } + break parser + } + } + } + + return result +} + +/** + * Iterates through the provided string and removes all multi-line comments + * found therein. + * + * @param {string} input The string to parse. + * + * @returns {string} Cleaned up string. + */ +function removeMultiLineComments(input) { + const startPos = input.indexOf('/*') + if (startPos === -1) { + return input + } + + const endPos = input.indexOf('*/', startPos + 2) + const part1 = input.slice(0, startPos).trim() + const part2 = input.slice(endPos + 2).trim() + return removeMultiLineComments(`${part1} ${part2}`) +} + +/** + * Tests the start of the statement to determine if it looks like a valid + * SQL statement. + * + * @param {string} sql SQL statement with any comments stripped. + * + * @returns {boolean} True if the statement looks good. Otherwise, false. + */ +function looksLikeValidSql(sql) { + return /^\s*?(?:with|select|insert|update|delete)/i.test(sql.toLowerCase()) +} + +/** + * Extracts the collection, database, and table information from an SQL + * statement. + * + * @param {string} statement The SQL statement to parse. + * @param {string} [kind] The type of SQL statement being parsed. This + * dictates how the algorithm will determine where the desired fields are. + * Valid values are: `insert`, `delete`, `select`, and `update`. + * + * @returns {{database: string, collection, table}} The found information. + */ +function parseStatement(statement, kind = 'insert') { + let splitter + switch (kind) { + case 'insert': { + splitter = /\s*?\binto\b\s*?/i + break + } - let parsedStatement + case 'delete': + case 'select': { + splitter = /\s*?\bfrom\b\s*?/i + break + } - for (let i = 0, l = OPERATIONS.length; i < l; i++) { - parsedStatement = OPERATIONS[i].getParsedStatement(sql) - if (parsedStatement) { + case 'update': { + splitter = /\s*?\bupdate\b\s*?/i break } } - if (parsedStatement) { - return parsedStatement + const targetIdentifier = statement + .split(splitter) + .pop() + .trim() + .split(/\s/) + .shift() + .replace(/[`'"]/g, '') + + const identifierParts = targetIdentifier.split('.') + let collection + let database + let table + if (identifierParts.length === 1) { + table = normalizeTableName(identifierParts[0]) + collection = table + } else { + database = identifierParts[0] + table = normalizeTableName(identifierParts[1]) + collection = `${database}.${table}` } - return { - operation: 'other', - collection: null, - query: sql + return { collection, database, table } +} + +/** + * Our cross-application tests have tests that do not match any known SQL + * engine's valid syntax for table names. But we need to support them, so this + * function will inspect table names and try to return the correct thing. + * + * @param {string} tableIdentifier Something that _should_ represent a table + * name. + * + * @returns {string} The normalized table name. + */ +function normalizeTableName(tableIdentifier) { + if (tableIdentifier[0] === '(') { + // We might have a subquery. If there is a single word between the + // parentheticals, we return it as the table name (even though this is not + // valid SQL). Otherwise, we return a special value. + + const parts = tableIdentifier.replace(/[()]/g, '').split(/\s/) + if (parts.length === 1) { + return parts[0] + } + return '(subquery)' + } + + const parenPos = tableIdentifier.indexOf('(') + if (parenPos > 0) { + // We seem to accept `into foo(x,y)` as a valid table name, where we + // decide that "foo" is the actual table name. + return tableIdentifier.slice(0, parenPos) } + + const commaPos = tableIdentifier.indexOf(',') + if (commaPos > -1) { + // For some reason, we accept `from foo,bar` and decide that "foo" is + // the actual table name. + return tableIdentifier.slice(0, commaPos) + } + + return tableIdentifier } diff --git a/lib/db/statement-matcher.js b/lib/db/statement-matcher.js deleted file mode 100644 index 1e79a38ef1..0000000000 --- a/lib/db/statement-matcher.js +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2020 New Relic Corporation. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -'use strict' - -// ( ` database` . ` table ` ) -const CLEANER = /^\(?(?:([`'"]?)(.*?)\1\.)?([`'"]?)(.*?)\3\)?$/ - -function StatementMatcher(operation, operationPattern) { - this.operation = operation - this.matcher = new RegExp('^\\s*' + operation, 'ig') - this.operationPattern = operationPattern -} - -StatementMatcher.prototype.getParsedStatement = function getParsedStatement(sql) { - this.operationPattern.lastIndex = 0 - this.matcher.lastIndex = 0 - CLEANER.lastIndex = 0 - - if (this.matcher.test(sql)) { - const queryMatch = this.operationPattern.exec(sql) - let collection = queryMatch ? queryMatch[1] : 'unknown' - let database = null - - // If the cleaner can match this collection, pull out the cleaned up names - // from there. The spec doesn't want the database names in the collection - // name, but for legacy reasons we keep it. - // TODO: Either update the spec (and CATs) to accept database name in the - // collection name or remove it here. - const cleanerMatch = CLEANER.exec(collection) - if (cleanerMatch && cleanerMatch[4]) { - collection = cleanerMatch[4] - if (cleanerMatch[2]) { - database = cleanerMatch[2] - collection = database + '.' + collection - } - } - - // TODO: Pass through the database here to the parsed statement. It could - // be used for datastore attributes. - return { - operation: this.operation, - database: database, - collection: collection, - query: sql - } - } - - return null -} - -module.exports = StatementMatcher diff --git a/test/unit/db/query-parsers/sql.test.js b/test/unit/db/query-parsers/sql.test.js index d9168b5549..2c973877c7 100644 --- a/test/unit/db/query-parsers/sql.test.js +++ b/test/unit/db/query-parsers/sql.test.js @@ -7,6 +7,8 @@ const test = require('node:test') const assert = require('node:assert') + +const match = require('../../../lib/custom-assertions/match') const parseSql = require('../../../../lib/db/query-parsers/sql') const CATs = require('../../../lib/cross_agent_tests/sql_parsing') @@ -161,3 +163,115 @@ test('database query parser', async (t) => { } }) }) + +// test( +// 'handles statement with many solidus characters in a timely manner', +// { timeout: 1_000 }, +// (t) => { +// const times = 800_000 +// const json = JSON.stringify({ +// something: '/' + '\\'.repeat(times) + 'aaa' + '\\'.repeat(times) + '/' +// }) +// const statement = `insert into foo (col1) values('${json}')` +// const ps = parseSql(statement) +// +// assert.equal(ps.operation, 'insert') +// // assert.equal(ps.table, 'foo') +// assert.equal(ps.collection, 'foo') +// } +// ) + +// test('reports correct info if inline comments present', () => { +// const statement = `--insert into` +// }) + +test('reports correct info if multi-line comments present', () => { + const expected = { operation: 'insert', collection: 'foo', table: 'foo' } + + let statement = `/*insert into bar some stuff*/ + insert into foo (col1) values('bar') + ` + let found = parseSql(statement) + match(found, expected) + + statement = `/**** + insert into bar some stuff + ****/ + insert into + foo (col1) + values('bar') + ` + found = parseSql(statement) + match(found, expected) + + statement = `insert /* insert into bar */ into foo` + found = parseSql(statement) + match(found, expected) + + statement = `/* insert into bar some stuff */ insert into foo (col1)` + found = parseSql(statement) + match(found, expected) + + statement = `insert into /* insert into bar some stuff */ foo (col1)` + found = parseSql(statement) + match(found, expected) + + statement = `insert /* comments! */ into /* insert into bar some stuff */ foo /* MOAR */ (col1)` + found = parseSql(statement) + match(found, expected) +}) + +test('handles quoted names', () => { + const expected = { operation: 'insert', collection: 'foo', table: 'foo' } + + let statement = 'insert into `foo` (col1)' + let found = parseSql(statement) + match(found, expected) + + statement = `insert into 'foo' (col1)` + found = parseSql(statement) + match(found, expected) + + statement = `insert into "foo" (col1)` + found = parseSql(statement) + match(found, expected) +}) + +test('handles fully qualified names', () => { + const expected = { operation: 'insert', collection: 'myDb.foo', table: 'foo', database: 'myDb' } + + let statement = 'insert into `myDb.foo` (col1)' + let found = parseSql(statement) + match(found, expected) + + statement = `insert into 'myDb.foo' (col1)` + found = parseSql(statement) + match(found, expected) + + statement = `insert into "myDb.foo" (col1)` + found = parseSql(statement) + match(found, expected) +}) + +test('handles leading CTE', () => { + const statement = `with cte1 as ( + select + linking_col + from + linking_table + ) + select + foo_col + from + foo_table a + join cte1 linking_col + where + a.bar_col = 'bar'` + const found = parseSql(statement) + match(found, { + operation: 'select', + collection: 'foo_table', + table: 'foo_table', + database: undefined + }) +})