Skip to content

Commit 158135d

Browse files
azmy60 and rathboma authored
Add initial support for table identification (#76)
Co-authored-by: Matthew Rathbone <[email protected]>
1 parent 1f28746 commit 158135d

10 files changed: +192 -32 lines changed

package.json

+1-2
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,11 @@
3434
"prettier": "^2.3.2",
3535
"terser-webpack-plugin": "^5.1.1",
3636
"ts-loader": "^8.0.17",
37-
"ts-node": "^10.9.1",
37+
"ts-node": "^10.9.2",
3838
"typescript": "^4.1.5",
3939
"webpack": "^5.11.1",
4040
"webpack-cli": "^4.3.1"
4141
},
42-
"dependencies": {},
4342
"engines": {
4443
"node": ">= 10.13"
4544
}

src/defines.ts

+5
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ export type ExecutionType = 'LISTING' | 'MODIFICATION' | 'INFORMATION' | 'ANON_B
7979
export interface IdentifyOptions {
8080
strict?: boolean;
8181
dialect?: Dialect;
82+
identifyTables?: boolean;
8283
}
8384

8485
export interface IdentifyResult {
@@ -88,6 +89,7 @@ export interface IdentifyResult {
8889
type: StatementType;
8990
executionType: ExecutionType;
9091
parameters: string[];
92+
tables: string[];
9193
}
9294

9395
export interface Statement {
@@ -101,6 +103,8 @@ export interface Statement {
101103
algorithm?: number;
102104
sqlSecurity?: number;
103105
parameters: string[];
106+
tables: string[];
107+
isCte?: boolean;
104108
}
105109

106110
export interface ConcreteStatement extends Statement {
@@ -124,6 +128,7 @@ export interface Token {
124128
| 'semicolon'
125129
| 'keyword'
126130
| 'parameter'
131+
| 'table'
127132
| 'unknown';
128133
value: string;
129134
start: number;

src/index.ts

+2-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ export function identify(query: string, options: IdentifyOptions = {}): Identify
2121
throw new Error(`Unknown dialect. Allowed values: ${DIALECTS.join(', ')}`);
2222
}
2323

24-
const result = parse(query, isStrict, dialect);
24+
const result = parse(query, isStrict, dialect, options.identifyTables);
2525

2626
return result.body.map((statement) => {
2727
const result: IdentifyResult = {
@@ -32,6 +32,7 @@ export function identify(query: string, options: IdentifyOptions = {}): Identify
3232
executionType: statement.executionType,
3333
// we want to sort the postgres params: $1 $2 $3, regardless of the order they appear
3434
parameters: dialect === 'psql' ? statement.parameters.sort() : statement.parameters,
35+
tables: statement.tables || [],
3536
};
3637
return result;
3738
});

src/parser.ts

+53-28
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,10 @@ const statementsWithEnds = [
9898
'UNKNOWN',
9999
];
100100

101+
// keywords that come directly before a table name.
102+
// v1 - keeping it very simple.
103+
const PRE_TABLE_KEYWORDS = /^from$|^join$|^into$/i;
104+
101105
const blockOpeners: Record<Dialect, string[]> = {
102106
generic: ['BEGIN', 'CASE'],
103107
psql: ['BEGIN', 'CASE', 'LOOP', 'IF'],
@@ -111,29 +115,36 @@ const blockOpeners: Record<Dialect, string[]> = {
111115
interface ParseOptions {
112116
isStrict: boolean;
113117
dialect: Dialect;
118+
identifyTables: boolean;
114119
}
115120

116121
function createInitialStatement(): Statement {
117122
return {
118123
start: -1,
119124
end: 0,
120125
parameters: [],
126+
tables: [],
121127
};
122128
}
123129

124-
function nextNonWhitespaceToken(state: State): Token {
130+
function nextNonWhitespaceToken(state: State, dialect: Dialect): Token {
125131
let token: Token;
126132
do {
127133
state = initState({ prevState: state });
128-
token = scanToken(state);
134+
token = scanToken(state, dialect);
129135
} while (token.type === 'whitespace');
130136
return token;
131137
}
132138

133139
/**
134140
* Parser
135141
*/
136-
export function parse(input: string, isStrict = true, dialect: Dialect = 'generic'): ParseResult {
142+
export function parse(
143+
input: string,
144+
isStrict = true,
145+
dialect: Dialect = 'generic',
146+
identifyTables = false,
147+
): ParseResult {
137148
const topLevelState = initState({ input });
138149
const topLevelStatement: ParseResult = {
139150
type: 'QUERY',
@@ -164,14 +175,13 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
164175
while (prevState.position < topLevelState.end) {
165176
const tokenState = initState({ prevState });
166177
const token = scanToken(tokenState, dialect);
167-
const nextToken = nextNonWhitespaceToken(tokenState);
178+
const nextToken = nextNonWhitespaceToken(tokenState, dialect);
168179

169180
if (!statementParser) {
170181
// ignore blank tokens before the start of a CTE / not part of a statement
171182
if (!cteState.isCte && ignoreOutsideBlankTokens.includes(token.type)) {
172183
topLevelStatement.tokens.push(token);
173184
prevState = tokenState;
174-
continue;
175185
} else if (
176186
!cteState.isCte &&
177187
token.type === 'keyword' &&
@@ -181,7 +191,7 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
181191
topLevelStatement.tokens.push(token);
182192
cteState.state = tokenState;
183193
prevState = tokenState;
184-
continue;
194+
185195
// If we're scanning in a CTE, handle someone putting a semicolon anywhere (after 'with',
186196
// after semicolon, etc.) along it to "early terminate".
187197
} else if (cteState.isCte && token.type === 'semicolon') {
@@ -193,12 +203,12 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
193203
type: 'UNKNOWN',
194204
executionType: 'UNKNOWN',
195205
parameters: [],
206+
tables: [],
196207
});
197208
cteState.isCte = false;
198209
cteState.asSeen = false;
199210
cteState.statementEnd = false;
200211
cteState.parens = 0;
201-
continue;
202212
} else if (cteState.isCte && !cteState.statementEnd) {
203213
if (cteState.asSeen) {
204214
if (token.value === '(') {
@@ -215,14 +225,13 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
215225

216226
topLevelStatement.tokens.push(token);
217227
prevState = tokenState;
218-
continue;
219228
} else if (cteState.isCte && cteState.statementEnd && token.value === ',') {
220229
cteState.asSeen = false;
221230
cteState.statementEnd = false;
222231

223232
topLevelStatement.tokens.push(token);
224233
prevState = tokenState;
225-
continue;
234+
226235
// Ignore blank tokens after the end of the CTE till start of statement
227236
} else if (
228237
cteState.isCte &&
@@ -231,28 +240,32 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
231240
) {
232241
topLevelStatement.tokens.push(token);
233242
prevState = tokenState;
234-
continue;
243+
} else {
244+
statementParser = createStatementParserByToken(token, nextToken, {
245+
isStrict,
246+
dialect,
247+
identifyTables,
248+
});
249+
if (cteState.isCte) {
250+
statementParser.getStatement().start = cteState.state.start;
251+
statementParser.getStatement().isCte = true;
252+
cteState.isCte = false;
253+
cteState.asSeen = false;
254+
cteState.statementEnd = false;
255+
}
235256
}
257+
} else {
258+
statementParser.addToken(token, nextToken);
259+
topLevelStatement.tokens.push(token);
260+
prevState = tokenState;
236261

237-
statementParser = createStatementParserByToken(token, nextToken, { isStrict, dialect });
238-
if (cteState.isCte) {
239-
statementParser.getStatement().start = cteState.state.start;
240-
cteState.isCte = false;
241-
cteState.asSeen = false;
242-
cteState.statementEnd = false;
262+
const statement = statementParser.getStatement();
263+
if (statement.endStatement) {
264+
statement.end = token.end;
265+
topLevelStatement.body.push(statement as ConcreteStatement);
266+
statementParser = null;
243267
}
244268
}
245-
246-
statementParser.addToken(token, nextToken);
247-
topLevelStatement.tokens.push(token);
248-
prevState = tokenState;
249-
250-
const statement = statementParser.getStatement();
251-
if (statement.endStatement) {
252-
statement.end = token.end;
253-
topLevelStatement.body.push(statement as ConcreteStatement);
254-
statementParser = null;
255-
}
256269
}
257270

258271
// last statement without ending key
@@ -708,7 +721,7 @@ function createUnknownStatementParser(options: ParseOptions) {
708721
function stateMachineStatementParser(
709722
statement: Statement,
710723
steps: Step[],
711-
{ isStrict, dialect }: ParseOptions,
724+
{ isStrict, dialect, identifyTables }: ParseOptions,
712725
): StatementParser {
713726
let currentStepIndex = 0;
714727
let prevToken: Token | undefined;
@@ -808,6 +821,18 @@ function stateMachineStatementParser(
808821
}
809822
}
810823

824+
if (
825+
identifyTables &&
826+
PRE_TABLE_KEYWORDS.exec(token.value) &&
827+
!statement.isCte &&
828+
statement.type?.match(/SELECT|INSERT/)
829+
) {
830+
const tableValue = nextToken.value;
831+
if (!statement.tables.includes(tableValue)) {
832+
statement.tables.push(tableValue);
833+
}
834+
}
835+
811836
if (
812837
token.type === 'parameter' &&
813838
(token.value === '?' || !statement.parameters.includes(token.value))

test/identifier/inner-statements.spec.ts

+4
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ describe('identifier', () => {
1616
type: 'INSERT',
1717
executionType: 'MODIFICATION',
1818
parameters: [],
19+
tables: [],
1920
},
2021
];
2122

@@ -34,6 +35,7 @@ describe('identifier', () => {
3435
type: 'INSERT',
3536
executionType: 'MODIFICATION',
3637
parameters: [],
38+
tables: [],
3739
},
3840
];
3941

@@ -54,6 +56,7 @@ describe('identifier', () => {
5456
type: 'INSERT',
5557
executionType: 'MODIFICATION',
5658
parameters: [],
59+
tables: [],
5760
},
5861
];
5962

@@ -75,6 +78,7 @@ describe('identifier', () => {
7578
type: 'INSERT',
7679
executionType: 'MODIFICATION',
7780
parameters: [],
81+
tables: [],
7882
},
7983
];
8084

0 commit comments

Comments
 (0)