@@ -98,6 +98,10 @@ const statementsWithEnds = [
98
98
'UNKNOWN' ,
99
99
] ;
100
100
101
/**
 * Keywords that come directly before a table name.
 *
 * v1 - keeping it very simple: a case-insensitive match of the whole token
 * against FROM, JOIN or INTO. The pattern has no `g`/`y` flag, so calling
 * `exec`/`test` on this shared instance keeps no `lastIndex` state between
 * calls.
 */
const PRE_TABLE_KEYWORDS = /^from$|^join$|^into$/i;
104
+
101
105
const blockOpeners : Record < Dialect , string [ ] > = {
102
106
generic : [ 'BEGIN' , 'CASE' ] ,
103
107
psql : [ 'BEGIN' , 'CASE' , 'LOOP' , 'IF' ] ,
@@ -111,29 +115,36 @@ const blockOpeners: Record<Dialect, string[]> = {
111
115
/** Options handed to the statement parsers created while parsing a query. */
interface ParseOptions {
  /**
   * Strictness flag forwarded from `parse`.
   * NOTE(review): its exact effect is implemented outside this excerpt - confirm before relying on it.
   */
  isStrict: boolean;
  /** SQL dialect; the same value is passed to the tokenizer when scanning tokens. */
  dialect: Dialect;
  /**
   * When true, the state-machine statement parser records the value of the token
   * that follows a FROM / JOIN / INTO token in `statement.tables` (non-CTE
   * statements whose type matches SELECT or INSERT only).
   */
  identifyTables: boolean;
}
115
120
116
121
/**
 * Builds the blank statement record that a statement parser starts from.
 *
 * @returns a statement with `start` of -1, `end` of 0, and empty `parameters`
 *          and `tables` lists. Every call returns fresh arrays, so statements
 *          never share list instances.
 */
function createInitialStatement(): Statement {
  return {
    start: -1,
    end: 0,
    parameters: [],
    tables: [],
  };
}
123
129
124
/**
 * Scans forward from `state` and returns the first token whose type is not
 * 'whitespace'.
 *
 * @param state - scanner state to continue from; each step derives a new state
 *                via `initState({ prevState })` before scanning.
 * @param dialect - SQL dialect forwarded to `scanToken` so look-ahead tokens are
 *                  scanned with the same dialect as the main loop.
 * @returns the first non-whitespace token found.
 */
function nextNonWhitespaceToken(state: State, dialect: Dialect): Token {
  // Walk with a local cursor rather than reassigning the parameter.
  let cursor = state;
  let token: Token;
  do {
    cursor = initState({ prevState: cursor });
    token = scanToken(cursor, dialect);
    // NOTE(review): the loop ends only when scanToken yields a non-whitespace
    // token; presumably it does so at end of input - confirm in the tokenizer.
  } while (token.type === 'whitespace');
  return token;
}
132
138
133
139
/**
134
140
* Parser
135
141
*/
136
- export function parse ( input : string , isStrict = true , dialect : Dialect = 'generic' ) : ParseResult {
142
+ export function parse (
143
+ input : string ,
144
+ isStrict = true ,
145
+ dialect : Dialect = 'generic' ,
146
+ identifyTables = false ,
147
+ ) : ParseResult {
137
148
const topLevelState = initState ( { input } ) ;
138
149
const topLevelStatement : ParseResult = {
139
150
type : 'QUERY' ,
@@ -164,14 +175,13 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
164
175
while ( prevState . position < topLevelState . end ) {
165
176
const tokenState = initState ( { prevState } ) ;
166
177
const token = scanToken ( tokenState , dialect ) ;
167
- const nextToken = nextNonWhitespaceToken ( tokenState ) ;
178
+ const nextToken = nextNonWhitespaceToken ( tokenState , dialect ) ;
168
179
169
180
if ( ! statementParser ) {
170
181
// ignore blank tokens before the start of a CTE / not part of a statement
171
182
if ( ! cteState . isCte && ignoreOutsideBlankTokens . includes ( token . type ) ) {
172
183
topLevelStatement . tokens . push ( token ) ;
173
184
prevState = tokenState ;
174
- continue ;
175
185
} else if (
176
186
! cteState . isCte &&
177
187
token . type === 'keyword' &&
@@ -181,7 +191,7 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
181
191
topLevelStatement . tokens . push ( token ) ;
182
192
cteState . state = tokenState ;
183
193
prevState = tokenState ;
184
- continue ;
194
+
185
195
// If we're scanning in a CTE, handle someone putting a semicolon anywhere (after 'with',
186
196
// after semicolon, etc.) along it to "early terminate".
187
197
} else if ( cteState . isCte && token . type === 'semicolon' ) {
@@ -193,12 +203,12 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
193
203
type : 'UNKNOWN' ,
194
204
executionType : 'UNKNOWN' ,
195
205
parameters : [ ] ,
206
+ tables : [ ] ,
196
207
} ) ;
197
208
cteState . isCte = false ;
198
209
cteState . asSeen = false ;
199
210
cteState . statementEnd = false ;
200
211
cteState . parens = 0 ;
201
- continue ;
202
212
} else if ( cteState . isCte && ! cteState . statementEnd ) {
203
213
if ( cteState . asSeen ) {
204
214
if ( token . value === '(' ) {
@@ -215,14 +225,13 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
215
225
216
226
topLevelStatement . tokens . push ( token ) ;
217
227
prevState = tokenState ;
218
- continue ;
219
228
} else if ( cteState . isCte && cteState . statementEnd && token . value === ',' ) {
220
229
cteState . asSeen = false ;
221
230
cteState . statementEnd = false ;
222
231
223
232
topLevelStatement . tokens . push ( token ) ;
224
233
prevState = tokenState ;
225
- continue ;
234
+
226
235
// Ignore blank tokens after the end of the CTE till start of statement
227
236
} else if (
228
237
cteState . isCte &&
@@ -231,28 +240,32 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
231
240
) {
232
241
topLevelStatement . tokens . push ( token ) ;
233
242
prevState = tokenState ;
234
- continue ;
243
+ } else {
244
+ statementParser = createStatementParserByToken ( token , nextToken , {
245
+ isStrict,
246
+ dialect,
247
+ identifyTables,
248
+ } ) ;
249
+ if ( cteState . isCte ) {
250
+ statementParser . getStatement ( ) . start = cteState . state . start ;
251
+ statementParser . getStatement ( ) . isCte = true ;
252
+ cteState . isCte = false ;
253
+ cteState . asSeen = false ;
254
+ cteState . statementEnd = false ;
255
+ }
235
256
}
257
+ } else {
258
+ statementParser . addToken ( token , nextToken ) ;
259
+ topLevelStatement . tokens . push ( token ) ;
260
+ prevState = tokenState ;
236
261
237
- statementParser = createStatementParserByToken ( token , nextToken , { isStrict, dialect } ) ;
238
- if ( cteState . isCte ) {
239
- statementParser . getStatement ( ) . start = cteState . state . start ;
240
- cteState . isCte = false ;
241
- cteState . asSeen = false ;
242
- cteState . statementEnd = false ;
262
+ const statement = statementParser . getStatement ( ) ;
263
+ if ( statement . endStatement ) {
264
+ statement . end = token . end ;
265
+ topLevelStatement . body . push ( statement as ConcreteStatement ) ;
266
+ statementParser = null ;
243
267
}
244
268
}
245
-
246
- statementParser . addToken ( token , nextToken ) ;
247
- topLevelStatement . tokens . push ( token ) ;
248
- prevState = tokenState ;
249
-
250
- const statement = statementParser . getStatement ( ) ;
251
- if ( statement . endStatement ) {
252
- statement . end = token . end ;
253
- topLevelStatement . body . push ( statement as ConcreteStatement ) ;
254
- statementParser = null ;
255
- }
256
269
}
257
270
258
271
// last statement without ending key
@@ -708,7 +721,7 @@ function createUnknownStatementParser(options: ParseOptions) {
708
721
function stateMachineStatementParser (
709
722
statement : Statement ,
710
723
steps : Step [ ] ,
711
- { isStrict, dialect } : ParseOptions ,
724
+ { isStrict, dialect, identifyTables } : ParseOptions ,
712
725
) : StatementParser {
713
726
let currentStepIndex = 0 ;
714
727
let prevToken : Token | undefined ;
@@ -808,6 +821,18 @@ function stateMachineStatementParser(
808
821
}
809
822
}
810
823
824
+ if (
825
+ identifyTables &&
826
+ PRE_TABLE_KEYWORDS . exec ( token . value ) &&
827
+ ! statement . isCte &&
828
+ statement . type ?. match ( / S E L E C T | I N S E R T / )
829
+ ) {
830
+ const tableValue = nextToken . value ;
831
+ if ( ! statement . tables . includes ( tableValue ) ) {
832
+ statement . tables . push ( tableValue ) ;
833
+ }
834
+ }
835
+
811
836
if (
812
837
token . type === 'parameter' &&
813
838
( token . value === '?' || ! statement . parameters . includes ( token . value ) )
0 commit comments