Skip to content

Commit d56d117

Browse files
authored
Skip indexing large files above 1mb, add option --max-file-byte-size (#271)
* Add configurable option to skip large files. Previously, scip-typescript indexed all files regardless of file size. This could cause scip-typescript to stall while indexing very large files, which were frequently auto-generated. This commit changes the default behavior to skip indexing files that are larger than 1mb, and makes this threshold configurable via the new `--max-file-byte-size` flag. * Print out when large files are skipped
1 parent 67cfeeb commit d56d117

4 files changed

+110
-4
lines changed

src/CommandLineOptions.ts

+23-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import ts from 'typescript'
44

55
import packageJson from '../package.json'
66

7+
import { parseHumanByteSizeIntoNumber } from './parseHumanByteSizeIntoNumber'
78
import * as scip from './scip'
89

910
/** Configuration options to index a multi-project workspace. */
@@ -14,6 +15,8 @@ export interface MultiProjectOptions {
1415
yarnBerryWorkspaces: boolean
1516
pnpmWorkspaces: boolean
1617
globalCaches: boolean
18+
maxFileByteSize?: string
19+
maxFileByteSizeNumber?: number
1720
cwd: string
1821
output: string
1922
indexedProjects: Set<string>
@@ -36,7 +39,7 @@ export interface GlobalCache {
3639
}
3740

3841
export function mainCommand(
39-
indexAction: (projects: string[], otpions: MultiProjectOptions) => void
42+
indexAction: (projects: string[], options: MultiProjectOptions) => void
4043
): Command {
4144
const command = new Command()
4245
command
@@ -67,12 +70,28 @@ export function mainCommand(
6770
'--no-global-caches',
6871
'whether to disable global caches between TypeScript projects'
6972
)
73+
.option(
74+
'--max-file-byte-size <value>',
75+
'skip files that have a larger byte size than the provided value. Supported formats: 1kb, 1mb, 1gb.',
76+
'1mb'
77+
)
7078
.argument('[projects...]')
7179
.action((parsedProjects, parsedOptions) => {
72-
indexAction(
73-
parsedProjects as string[],
74-
parsedOptions as MultiProjectOptions
80+
const options = parsedOptions as MultiProjectOptions
81+
82+
// Parse and validate human-provided --max-file-byte-size value
83+
options.maxFileByteSizeNumber = parseHumanByteSizeIntoNumber(
84+
options.maxFileByteSize ?? '1mb'
7585
)
86+
if (isNaN(options.maxFileByteSizeNumber)) {
87+
console.error(
88+
`invalid byte size '${options.maxFileByteSize}'. To fix this problem, change the value of the flag --max-file-byte-size to use a valid byte size format: 1kb, 1mb, 1gb.`
89+
)
90+
process.exitCode = 1
91+
return
92+
}
93+
94+
indexAction(parsedProjects as string[], options)
7695
})
7796
return command
7897
}

src/FileIndexer.ts

+18
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
} from './Descriptor'
1616
import { Input } from './Input'
1717
import { Packages } from './Packages'
18+
import { formatByteSizeAsHumanReadable } from './parseHumanByteSizeIntoNumber'
1819
import { Range } from './Range'
1920
import * as scip from './scip'
2021
import { ScipSymbol } from './ScipSymbol'
@@ -42,6 +43,23 @@ export class FileIndexer {
4243
// if (!this.sourceFile.fileName.includes('constructor')) {
4344
// return
4445
// }
46+
47+
const byteSize = Buffer.from(this.sourceFile.getText()).length
48+
if (
49+
this.options.maxFileByteSizeNumber &&
50+
byteSize > this.options.maxFileByteSizeNumber
51+
) {
52+
const humanSize = formatByteSizeAsHumanReadable(byteSize)
53+
const humanMaxSize = formatByteSizeAsHumanReadable(
54+
this.options.maxFileByteSizeNumber
55+
)
56+
console.log(
57+
`info: skipping file '${this.sourceFile.fileName}' because it has byte size ${humanSize} that exceeds the maximum threshold ${humanMaxSize}. ` +
58+
'If you intended to index this file, use the flag --max-file-byte-size to configure the maximum file size threshold.'
59+
)
60+
return
61+
}
62+
4563
this.emitSourceFileOccurrence()
4664
this.visit(this.sourceFile)
4765
}
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { test } from 'uvu'
2+
import * as assert from 'uvu/assert'
3+
4+
import { parseHumanByteSizeIntoNumber } from './parseHumanByteSizeIntoNumber'
5+
6+
/**
 * Registers one test case, named after `humanInput`, which asserts that
 * parsing `humanInput` produces `expectedByteNumber`.
 *
 * @param humanInput the human-readable byte size to parse; also used as the test name
 * @param expectedByteNumber the value the parser is expected to return
 */
function checkHumanByteSize(
  humanInput: string,
  expectedByteNumber: number
): void {
  const verifyParsedValue = (): void =>
    assert.equal(parseHumanByteSizeIntoNumber(humanInput), expectedByteNumber)

  test(humanInput, verifyParsedValue)
}
15+
16+
// Invalid formats
checkHumanByteSize('invalid', NaN)
checkHumanByteSize('15tb', NaN)
checkHumanByteSize('15b', NaN)

// All numeral
checkHumanByteSize('1001', 1001)

// All lowercase
checkHumanByteSize('1.2kb', 1_200)
checkHumanByteSize('1.2mb', 1_200_000)
checkHumanByteSize('1.2gb', 1_200_000_000)

// All uppercase
checkHumanByteSize('1.2KB', 1_200)
checkHumanByteSize('1.2MB', 1_200_000)
checkHumanByteSize('1.2GB', 1_200_000_000)

// Mixed case
checkHumanByteSize('1.2Kb', 1_200)
checkHumanByteSize('1.2Mb', 1_200_000)
checkHumanByteSize('1.2Gb', 1_200_000_000)

// uvu only collects cases when `test(...)` is called; the suite must be
// started explicitly, otherwise none of the cases above ever execute.
test.run()

src/parseHumanByteSizeIntoNumber.ts

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
const kilo = 1_000
2+
const mega = 1_000_000
3+
const giga = 1_000_000_000
4+
5+
/**
 * Parses a human-readable byte size such as `1001`, `1.2kb`, `5MB` or `2Gb`
 * into a number of bytes.
 *
 * The unit suffix is matched case-insensitively and uses decimal multipliers
 * (`kb` = 1,000, `mb` = 1,000,000, `gb` = 1,000,000,000). An input without a
 * suffix is interpreted as a plain byte count.
 *
 * @param humanByteSize the user-provided size, for example the value of the
 * `--max-file-byte-size` flag
 * @returns the size in bytes, or `NaN` when the input is not a non-negative
 * decimal number optionally followed by `kb`, `mb` or `gb`
 */
export function parseHumanByteSizeIntoNumber(humanByteSize: string): number {
  let value = humanByteSize.trim().toLowerCase()
  let multiplier = 1
  if (value.endsWith('kb')) {
    multiplier = kilo
    value = value.slice(0, -2)
  } else if (value.endsWith('mb')) {
    multiplier = mega
    value = value.slice(0, -2)
  } else if (value.endsWith('gb')) {
    multiplier = giga
    value = value.slice(0, -2)
  }
  // Tolerate whitespace between the number and the unit, e.g. `1 mb`.
  value = value.trim()

  // `Number.parseFloat` stops at the first character it cannot parse, so
  // inputs like `15tb` or `15b` would silently be read as 15 bytes. Validate
  // the whole numeric part up front; this also rejects an empty number (`kb`)
  // and negative sizes, which would otherwise cause every file to be skipped.
  if (!/^(?:\d+\.?\d*|\.\d+)$/.test(value)) {
    return NaN
  }
  return Number.parseFloat(value) * multiplier
}
20+
21+
/**
 * Formats a byte count using the largest decimal unit (`gb`, `mb`, `kb`) that
 * the value reaches, for example `1500` becomes `1.5kb`. Values below one
 * kilobyte are returned as the plain number without a unit suffix.
 *
 * @param byteSize the size in bytes
 * @returns the size divided by the chosen unit followed by the unit suffix,
 * or the plain number when it is smaller than one kilobyte
 */
export function formatByteSizeAsHumanReadable(byteSize: number): string {
  // Compare with `>=` so that exact unit boundaries use the larger unit:
  // 1,000,000 bytes must print as `1mb`, not `1000kb`.
  if (byteSize >= giga) {
    return `${byteSize / giga}gb`
  }
  if (byteSize >= mega) {
    return `${byteSize / mega}mb`
  }
  if (byteSize >= kilo) {
    return `${byteSize / kilo}kb`
  }
  return byteSize.toString()
}

0 commit comments

Comments
 (0)