Skip to content

Commit 434787f

Browse files
committed
Genericize the file parallel operations and make an email scraper
1 parent da9cd12 commit 434787f

File tree

3 files changed

+294
-70
lines changed

3 files changed

+294
-70
lines changed

cleancode.py

+81-69
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ def green(text, **kwargs):
3939
def yellow(text, **kwargs):
# Print `text` between the escape sequences '\033[33m' (ANSI SGR 33, yellow
# foreground) and '\033[0m' (reset). sep='' keeps print() from inserting
# spaces around the text. Any extra keyword arguments are forwarded to
# print(); passing `sep` again would collide with the explicit sep=''.
4040
print('\033[33m', text, '\033[0m', sep='', **kwargs)
4141

42-
def get_win_path(posix_path):
42+
def removeNonAscii(string):
# Return `string` with every non-ASCII character dropped: errors='ignore'
# makes encode() skip characters it cannot represent in ASCII, and decode()
# turns the remaining bytes back into a str.
43+
return string.encode('ascii', errors='ignore').decode()
44+
45+
def getWinPath(posix_path):
# Replace every occurrence of '/c/' in posix_path with 'C:\' (str.replace is
# not anchored to the start of the string). No other characters are changed,
# so any remaining '/' separators are returned as they are.
4346
return posix_path.replace('/c/', 'C:\\')
4447

4548
def tryClangFormatPath(tryPath, verbose):
@@ -73,7 +76,7 @@ def setClangFormatPath(firstTry, verbose):
7376
return
7477

7578
pth = os.popen('which clang-format').read()
76-
pth = get_win_path(pth[:-1] + '.exe')
79+
pth = getWinPath(pth[:-1] + '.exe')
7780
if tryClangFormatPath(pth, verbose):
7881
return
7982

@@ -85,25 +88,6 @@ def setClangFormatPath(firstTry, verbose):
8588
red("Can't find clang-format.exe.")
8689
exit(1)
8790

88-
# Returns a list.
89-
def allFiles(dirs):
90-
'''Lists all files in the given directories'''
91-
paths = []
92-
93-
for dirPath in dirs:
94-
if os.path.isfile(dirPath):
95-
paths.append(dirPath)
96-
else:
97-
for root, dirs, filenames in os.walk(dirPath, topdown=True):
98-
dirs[:] = [d for d in dirs if d not in skipFolders]
99-
100-
for f in filenames:
101-
path = os.path.join(root, f)
102-
if os.path.isfile(path):
103-
paths.append(path)
104-
105-
return paths
106-
10791
def lineUnwrap(file, doWrite):
10892
'''Unwrap lines of C-like code to be semantically equivalent but as few lines as possible'''
10993

@@ -198,80 +182,108 @@ def commentFix(file, doWrite):
198182
with open(file, 'w') as outF:
199183
outF.write(outFile)
200184

201-
def cleanFiles(files, doCRLF, doWrite, doLineUnwrap, doEatWhite, doClangFormat, clangFormatArgs, clangFormatPath, verbose):
202-
for file in files:
203-
print(file, end=' ')
185+
def cleanFile(file, doCRLF, doWrite, doLineUnwrap, doEatWhite, doClangFormat, clangFormatArgs, clangFormatPath, verbose):
# Single-file version of the removed cleanFiles() loop: applies the enabled
# clean-up passes to one path. Which passes run is decided by the file
# extension (clangFilterExts, clangFilterCPPExts and eatWhiteExts are defined
# outside this view) together with the do* flags.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the statements below cannot be confirmed from here.
186+
print(file, end=' ')
204187

205-
filename, fileext = os.path.splitext(file)
188+
filename, fileext = os.path.splitext(file)
206189

207-
if fileext in clangFilterExts:
208-
commentFix(file, doWrite)
190+
# commentFix is gated on the extension only; no do* flag is consulted here.
if fileext in clangFilterExts:
191+
commentFix(file, doWrite)
209192

210-
if fileext in clangFilterExts and doLineUnwrap:
211-
lineUnwrap(file, doWrite)
193+
if fileext in clangFilterExts and doLineUnwrap:
194+
lineUnwrap(file, doWrite)
212195

213-
if fileext in eatWhiteExts and doEatWhite:
214-
eatwhite.fixFileWhitespace(file, doCRLF, doWrite, False, 0, 0, verbose, ' ')
196+
if fileext in eatWhiteExts and doEatWhite:
197+
eatwhite.fixFileWhitespace(file, doCRLF, doWrite, False, 0, 0, verbose, ' ')
215198

216-
if fileext in clangFilterExts and doClangFormat:
217-
fileInTmp = file
218-
if fileext in clangFilterCPPExts:
219-
fileInTmp = file + '_CF.cpp'
220-
shutil.copyfile(file, fileInTmp)
199+
if fileext in clangFilterExts and doClangFormat:
200+
# By default clang-format reads the file itself. For extensions in
# clangFilterCPPExts a copy named '<file>_CF.cpp' is created and used as the
# input instead.
fileInTmp = file
201+
if fileext in clangFilterCPPExts:
202+
fileInTmp = file + '_CF.cpp'
203+
shutil.copyfile(file, fileInTmp)
221204

222-
fileOutTmp = file + '.CF'
223-
cmd = '"' + clangFormatPath.as_posix() + '" ' + clangFormatArgs + ' ' + fileInTmp + ' > ' + fileOutTmp
224-
os.system(cmd)
205+
# The formatter output is captured by shell redirection into '<file>.CF'.
fileOutTmp = file + '.CF'
206+
# The command is one shell string and only the clang-format path is wrapped
# in double quotes; fileInTmp and fileOutTmp are appended unquoted.
# NOTE(review): paths containing spaces or shell metacharacters would
# presumably be misparsed by the shell -- confirm before relying on this.
cmd = '"' + clangFormatPath.as_posix() + '" ' + clangFormatArgs + ' ' + fileInTmp + ' > ' + fileOutTmp
207+
# The return value of os.system() is not checked; success is judged from the
# temp output file below.
os.system(cmd)
225208

226-
if fileInTmp != file:
227-
os.remove(fileInTmp)
209+
# Remove the '_CF.cpp' copy if one was made.
if fileInTmp != file:
210+
os.remove(fileInTmp)
228211

229-
if not os.path.exists(fileOutTmp):
230-
red(' Failed to create temp output file:' + fileOutTmp)
231-
else:
232-
if filecmp.cmp(file, fileOutTmp):
233-
green(' Clang-format matched.')
234-
elif os.path.getsize(fileOutTmp) > 0:
235-
if doWrite:
236-
#os.remove(file)
237-
shutil.copyfile(fileOutTmp, file)
238-
yellow(' Clang-format saved.')
239-
else:
240-
red(' Clang-format changes not saved.')
241-
elif os.path.getsize(file) > 0:
242-
red(' Temp output file should not be empty!!!\nCommand:' + cmd)
212+
if not os.path.exists(fileOutTmp):
213+
red(' Failed to create temp output file:' + fileOutTmp)
214+
else:
215+
# Outcomes reported below: output equal to the input (nothing to do);
# non-empty output that differs (copied over the input only when doWrite is
# set); empty output for a non-empty input (reported together with the
# command); both files empty.
if filecmp.cmp(file, fileOutTmp):
216+
green(' Clang-format matched.')
217+
elif os.path.getsize(fileOutTmp) > 0:
218+
if doWrite:
219+
#os.remove(file)
220+
shutil.copyfile(fileOutTmp, file)
221+
yellow(' Clang-format saved.')
243222
else:
244-
red(' Input and temp files empty')
223+
red(' Clang-format changes not saved.')
224+
elif os.path.getsize(file) > 0:
225+
red(' Temp output file should not be empty!!!\nCommand:' + cmd)
226+
else:
227+
red(' Input and temp files empty')
245228

246-
os.remove(fileOutTmp)
247-
elif fileext not in eatWhiteExts:
248-
green('Skipping: ' + file)
229+
# Delete the '.CF' temp output.
os.remove(fileOutTmp)
230+
# NOTE(review): which `if` this elif/else pair belongs to depends on
# indentation that is not visible here -- presumably the doClangFormat test.
elif fileext not in eatWhiteExts:
231+
green('Skipping: ' + file)
232+
else:
233+
# Emits an empty line (print('') writes just the newline).
print('')
234+
235+
def allFiles(dirs, allow_suffix = ''):
# Generator (it uses `yield`) over file paths found from the entries of
# `dirs`; each entry is first tested as a file path and otherwise handed to
# os.walk().
# The filter is a substring test: `allow_suffix in path` matches the text
# anywhere in the path, not only at its end. An empty allow_suffix disables
# the filter (`or not allow_suffix`).
236+
'''Generates all files in the given directories and if allow_suffix is provided, filters it'''
237+
238+
for dirPath in dirs:
239+
if os.path.isfile(dirPath) and (allow_suffix in dirPath or not allow_suffix):
240+
# Here the is-file test ran on the path as given; non-ASCII characters are
# stripped only afterwards, so the yielded string can differ from the path
# that was tested.
dirPath = removeNonAscii(dirPath)
241+
yield dirPath
249242
else:
250-
print('')
243+
for root, dirs, filenames in os.walk(dirPath, topdown=True):
244+
# Slice-assignment edits the list handed out by os.walk in place, removing
# names listed in skipFolders (defined outside this view); with topdown=True
# os.walk only descends into the names left in that list.
# Note that this loop variable reuses the parameter name `dirs`.
dirs[:] = [d for d in dirs if d not in skipFolders]
251245

252-
def cleanDirs(dirs, doCRLF, doWrite, doLineUnwrap, doEatWhite, doClangFormat, doParallel, verbose):
253-
files = allFiles(dirs)
246+
for f in filenames:
247+
path = os.path.join(root, f)
248+
# NOTE(review): unlike the branch above, the path is stripped of non-ASCII
# characters BEFORE os.path.isfile() runs, so a name containing such
# characters is tested under its stripped spelling -- confirm this is
# intended.
path = removeNonAscii(path)
249+
if os.path.isfile(path) and (allow_suffix in path or not allow_suffix):
250+
yield path
251+
252+
def processFileList(file_chunk, func, args):
# file_chunk is only iterated, so any iterable works (processFilesParallel
# passes both lists and a generator). `args` is unpacked after the file name:
# each call is func(f, *args). Return values of func are discarded.
253+
'''Helper function for processFilesParallel that takes a list of files and calls func on them serially'''
254+
255+
for f in file_chunk:
256+
func(f, *args)
257+
258+
def processFilesParallel(dirs, allow_suffix, func, args, doParallel):
# Collects paths with allFiles(dirs, allow_suffix) and calls func(path, *args)
# for each one: split across multiprocessing.Process workers when doParallel
# is true, otherwise in this process. Nothing is returned.
259+
'''Run func with args on all files in all dirs that contain allow_suffix in their name'''
260+
# NOTE: the string literal below is a separate expression statement; only the
# literal above becomes the function's __doc__.
'''func must take filename as first arg.'''
261+
262+
all_files = allFiles(dirs, allow_suffix)
254263

255264
if doParallel:
265+
# Put generated files in list for hashing
266+
# allFiles() is a generator in this file; len(), sorting and slicing below
# need a real list.
files = list(all_files)
267+
256268
# Reorder the list of files arbitrarily to decorrelate the easy ones so thread workload is more uniform
257269
# Sorting the (md5 hex digest, path) pairs orders the paths by digest, which
# is deterministic for a given set of paths.
files = [y for x,y in sorted(zip([hashlib.md5(f.encode('utf-8')).hexdigest() for f in files], files))]
258270

259271
# Parallel implementation
260-
filesPerChunk = math.ceil(len(files) / multiprocessing.cpu_count())
261-
fileChunks = [files[i * filesPerChunk:(i + 1) * filesPerChunk] for i in range((len(files) + filesPerChunk - 1) // filesPerChunk )]
272+
core_count = multiprocessing.cpu_count()
273+
# Ceiling of len(files) / core_count, computed with float division and then
# truncated by int().
files_per_chunk = int((len(files) + core_count - 1) / core_count)
274+
# Always builds exactly core_count slices. Slices that start past the end of
# `files` are empty lists, so with fewer files than that (or none) some
# chunks are empty.
# NOTE(review): every chunk below still gets its own process, including the
# empty ones -- confirm that is acceptable.
file_chunks = [files[i * files_per_chunk:(i + 1) * files_per_chunk] for i in range(core_count)]
262275

263276
processes = []
264-
for cur_test in fileChunks:
265-
p = multiprocessing.Process(target=cleanFiles, args=(cur_test, doCRLF, doWrite, doLineUnwrap, doEatWhite,
266-
doClangFormat, clangFormatArgs, clangFormatPath, verbose))
277+
for file_chunk in file_chunks:
278+
# One worker per chunk; func and args are forwarded unchanged to
# processFileList in the child process.
p = multiprocessing.Process(target=processFileList, args=(file_chunk, func, args))
267279
processes.append(p)
268280
p.start()
269281

270282
# Wait for every worker to finish; their exit codes are not inspected.
for process in processes:
271283
process.join()
272284

273285
else:
274-
cleanFiles(files, doCRLF, doWrite, doLineUnwrap, doEatWhite, doClangFormat, clangFormatArgs, clangFormatPath, verbose)
286+
# Serial path: the generator is passed straight through, so files are
# processed in the order allFiles() yields them (no md5 reordering here).
processFileList(all_files, func, args)
275287

276288
def main():
277289
parser = argparse.ArgumentParser(
@@ -328,7 +340,7 @@ def main():
328340
setClangFormatPath(args.clang_format_path, args.verbose)
329341
print('Using:', clangFormatPath, '\n')
330342

331-
cleanDirs(args.fname, args.to_crlf, not args.no_write, args.do_line_unwrap, args.do_eatwhite, args.do_clang_format, args.parallel, args.verbose)
343+
processFilesParallel(args.fname, '', cleanFile, (args.to_crlf, not args.no_write, args.do_line_unwrap, args.do_eatwhite, args.do_clang_format, clangFormatArgs, clangFormatPath, args.verbose), args.parallel)
332344

333345
if __name__ == "__main__":
334346
startTime = timeit.default_timer()

eatwhite.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def green(text, **kwargs):
2121
def yellow(text, **kwargs):
# Print `text` between the escape sequences '\033[33m' (ANSI SGR 33, yellow
# foreground) and '\033[0m' (reset), with sep='' so print() adds no spaces
# around it. Extra keyword arguments are forwarded to print().
2222
print('\033[33m', text, '\033[0m', sep='', **kwargs)
2323

24-
def replaceLoop(content, oldt, newt, verbose, printNL):
24+
def replaceLoop(content, oldt, newt, verbose = False, printNL = None):
2525
replCnt = content.count(oldt)
2626
while replCnt > 0:
2727
if verbose:

0 commit comments

Comments
 (0)