Replace countlines.py with the more advanced version from mcpyrate

Technologicat · Technologicat · commit ab9c8363a6a4 · 2021-05-09T02:23:09.000+03:00
diff --git a/countlines.py b/countlines.py
@@ -1,60 +1,73 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-"""Estimate project size in lines of code.
+"""Estimate project size in lines of code."""
 
-Ignores blank lines, docstrings, and whole-line comments."""
+# TODO: add sorting options: name, code count, SLOC count, code ratio.
 
 import os
 import re
 from operator import itemgetter
 
 def listpy(path):
-    return list(sorted(fn for fn in os.listdir(path) if fn.endswith(".py")))
+    return list(sorted(filename for filename in os.listdir(path) if filename.endswith(".py")))
 
-def loc(code, blanks, docstrings, comments):  # blanks et al.: include this item?
+def count_sloc(code, *, blanks, docstrings, comments):
+    """blanks et al.: include this item?"""
     if not docstrings:
         # TODO: make sure it's a docstring (and not some other """...""" string)
         code = re.sub(r'""".*?"""', r'', code, flags=(re.MULTILINE + re.DOTALL))
+        code = re.sub(r"'''.*?'''", r'', code, flags=(re.MULTILINE + re.DOTALL))
     lines = code.split("\n")
     if not blanks:
         lines = [line for line in lines if line.strip()]
     if not comments:
-        # TODO: removes only whole-line comments.
-        lines = [line for line in lines if not line.strip().startswith("#")]
+        lines = [line for line in lines if not line.strip().startswith("#")]  # ignore whole-line comments
     return len(lines)
 
-def analyze(items, blanks=False, docstrings=False, comments=False):
-    grandtotal = 0
-    for name, p in items:
-        path = os.path.join(*p)
-        files = listpy(path)
-        ns = []
-        for fn in files:
-            with open(os.path.join(path, fn), "rt", encoding="utf-8") as f:
+def report(paths):
+    print(f"Code size for {os.getcwd()}")
+    def format_name(s, width=25):
+        return s.ljust(width)
+    def format_number(n, width=5):
+        return str(n).rjust(width)
+    def format_path(s):  # ./subdir/something
+        def label(s):
+            if s == ".":
+                return "top level"
+            return s[2:]
+        return format_name(label(s))
+    codes_grandtotal = 0
+    slocs_grandtotal = 0
+    for path in paths:
+        filenames = listpy(path)
+        results = []
+        for filename in filenames:
+            with open(os.path.join(path, filename), "rt", encoding="utf-8") as f:
                 content = f.read()
-            ns.append(loc(content, blanks, docstrings, comments))
-        # report
-        print(f"{name}:")
-        for fn, n in sorted(zip(files, ns), key=itemgetter(1)):
-            print(f"    {fn} {n}")
-        grouptotal = sum(ns)
-        print(f"  total for {name} {grouptotal}")
-        grandtotal += grouptotal
-    print(f"grand total {grandtotal}")
+            code = count_sloc(content, blanks=False, docstrings=False, comments=False)
+            sloc = count_sloc(content, blanks=True, docstrings=True, comments=True)
+            results.append((code, sloc))
+
+        if results:
+            codes, slocs = zip(*results)
+            codes = sum(codes)
+            slocs = sum(slocs)
+            print(f"\n  {format_path(path)}   {format_number(codes)} / {format_number(slocs)}  {int(codes / slocs * 100):d}% code")
+            for filename, (code, sloc) in sorted(zip(filenames, results), key=itemgetter(1)):
+                print(f"    {format_name(filename)} {format_number(code)} / {format_number(sloc)}  {int(code / sloc * 100):d}% code")
+            codes_grandtotal += codes
+            slocs_grandtotal += slocs
+    print(f"\n{format_name('Total')}     {format_number(codes_grandtotal)} / {format_number(slocs_grandtotal)}  {int(codes_grandtotal / slocs_grandtotal * 100):d}% code")
 
 def main():
-    items = (("top level", ["."]),
-             ("regular code", ["unpythonic"]),
-             ("regular code tests", ["unpythonic", "tests"]),
-             ("testing framework (not counting macros)", ["unpythonic", "test"]),
-             ("REPL/networking code", ["unpythonic", "net"]),
-             ("REPL/networking tests", ["unpythonic", "net", "tests"]),
-             ("macros", ["unpythonic", "syntax"]),
-             ("macro tests", ["unpythonic", "syntax", "tests"]))
-    print("Raw (with blanks, docstrings and comments)")
-    analyze(items, blanks=True, docstrings=True, comments=True)
-    print("\nFiltered (non-blank code lines only)")
-    analyze(items)
+    blacklist = [".git", "build", "dist", "__pycache__", "00_stuff"]
+    paths = []
+    for root, dirs, files in os.walk("."):
+        paths.append(root)
+        for x in blacklist:
+            if x in dirs:
+                dirs.remove(x)
+    report(sorted(paths))
 
 if __name__ == '__main__':
     main()