From b53733e2faa6426caf5cfed56bcac31e3d1cd2c1 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Fri, 3 Jun 2022 12:48:37 -0400
Subject: [PATCH 1/6] Add support for profiling benchmarks

---
 doc/cli.rst             |  2 ++
 pyperf/_manager.py      |  3 +++
 pyperf/_process_time.py | 42 ++++++++++++++++++++++++++++---
 pyperf/_runner.py       | 55 ++++++++++++++++++++++++++++++++++++++---
 pyperf/_utils.py        | 21 ++++++++++++++++
 5 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/doc/cli.rst b/doc/cli.rst
index 7c4229e5..3cac1f4a 100644
--- a/doc/cli.rst
+++ b/doc/cli.rst
@@ -491,6 +491,7 @@ Usage
         [--duplicate DUPLICATE]
         [-s SETUP]
         [--teardown TEARDOWN]
+        [--profile PROFILE]
         stmt [stmt ...]
 
 Options:
@@ -512,6 +513,7 @@ Options:
 * ``--duplicate=DUPLICATE``: Duplicate statements (``stmt`` statements, not
   ``SETUP``) to reduce the overhead of the outer loop and multiply
   inner loops by DUPLICATE (see ``--inner-loops`` option).
+* ``--profile=PROFILE``: Run the benchmark inside the cProfile profiler and output to the given file. This is a convenient way to profile a specific benchmark, but it will make the actual benchmark timings much less accurate.
 
 .. note::
    timeit ``-n`` (number) and ``-r`` (repeat) options become ``-l`` (loops) and
diff --git a/pyperf/_manager.py b/pyperf/_manager.py
index f29af1fa..d478708d 100644
--- a/pyperf/_manager.py
+++ b/pyperf/_manager.py
@@ -66,6 +66,9 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe):
         if args.track_memory:
             cmd.append('--track-memory')
 
+        if args.profile:
+            cmd.extend(['--profile', args.profile])
+
         if self.runner._add_cmdline_args:
             self.runner._add_cmdline_args(cmd, args)
 
diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py
index d3589b28..b184e210 100644
--- a/pyperf/_process_time.py
+++ b/pyperf/_process_time.py
@@ -17,6 +17,7 @@
 import os
 import subprocess
 import sys
+import tempfile
 import time
 
 try:
@@ -35,11 +36,31 @@ def get_max_rss():
         return 0
 
 
-def bench_process(loops, args, kw):
+def merge_profile_stats_files(src, dst):
+    """
+    Merging one existing pstats file into another.
+    """
+    import pstats
+    if os.path.isfile(dst):
+        src_stats = pstats.Stats(src)
+        dst_stats = pstats.Stats(dst)
+        dst_stats.add(src_stats)
+        dst_stats.dump_stats(dst)
+        os.unlink(src)
+    else:
+        os.rename(src, dst)
+
+
+def bench_process(loops, args, kw, profile_filename=None):
     max_rss = 0
     range_it = range(loops)
     start_time = time.perf_counter()
 
+    if profile_filename:
+        with tempfile.NamedTemporaryFile(suffix=".profile", delete=False) as fh:
+            temp_profile_filename = fh.name
+        args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:]
+
     for loop in range_it:
         start_rss = get_max_rss()
 
@@ -51,6 +72,8 @@ def bench_process(loops, args, kw):
         if exitcode != 0:
             print("Command failed with exit code %s" % exitcode,
                   file=sys.stderr)
+            if profile_filename:
+                os.unlink(temp_profile_filename)
             sys.exit(exitcode)
 
         proc = None
@@ -58,6 +81,11 @@ def bench_process(loops, args, kw):
         rss = get_max_rss() - start_rss
         max_rss = max(max_rss, rss)
 
+        if profile_filename:
+            merge_profile_stats_files(
+                temp_profile_filename, profile_filename
+            )
+
     dt = time.perf_counter() - start_time
     return (dt, max_rss)
 
@@ -70,10 +98,18 @@ def main():
         sys.exit(1)
 
     if len(sys.argv) < 3:
-        print("Usage: %s %s loops program [arg1 arg2 ...]"
+        print("Usage: %s %s loops program [arg1 arg2 ...] [--profile profile]"
               % (os.path.basename(sys.executable), __file__))
         sys.exit(1)
 
+    profile_idx = sys.argv.index("--profile")
+    if profile_idx != -1:
+        profile_filename = sys.argv[profile_idx + 1]
+        del sys.argv[profile_idx]
+        del sys.argv[profile_idx]
+    else:
+        profile_filename = None
+
     loops = int(sys.argv[1])
     args = sys.argv[2:]
 
@@ -88,7 +124,7 @@ def main():
         kw['stdout'] = devnull
     kw['stderr'] = subprocess.STDOUT
 
-    dt, max_rss = bench_process(loops, args, kw)
+    dt, max_rss = bench_process(loops, args, kw, profile_filename)
 
     if devnull is not None:
         devnull.close()
diff --git a/pyperf/_runner.py b/pyperf/_runner.py
index b70e3e52..6c0d9e2b 100644
--- a/pyperf/_runner.py
+++ b/pyperf/_runner.py
@@ -12,7 +12,8 @@
                                set_highest_priority)
 from pyperf._formatter import format_timedelta
 from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable,
-                           WritePipe, get_python_names)
+                           WritePipe, get_python_names,
+                           merge_profile_stats)
 from pyperf._worker import WorkerProcessTask
 
 
@@ -48,6 +49,19 @@ def parse_python_names(names):
     return parts
 
 
+def profiling_wrapper(func):
+    """
+    Wrap a function to collect profiling.
+    """
+    import cProfile
+    profiler = cProfile.Profile()
+
+    def profiling_func(*args):
+        return profiler.runcall(func, *args)
+
+    return profiler, profiling_func
+
+
 class CLIError(Exception):
     pass
 
@@ -222,6 +236,11 @@ def __init__(self, values=None, warmups=None, processes=None,
                                  'PYTHON as CHANGED_NAME '
                                  'and REF_PYTHON as REF_NAME in results')
 
+        parser.add_argument('--profile',
+                            type=str,
+                            help='Collect profile data using cProfile '
+                                 'and output to the given file.')
+
         memory = parser.add_mutually_exclusive_group()
         memory.add_argument('--tracemalloc', action="store_true",
                             help='Trace memory allocations using tracemalloc')
@@ -454,12 +473,21 @@ def bench_time_func(self, name, time_func, *args, **kwargs):
         if not self._check_worker_task():
             return None
 
+        if self.args.profile:
+            profiler, time_func = profiling_wrapper(time_func)
+
         def task_func(task, loops):
             return time_func(loops, *args)
 
         task = WorkerProcessTask(self, name, task_func, metadata)
+
         task.inner_loops = inner_loops
-        return self._main(task)
+        result = self._main(task)
+
+        if self.args.profile:
+            merge_profile_stats(profiler, self.args.profile)
+
+        return result
 
     def bench_func(self, name, func, *args, **kwargs):
         """"Benchmark func(*args)."""
@@ -474,6 +502,9 @@ def bench_func(self, name, func, *args, **kwargs):
         if args:
             func = functools.partial(func, *args)
 
+        if self.args.profile:
+            profiler, func = profiling_wrapper(func)
+
         def task_func(task, loops):
             # use fast local variables
             local_timer = time.perf_counter
@@ -494,7 +525,12 @@ def task_func(task, loops):
 
         task = WorkerProcessTask(self, name, task_func, metadata)
         task.inner_loops = inner_loops
-        return self._main(task)
+        result = self._main(task)
+
+        if self.args.profile:
+            merge_profile_stats(profiler, self.args.profile)
+
+        return result
 
     def bench_async_func(self, name, func, *args, **kwargs):
         """Benchmark await func(*args)"""
@@ -509,6 +545,9 @@ def bench_async_func(self, name, func, *args, **kwargs):
         if args:
             func = functools.partial(func, *args)
 
+        if self.args.profile:
+            profiler, func = profiling_wrapper(func)
+
         def task_func(task, loops):
             if loops != 1:
                 async def main():
@@ -549,7 +588,12 @@ async def main():
 
         task = WorkerProcessTask(self, name, task_func, metadata)
         task.inner_loops = inner_loops
-        return self._main(task)
+        result = self._main(task)
+
+        if self.args.profile:
+            merge_profile_stats(profiler, self.args.profile)
+
+        return result
 
     def timeit(self, name, stmt=None, setup="pass", teardown="pass",
                inner_loops=None, duplicate=None, metadata=None, globals=None):
@@ -671,6 +715,9 @@ def bench_command(self, name, command):
         if not self._check_worker_task():
             return None
 
+        if self.args.profile:
+            command.extend(["--profile", self.args.profile])
+
         # Use lazy import to limit imports on 'import pyperf'
         from pyperf._command import BenchCommandTask
         task = BenchCommandTask(self, name, command)
diff --git a/pyperf/_utils.py b/pyperf/_utils.py
index 05dc54a6..99225466 100644
--- a/pyperf/_utils.py
+++ b/pyperf/_utils.py
@@ -405,3 +405,24 @@ def geometric_mean(data):
     if not data:
         raise ValueError("empty data")
     return _geometric_mean(data)
+
+
+def merge_profile_stats(profiler, dst):
+    """
+    Save pstats by merging into an existing file.
+    """
+    import pstats
+    if os.path.isfile(dst):
+        try:
+            src_stats = pstats.Stats(profiler)
+        except TypeError:
+            # If no actual stats were collected, a TypeError is raised
+            # and we don't need to merge anything into the output.
+            return
+
+        dst_stats = pstats.Stats(dst)
+        dst_stats.add(src_stats)
+        dst_stats.dump_stats(dst)
+    else:
+        profiler.dump_stats(dst)
+

From d566d5277499a8ccd8f1654fafe6a32607b2dda7 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Mon, 6 Jun 2022 09:20:08 -0400
Subject: [PATCH 2/6] Fix unit tests

---
 pyperf/_process_time.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py
index b184e210..4d010500 100644
--- a/pyperf/_process_time.py
+++ b/pyperf/_process_time.py
@@ -102,8 +102,8 @@ def main():
               % (os.path.basename(sys.executable), __file__))
         sys.exit(1)
 
-    profile_idx = sys.argv.index("--profile")
-    if profile_idx != -1:
+    if "--profile" in sys.argv:
+        profile_idx = sys.argv.index("--profile")
         profile_filename = sys.argv[profile_idx + 1]
         del sys.argv[profile_idx]
         del sys.argv[profile_idx]

From a960e67066ad22f7ec5eb032022fd5caa01da68f Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Mon, 13 Jun 2022 16:19:24 -0400
Subject: [PATCH 3/6] Add unit tests

---
 pyperf/tests/test_runner.py | 44 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py
index a61053ba..b3501a82 100644
--- a/pyperf/tests/test_runner.py
+++ b/pyperf/tests/test_runner.py
@@ -1,5 +1,6 @@
 import collections
 import os.path
+import pstats
 import sys
 import tempfile
 import textwrap
@@ -73,7 +74,7 @@ class ClockInfo:
         self.assertEqual(bench.get_nrun(), 1)
 
         return Result(runner, bench, stdout)
-
+    
     def test_worker(self):
         result = self.exec_runner('--worker', '-l1', '-w1')
         self.assertRegex(result.stdout,
@@ -83,6 +84,47 @@ def test_debug_single_value(self):
         result = self.exec_runner('--debug-single-value', '--worker')
         self.assertEqual(result.bench.get_nvalue(), 1)
 
+    def test_profile_time_func(self):
+        with tempfile.NamedTemporaryFile('wb+') as tmp:
+            name = tmp.name
+        args = ['--worker', '-l1', '-w1', '--profile', name]
+        runner = self.create_runner(args)
+
+        def time_func(loops):
+            return 1.0
+
+        runner.bench_time_func('bench1', time_func)
+
+        try:
+            s = pstats.Stats(name)
+            assert len(s.get_stats_profile().func_profiles)
+        finally:
+            if os.path.isfile(name):
+                os.unlink(name)
+
+    def test_profile_func(self):
+        with tempfile.NamedTemporaryFile('wb+') as tmp:
+            name = tmp.name
+        args = ['--worker', '-l1', '-w1', '--profile', name]
+        runner = self.create_runner(args)
+
+        def external():
+            return [1] * 1000
+
+        def func():
+            external()
+            return 1.0
+
+        runner.bench_func('bench1', func)
+
+        try:
+            import pstats
+            s = pstats.Stats(name)
+            assert len(s.get_stats_profile().func_profiles)
+        finally:
+            if os.path.isfile(name):
+                os.unlink(name)
+
     def test_pipe(self):
         rpipe, wpipe = create_pipe()
         with rpipe:

From e2890f9c28c1f78dd01b717a3493141d3a0d52b9 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Thu, 16 Jun 2022 15:28:34 -0400
Subject: [PATCH 4/6] Use mktemp instead

---
 pyperf/_process_time.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py
index 4d010500..7010195d 100644
--- a/pyperf/_process_time.py
+++ b/pyperf/_process_time.py
@@ -57,8 +57,7 @@ def bench_process(loops, args, kw, profile_filename=None):
     start_time = time.perf_counter()
 
     if profile_filename:
-        with tempfile.NamedTemporaryFile(suffix=".profile", delete=False) as fh:
-            temp_profile_filename = fh.name
+        temp_profile_filename = tempfile.mktemp()
         args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:]
 
     for loop in range_it:

From 14a6e19861e1d45048b717a1857f9374f42d2769 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Fri, 17 Jun 2022 11:34:10 -0400
Subject: [PATCH 5/6] Fix tests on < py3.9

---
 pyperf/tests/test_runner.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py
index b3501a82..ffa1fbc1 100644
--- a/pyperf/tests/test_runner.py
+++ b/pyperf/tests/test_runner.py
@@ -97,7 +97,10 @@ def time_func(loops):
 
         try:
             s = pstats.Stats(name)
-            assert len(s.get_stats_profile().func_profiles)
+            if sys.version_info < (3, 9):
+                assert len(s.stats)
+            else:
+                assert len(s.get_stats_profile().func_profiles)
         finally:
             if os.path.isfile(name):
                 os.unlink(name)
@@ -120,7 +123,10 @@ def func():
         try:
             import pstats
             s = pstats.Stats(name)
-            assert len(s.get_stats_profile().func_profiles)
+            if sys.version_info < (3, 9):
+                assert len(s.stats)
+            else:
+                assert len(s.get_stats_profile().func_profiles)
         finally:
             if os.path.isfile(name):
                 os.unlink(name)

From f27c2e965a97775441e914482fb78b4f0430523c Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Thu, 14 Jul 2022 08:18:51 -0400
Subject: [PATCH 6/6] Update pyperf/tests/test_runner.py

Co-authored-by: Dong-hee Na <donghee.na92@gmail.com>
---
 pyperf/tests/test_runner.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py
index ffa1fbc1..c456a9ae 100644
--- a/pyperf/tests/test_runner.py
+++ b/pyperf/tests/test_runner.py
@@ -74,7 +74,6 @@ class ClockInfo:
         self.assertEqual(bench.get_nrun(), 1)
 
         return Result(runner, bench, stdout)
-    
     def test_worker(self):
         result = self.exec_runner('--worker', '-l1', '-w1')
         self.assertRegex(result.stdout,