Skip to content

Add support for profiling benchmarks #134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits
Jul 14, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/cli.rst
Original file line number Diff line number Diff line change
@@ -491,6 +491,7 @@ Usage
[--duplicate DUPLICATE]
[-s SETUP]
[--teardown TEARDOWN]
[--profile PROFILE]
stmt [stmt ...]

Options:
@@ -512,6 +513,7 @@ Options:
* ``--duplicate=DUPLICATE``: Duplicate statements (``stmt`` statements, not
``SETUP``) to reduce the overhead of the outer loop and multiply
inner loops by DUPLICATE (see ``--inner-loops`` option).
* ``--profile=PROFILE``: Run the benchmark inside the cProfile profiler and output to the given file. This is a convenient way to profile a specific benchmark, but it will make the actual benchmark timings much less accurate.

.. note::
timeit ``-n`` (number) and ``-r`` (repeat) options become ``-l`` (loops) and
3 changes: 3 additions & 0 deletions pyperf/_manager.py
Original file line number Diff line number Diff line change
@@ -66,6 +66,9 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe):
if args.track_memory:
cmd.append('--track-memory')

if args.profile:
cmd.extend(['--profile', args.profile])

if self.runner._add_cmdline_args:
self.runner._add_cmdline_args(cmd, args)

41 changes: 38 additions & 3 deletions pyperf/_process_time.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
import os
import subprocess
import sys
import tempfile
import time

try:
@@ -35,11 +36,30 @@ def get_max_rss():
return 0


def bench_process(loops, args, kw):
def merge_profile_stats_files(src, dst):
    """Fold the pstats data stored in file *src* into file *dst*.

    If *dst* does not exist yet, *src* simply becomes *dst* (rename).
    Otherwise both files are loaded, combined, written back to *dst*,
    and *src* is removed.
    """
    # Lazy import: pstats is only needed when profiling was requested.
    import pstats

    if not os.path.isfile(dst):
        # First stats file to arrive: it becomes the output as-is.
        os.rename(src, dst)
        return

    combined = pstats.Stats(dst)
    combined.add(pstats.Stats(src))
    combined.dump_stats(dst)
    os.unlink(src)


def bench_process(loops, args, kw, profile_filename=None):
max_rss = 0
range_it = range(loops)
start_time = time.perf_counter()

if profile_filename:
temp_profile_filename = tempfile.mktemp()
args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:]

for loop in range_it:
start_rss = get_max_rss()

@@ -51,13 +71,20 @@ def bench_process(loops, args, kw):
if exitcode != 0:
print("Command failed with exit code %s" % exitcode,
file=sys.stderr)
if profile_filename:
os.unlink(temp_profile_filename)
sys.exit(exitcode)

proc = None

rss = get_max_rss() - start_rss
max_rss = max(max_rss, rss)

if profile_filename:
merge_profile_stats_files(
temp_profile_filename, profile_filename
)

dt = time.perf_counter() - start_time
return (dt, max_rss)

@@ -70,10 +97,18 @@ def main():
sys.exit(1)

if len(sys.argv) < 3:
print("Usage: %s %s loops program [arg1 arg2 ...]"
print("Usage: %s %s loops program [arg1 arg2 ...] [--profile profile]"
% (os.path.basename(sys.executable), __file__))
sys.exit(1)

if "--profile" in sys.argv:
profile_idx = sys.argv.index("--profile")
profile_filename = sys.argv[profile_idx + 1]
del sys.argv[profile_idx]
del sys.argv[profile_idx]
else:
profile_filename = None

loops = int(sys.argv[1])
args = sys.argv[2:]

@@ -88,7 +123,7 @@ def main():
kw['stdout'] = devnull
kw['stderr'] = subprocess.STDOUT

dt, max_rss = bench_process(loops, args, kw)
dt, max_rss = bench_process(loops, args, kw, profile_filename)

if devnull is not None:
devnull.close()
55 changes: 51 additions & 4 deletions pyperf/_runner.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,8 @@
set_highest_priority)
from pyperf._formatter import format_timedelta
from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable,
WritePipe, get_python_names)
WritePipe, get_python_names,
merge_profile_stats)
from pyperf._worker import WorkerProcessTask


@@ -48,6 +49,19 @@ def parse_python_names(names):
return parts


def profiling_wrapper(func):
    """Wrap *func* so that every call is recorded by a cProfile profiler.

    Returns a ``(profiler, wrapped)`` pair: ``wrapped`` forwards its
    arguments to *func* and returns its result unchanged, while
    ``profiler`` accumulates profile data across all calls and can
    later be persisted (e.g. with ``merge_profile_stats()``).
    """
    # Lazy import: cProfile is only needed when --profile is used.
    import cProfile
    profiler = cProfile.Profile()

    def profiling_func(*args, **kwargs):
        # Fix: also forward keyword arguments; the original wrapper only
        # accepted positional arguments, which silently restricted which
        # callables could be profiled. runcall() returns func's result.
        return profiler.runcall(func, *args, **kwargs)

    return profiler, profiling_func


class CLIError(Exception):
pass

@@ -222,6 +236,11 @@ def __init__(self, values=None, warmups=None, processes=None,
'PYTHON as CHANGED_NAME '
'and REF_PYTHON as REF_NAME in results')

parser.add_argument('--profile',
type=str,
help='Collect profile data using cProfile '
'and output to the given file.')

memory = parser.add_mutually_exclusive_group()
memory.add_argument('--tracemalloc', action="store_true",
help='Trace memory allocations using tracemalloc')
@@ -454,12 +473,21 @@ def bench_time_func(self, name, time_func, *args, **kwargs):
if not self._check_worker_task():
return None

if self.args.profile:
profiler, time_func = profiling_wrapper(time_func)

def task_func(task, loops):
return time_func(loops, *args)

task = WorkerProcessTask(self, name, task_func, metadata)

task.inner_loops = inner_loops
return self._main(task)
result = self._main(task)

if self.args.profile:
merge_profile_stats(profiler, self.args.profile)

return result

def bench_func(self, name, func, *args, **kwargs):
""""Benchmark func(*args)."""
@@ -474,6 +502,9 @@ def bench_func(self, name, func, *args, **kwargs):
if args:
func = functools.partial(func, *args)

if self.args.profile:
profiler, func = profiling_wrapper(func)

def task_func(task, loops):
# use fast local variables
local_timer = time.perf_counter
@@ -494,7 +525,12 @@ def task_func(task, loops):

task = WorkerProcessTask(self, name, task_func, metadata)
task.inner_loops = inner_loops
return self._main(task)
result = self._main(task)

if self.args.profile:
merge_profile_stats(profiler, self.args.profile)

return result

def bench_async_func(self, name, func, *args, **kwargs):
"""Benchmark await func(*args)"""
@@ -509,6 +545,9 @@ def bench_async_func(self, name, func, *args, **kwargs):
if args:
func = functools.partial(func, *args)

if self.args.profile:
profiler, func = profiling_wrapper(func)

def task_func(task, loops):
if loops != 1:
async def main():
@@ -549,7 +588,12 @@ async def main():

task = WorkerProcessTask(self, name, task_func, metadata)
task.inner_loops = inner_loops
return self._main(task)
result = self._main(task)

if self.args.profile:
merge_profile_stats(profiler, self.args.profile)

return result

def timeit(self, name, stmt=None, setup="pass", teardown="pass",
inner_loops=None, duplicate=None, metadata=None, globals=None):
@@ -671,6 +715,9 @@ def bench_command(self, name, command):
if not self._check_worker_task():
return None

if self.args.profile:
command.extend(["--profile", self.args.profile])

# Use lazy import to limit imports on 'import pyperf'
from pyperf._command import BenchCommandTask
task = BenchCommandTask(self, name, command)
21 changes: 21 additions & 0 deletions pyperf/_utils.py
Original file line number Diff line number Diff line change
@@ -405,3 +405,24 @@ def geometric_mean(data):
if not data:
raise ValueError("empty data")
return _geometric_mean(data)


def merge_profile_stats(profiler, dst):
    """Write the data collected by *profiler* to the pstats file *dst*.

    When *dst* already exists its contents are merged with the
    profiler's data; otherwise the profiler is dumped directly.
    """
    # Lazy import: pstats is only needed when profiling was requested.
    import pstats

    if not os.path.isfile(dst):
        profiler.dump_stats(dst)
        return

    try:
        new_stats = pstats.Stats(profiler)
    except TypeError:
        # pstats raises TypeError for a profiler that collected no
        # actual stats; nothing needs to be merged into dst then.
        return
    merged = pstats.Stats(dst)
    merged.add(new_stats)
    merged.dump_stats(dst)

49 changes: 48 additions & 1 deletion pyperf/tests/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import collections
import os.path
import pstats
import sys
import tempfile
import textwrap
@@ -73,7 +74,6 @@ class ClockInfo:
self.assertEqual(bench.get_nrun(), 1)

return Result(runner, bench, stdout)

def test_worker(self):
result = self.exec_runner('--worker', '-l1', '-w1')
self.assertRegex(result.stdout,
@@ -83,6 +83,53 @@ def test_debug_single_value(self):
result = self.exec_runner('--debug-single-value', '--worker')
self.assertEqual(result.bench.get_nvalue(), 1)

def test_profile_time_func(self):
    """--profile with bench_time_func() must produce a usable pstats file."""
    with tempfile.NamedTemporaryFile('wb+') as tmp:
        name = tmp.name
        runner = self.create_runner(
            ['--worker', '-l1', '-w1', '--profile', name])

        def time_func(loops):
            return 1.0

        runner.bench_time_func('bench1', time_func)

        try:
            stats = pstats.Stats(name)
            if sys.version_info < (3, 9):
                assert len(stats.stats)
            else:
                # Stats.get_stats_profile() was added in Python 3.9.
                assert len(stats.get_stats_profile().func_profiles)
        finally:
            if os.path.isfile(name):
                os.unlink(name)

def test_profile_func(self):
    """--profile with bench_func() must produce a usable pstats file.

    Uses a helper callee so the collected profile contains more than
    one function frame.
    """
    with tempfile.NamedTemporaryFile('wb+') as tmp:
        name = tmp.name
        args = ['--worker', '-l1', '-w1', '--profile', name]
        runner = self.create_runner(args)

        def external():
            return [1] * 1000

        def func():
            external()
            return 1.0

        runner.bench_func('bench1', func)

        try:
            # Fix: dropped the redundant local "import pstats"; the module
            # already imports pstats at the top of the file.
            s = pstats.Stats(name)
            if sys.version_info < (3, 9):
                assert len(s.stats)
            else:
                # Stats.get_stats_profile() was added in Python 3.9.
                assert len(s.get_stats_profile().func_profiles)
        finally:
            if os.path.isfile(name):
                os.unlink(name)

def test_pipe(self):
rpipe, wpipe = create_pipe()
with rpipe: