From b210deb3936bcc1accc512c0f79ab2b842c2982f Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Tue, 8 Mar 2022 15:32:34 +0530 Subject: [PATCH 01/16] added c backend --- compyle/c_backend.py | 247 ++++++++++++++++++++++++++++++++++++++++++ compyle/cimport.py | 217 +++++++++++++++++++++++++++++++++++++ compyle/parallel.py | 241 ++++++++++++++++++++++++++++++++++++++++- compyle/transpiler.py | 15 ++- 4 files changed, 718 insertions(+), 2 deletions(-) create mode 100644 compyle/c_backend.py create mode 100644 compyle/cimport.py diff --git a/compyle/c_backend.py b/compyle/c_backend.py new file mode 100644 index 0000000..ea69e94 --- /dev/null +++ b/compyle/c_backend.py @@ -0,0 +1,247 @@ +from textwrap import dedent +import pybind11 +from .translator import CConverter +from mako.template import Template +from .translator import ocl_detect_type, OpenCLConverter, KnownType +from .cython_generator import CythonGenerator, get_func_definition, getsourcelines + +pybind11_wrap_fn = ''' +PYBIND11_MODULE(${name}, m) { + m.def("${name}", &${name}, "${doc}"); +} +''' + + +class CBackend(CythonGenerator): + def __init__(self, detect_type=ocl_detect_type, known_types=None): + super(CBackend, self).__init__() + # self.function_address_space = 'WITHIN_KERNEL ' + + def add_pybind11_func(self, node): + args = self._get_function_args(node) + name = node.body[0].name + doc = '' + template = Template(pybind11_wrap_fn) + src = template.render( + name = name, + doc = doc + ) + return src + + def get_func_signature_pyb11(self, func): + sourcelines = getsourcelines(func)[0] + defn, lines = get_func_definition(sourcelines) + f_name, returns, args = self._analyze_method(func, lines) + pyb11_args = [] + pyb11_call = [] + c_args = [] + c_call = [] + for arg, value in args: + c_type = self.detect_type(arg, value) + c_args.append('{type} {arg}'.format(type=c_type, arg=arg)) + + c_call.append(arg) + pyb11_type = self.ctype_to_pyb11(c_type) + pyb11_args.append('{type} {arg}'.format(type=pyb11_type, arg=arg)) + if c_type.endswith('*'): + pyb11_call.append('({ctype}){arg}.request().ptr'.format(arg = arg, ctype = c_type)) + else: + pyb11_call.append('{arg}'.format(arg=arg)) + + return (pyb11_args, pyb11_call), (c_args, c_call) + + def ctype_to_pyb11(self, c_type): + if c_type[-1] == '*': + return 'py::array_t<{}>'.format(c_type[:-1]) + else: + return c_type + + def _get_self_type(self): + return KnownType('GLOBAL_MEM %s*' % self._class_name) + + + +reduction_c_template = ''' +template +T combine(T a, T b){ + return ${red_expr}; +} + +template +T reduce_one_ar(int offset, int n, T initial_val, T* ary){ + T a, b, temp; + temp = initial_val; + + for (int i = offset; i < (n + offset); i++){ + a = temp; + b = ary[i]; + + temp = combine(a, b); + } + return temp; +} + +template +T reduce(int offset, int n, T initial_val${args_extra}){ + T a, b, temp; + temp = initial_val; + + for (int i = offset; i < (n + offset); i++){ + a = temp; + b = ${map_expr}; + + temp = combine(a, b); + } + return temp; +} + + +template +T reduce_all(long N, T initial_val${args_extra}){ + T ans = initial_val; + if (N > 0){ + %if openmp: + int ntiles = omp_get_max_threads(); + %else: + int ntiles = 1; + %endif + T* stage1_res = new T[ntiles]; + + #pragma omp parallel + { + // Step 1 - reducing each tile + %if openmp: + int itile = omp_get_thread_num(); + %else: + int itile = 0; + %endif + int last_tile = ntiles - 1; + int tile_size = (N / ntiles); + int last_tile_size = N - tile_size * last_tile; + int cur_tile_size = itile == ntiles - 1 ? last_tile_size : tile_size; + int cur_start_idx = itile * tile_size; + + stage1_res[itile] = reduce(cur_start_idx, cur_tile_size, initial_val${call_extra}); + #pragma omp barrier + + #pragma omp single + ans = reduce_one_ar(0, ntiles, initial_val, stage1_res); + } + delete[] stage1_res; + } + return ans; +} + +PYBIND11_MODULE(${name}, m) { + m.def("${name}", [](long n, ${type} initial${pyb_args}){ + return reduce_all(n, initial${pyb_call}); + }); +} + +''' + +scan_c_template = ''' + +template +T combine(T a, T b){ + return ${scan_expr}; +} + + +template +T reduce( T* ary, int offset, int n, T initial_val${args_in_extra}){ + T a, b, temp; + temp = initial_val; + + for (int i = offset; i < (n + offset); i++){ + a = temp; + b = ${scan_input_expr_call}; + + temp = combine(a, b); + } + return temp; +} + +template +void excl_scan_wo_ip_exp( T* ary, T* out, int N, T initial_val){ + if (N > 0){ + T a, b, temp; + temp = initial_val; + + for (int i = 0; i < N - 1; i++){ + a = temp; + b = ary[i]; + out[i] = temp; + temp = combine(a, b); + } + out[N - 1] = temp; + } +} + + +template +void incl_scan( T* ary, int offset, int cur_buf_size, int N, T initial_val${args_extra}){ + if (N > 0){ + T a, b, carry, prev_item, item; + carry = initial_val; + + for (int i = offset; i < (cur_buf_size + offset); i++){ + a = carry; + b = ${scan_input_expr_call}; + prev_item = carry; + carry = combine(a, b); + item = carry; + + ${scan_output_expr_call}; + } + } +} + + +template +void scan( T* ary, long N, T initial_val${args_extra}){ + if (N > 0){ + %if openmp: + int ntiles = omp_get_max_threads(); + %else: + int ntiles = 1; + %endif + T* stage1_res = new T[ntiles]; + T* stage2_res = new T[ntiles]; + + #pragma omp parallel + { + // Step 1 - reducing each tile + %if openmp: + int itile = omp_get_thread_num(); + %else: + int itile = 0; + %endif + int last_tile = ntiles - 1; + int tile_size = (N / ntiles); + int last_tile_size = N - tile_size * last_tile; + int cur_tile_size = itile == ntiles - 1 ? last_tile_size : tile_size; + int cur_start_idx = itile * tile_size; + + stage1_res[itile] = reduce(ary, cur_start_idx, cur_tile_size, initial_val${call_in_extra}); + #pragma omp barrier + + #pragma omp single + excl_scan_wo_ip_exp(stage1_res, stage2_res, ntiles, initial_val); + + incl_scan(ary, cur_start_idx, cur_tile_size, N, stage2_res[itile]${call_extra}); + } + delete[] stage1_res; + delete[] stage2_res; + py::print(ary); + } +} + + + +PYBIND11_MODULE(${name}, m) { + m.def("${name}", [](py::array_t<${type}> x, long n, ${type} initial${pyb_args}){ + return scan((${type}*) x.request().ptr, n, initial${pyb_call}); + }); +} +''' \ No newline at end of file diff --git a/compyle/cimport.py b/compyle/cimport.py new file mode 100644 index 0000000..bb3da89 --- /dev/null +++ b/compyle/cimport.py @@ -0,0 +1,217 @@ +import os +import hashlib +import json +from stat import S_ISREG +import struct +import io +import importlib +import logging +import shutil +import sys + +from os.path import exists, expanduser, isdir, join + +from distutils.sysconfig import get_config_vars, customize_compiler +from distutils.util import get_platform + +from distutils.extension import Extension +from distutils.command import build_ext +from distutils.core import setup +from distutils.errors import CompileError, LinkError +from distutils.ccompiler import new_compiler, get_default_compiler +from webbrowser import get + +from .ext_module import get_platform_dir, get_md5, get_ext_extension +from .capture_stream import CaptureMultipleStreams # noqa: 402 + + +_TAG = b"cmodule_compyle" +_FMT = struct.Struct("q" + str(len(_TAG)) + "s") + +logger = logging.getLogger(__name__) + + +def is_checksum_valid(module_data): + """ + Load the saved checksum from the extension file check if it matches the + checksum computed from current source files. + """ + deps, old_checksum = _load_checksum_trailer(module_data) + if old_checksum is None: + return False # Already logged error in load_checksum_trailer. + try: + return old_checksum == get_md5(module_data) + except OSError as e: + return False + + +def _load_checksum_trailer(module_data): + try: + with open(module_data["ext_path"], "rb") as f: + f.seek(-_FMT.size, 2) + json_len, tag = _FMT.unpack(f.read(_FMT.size)) + if tag != _TAG: + return None, None + f.seek(-(_FMT.size + json_len), 2) + json_s = f.read(json_len) + except FileNotFoundError: + return None, None + + try: + old_checksum = json.loads(json_s) + except ValueError: + return None, None + return old_checksum + +def _save_checksum_trailer(ext_path, cur_checksum): + # We can just append the checksum to the shared object; this is effectively + # legal (see e.g. https://stackoverflow.com/questions/10106447). + dump = json.dumps(cur_checksum).encode("ascii") + dump += _FMT.pack(len(dump), _TAG) + with open(ext_path, "ab") as file: + file.write(dump) + +def wget_tpnd_headers(): + import requests + baseurl = 'https://gitlab.inria.fr/tapenade/tapenade/-/raw/3.16/ADFirstAidKit/' + files = ['adBuffer.c', 'adBuffer.h', 'adStack.c', 'adStack.h'] + reqs = [requests.get(baseurl, file) for file in files] + saveloc = get_tpnd_obj_dir() + if not os.path.exists(saveloc): + os.mkdir(saveloc) + + for file, r in zip(files, reqs): + with open(join(saveloc, file), 'wb') as f: + f.write(r.content) + + +def get_tpnd_obj_dir(): + plat_dir = get_platform_dir() + root = expanduser(join('~', '.compyle', 'source', plat_dir)) + tpnd_dir = join(root, 'tapenade_src') + return tpnd_dir + + +def compile_tapenade_source(verbose=0): + try: + with CaptureMultipleStreams() as stream: + wget_tpnd_headers() + os.environ["CC"]='g++' + compiler = new_compiler(verbose=1) + customize_compiler(compiler) + compiler.compile([join(get_tpnd_obj_dir(), 'adBuffer.c')], output_dir=get_tpnd_obj_dir(), extra_preargs=['-c', '-fPIC']) + compiler.compile([join(get_tpnd_obj_dir(), 'adStack.c')], output_dir=get_tpnd_obj_dir(), extra_preargs=['-c', '-fPIC']) + objdir = join(get_tpnd_obj_dir(), get_tpnd_obj_dir()[1:]) + shutil.move(join(objdir, 'adBuffer.o'), join(get_tpnd_obj_dir(), 'adBuffer.o')) + shutil.move(join(objdir, 'adStack.o'), join(get_tpnd_obj_dir(), 'adStack.o')) + except (CompileError, LinkError): + hline = "*"*80 + print(hline + "\nERROR") + s_out = stream.get_output() + print(s_out[0]) + print(s_out[1]) + msg = "Compilation of tapenade source failed, please check "\ + "error messages above." + print(hline + "\n" + msg) + sys.exit(1) + + +class Cmodule: + def __init__(self, name, src, root=None, verbose=False, extra_inc_dir=[], extra_link_args=[], extra_compile_args=[]): + self.name = name + self.src = src + self.hash = get_md5(src) + self.verbose = verbose + self.extra_inc_dir = extra_inc_dir + self.extra_link_args = extra_link_args + self.extra_compile_args = extra_compile_args + + self._setup_root(root) + self._setup_filenames() + + + def _setup_root(self, root): + if root is None: + plat_dir = get_platform_dir() + self.root = expanduser(join('~', '.compyle', 'source', plat_dir)) + else: + self.root = root + + self.build_dir = join(self.root, 'build') + + if not isdir(self.build_dir): + try: + os.makedirs(self.build_dir) + except OSError: + pass + + def _write_source(self): + if not exists(self.src_path): + with io.open(self.src_path, 'w', encoding='utf-8') as f: + f.write(self.src) + + + def _setup_filenames(self): + base = 'm_' + self.hash + self.src_path = join(self.root, base + '.cpp') + self.ext_path = join(self.root, self.name + get_ext_extension()) + + def is_build_needed(self): + return True + + def build(self): + if self.is_build_needed: + ext = Extension(name=self.name, + sources=[self.src_path], + language='c++', + include_dirs=self.extra_inc_dir, + extra_link_args=self.extra_link_args, + extra_compile_args=self.extra_compile_args) + args = [ + "build_ext", + "--build-lib=" + self.build_dir, + "--build-temp=" + self.build_dir, + "-v", + ] + + + try: + with CaptureMultipleStreams() as stream: + setup(name=self.name, + ext_modules=[ext], + script_args=args, + cmdclass={"build_ext": build_ext.build_ext}) + shutil.move(join(self.build_dir, self.name + get_ext_extension()), self.ext_path) + + except: + hline = "*"*80 + print(hline + "\nERROR") + s_out = stream.get_output() + print(s_out[0]) + print(s_out[1]) + msg = "Compilation of code failed, please check "\ + "error messages above." + print(hline + "\n" + msg) + os.remove(self.src_path) + sys.exit(1) + + def write_and_build(self): + """Write source and build the extension module""" + if not exists(self.src_path): + self._write_source() + self.build() + else: + self._message("Precompiled code from:", self.src_path) + + def load(self): + self.write_and_build() + spec = importlib.util.spec_from_file_location(self.name, self.ext_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + def _message(self, *args): + msg = ' '.join(args) + logger.info(msg) + if self.verbose: + print(msg) \ No newline at end of file diff --git a/compyle/parallel.py b/compyle/parallel.py index b124735..599fdc0 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -6,12 +6,20 @@ """ +from compyle import c_backend from functools import wraps +from inspect import getmodule +import operator +from re import TEMPLATE from textwrap import wrap +import json from mako.template import Template import numpy as np +import pybind11 +from pyopencl.array import arange +from .cimport import Cmodule from .config import get_config from .profile import profile from .cython_generator import get_parallel_range, CythonGenerator @@ -20,6 +28,33 @@ from . import array +pyb11_bind_elwise = ''' +PYBIND11_MODULE(${name}, m) { + + m.def("${name}", [](${pyb11_args}){ + return elwise_${name}(${pyb11_call}); + }); +} +''' + +pyb11_setup_header = ''' +<% +cfg['compiler_args'] = ['-std=c++11', '-fopenmp'] +cfg['linker_args'] = ['-fopenmp'] +setup_pybind11(cfg) +%> +\n +''' +elementwise_pyb11_template = ''' +void ${name}(${arguments}){ + %if openmp: + #pragma omp parallel for + %endif + for(size_t i = 0; i < SIZE; i++){ + ${operations}; + } +} +''' elementwise_cy_template = ''' from cython.parallel import parallel, prange @@ -503,6 +538,57 @@ def _generate(self, declarations=None): # FIXME: it is difficult to get the sources from pycuda. self.all_source = self.source return knl + elif self.backend == 'c': + import cppimport + import os + import sys + + self.pyb11_backend = c_backend.CBackend() + py_data, c_data = self.pyb11_backend.get_func_signature_pyb11( + self.func) + pyb11_args = ', '.join(py_data[0][1:]) + size = '{arg}.request().size'.format(arg=c_data[1][1]) + pyb11_call = ', '.join([size] + py_data[1][1:]) + c_defn = ['size_t SIZE'] + c_data[0][1:] + arguments = ', '.join(c_defn) + name = self.func.__name__ + expr = '{func}({args})'.format( + func=name, + args=', '.join(c_data[1]) + ) + + openmp = self._config.use_openmp + + templete_elwise = Template(elementwise_pyb11_template) + src_elwise = templete_elwise.render( + name=self.name, + arguments=arguments, + openmp=openmp, + operations=expr + ) + + template = Template(pyb11_bind_elwise) + src_bind = template.render( + name=name, + pyb11_args=pyb11_args, + pyb11_call=pyb11_call + ) + + self.source = self.tp.get_code() + if openmp: + self.source = '#include \n' + self.source + self.all_source = pyb11_setup_header + \ + self.source + '\n' + src_elwise + '\n' + src_bind + + # print(self.all_source) + # exit() + cppfile = open(name + '.cpp', 'w') + cppfile.write(self.all_source) + cppfile.close() + sys.path.append(os.getcwd()) + fn = cppimport.imp(name) + knl = getattr(fn, name) + return knl def _correct_opencl_address_space(self, c_data): code = self.tp.blocks[-1].code.splitlines() @@ -552,7 +638,8 @@ def __call__(self, *args, **kw): self.c_func(*c_args, **kw) event.record() event.synchronize() - + elif self.backend == 'c': + self.c_func(*c_args) class Elementwise(object): def __init__(self, func, backend=None): @@ -662,6 +749,60 @@ def _generate(self, declarations=None): self.tp.compile() self.all_source = self.tp.source return getattr(self.tp.mod, 'py_' + self.name) + elif self.backend == 'c': + self.pyb11_backend = c_backend.CBackend() + if self.func is not None: + self.tp.add(self.func, declarations=declarations) + pyb_data, c_data = self.pyb11_backend.get_func_signature_pyb11( + self.func) + c_call = c_data[1] + + c_call_default = ['N', 'neutral'] + predefined_vars = ['i'] + c_call_default + c_args_extra = [[], []] + pyb_args_extra = [[], []] + for i, var in enumerate(c_call[1:]): + if var not in predefined_vars: + c_args_extra[0].append(c_data[0][i + 1]) + c_args_extra[1].append(var) + pyb_args_extra[0].append(pyb_data[0][i + 1]) + pyb_args_extra[1].append(pyb_data[1][i + 1]) + c_args_extra_str = ", " + ', '.join(c_args_extra[0]) + c_call_extra_str = ", " + ', '.join(c_args_extra[1]) + pyb_args_extra_str = ", " + ', '.join(pyb_args_extra[0]) + pyb_call_extra_str = ", " + ', '.join(pyb_args_extra[1]) + map_expr = f"{self.func.__name__}({', '.join(c_call)})" + else: + c_args_extra_str = f", {self.type + '*'} in" + c_call_extra_str = ", in" + pyb_args_extra_str = f", {self.pyb11_backend.ctype_to_pyb11(self.type + '*')} in" + pyb_call_extra_str = f", ({self.type}*) in.request().ptr" + map_expr = "in[i]" + self.source = self.tp.get_code() + openmp = self._config.use_openmp + if openmp: + self.source = '#include \n' + self.source + + template_red = Template(c_backend.reduction_c_template) + src_red = template_red.render( + args_extra=c_args_extra_str, + call_extra=c_call_extra_str, + map_expr=map_expr, + red_expr=self.reduce_expr, + name=self.name, + type=self.type, + pyb_args=pyb_args_extra_str, + pyb_call=pyb_call_extra_str, + openmp=openmp + ) + self.all_source = self.source + src_red + + extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] + mod = Cmodule(self.name, self.all_source, extra_inc_dir=[pybind11.get_include( + )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) + module = mod.load() + return getattr(module, self.name) + elif self.backend == 'opencl': if self.func is not None: self.tp.add(self.func, declarations=declarations) @@ -815,6 +956,12 @@ def __call__(self, *args): event.record() event.synchronize() return result.get() + elif self.backend == 'c': + size = len(c_args[0]) + c_args.insert(0, json.loads(self.neutral)) + c_args.insert(0, size) + return self.c_func(*c_args) + pass class Reduction(object): @@ -939,6 +1086,8 @@ def _generate(self, declarations=None): return self._generate_cuda_kernel(declarations=declarations) elif self.backend == 'cython': return self._generate_cython_code(declarations=declarations) + elif self.backend == 'c': + return self._generate_c_code(declarations=declarations) def _default_cython_input_function(self): py_data = (['int i', '{type}[:] input'.format(type=self.type)], @@ -1051,6 +1200,91 @@ def _generate_cython_code(self, declarations=None): self.tp.compile() self.all_source = self.tp.source return getattr(self.tp.mod, 'py_' + self.name) + + def _generate_c_code(self, declarations=None): + self.pyb11_backend = c_backend.CBackend() + self.tp.add(self.input_func, declarations=declarations) + self.tp.add(self.output_func, declarations=declarations) + self.source = self.tp.get_code() + openmp = self._config.use_openmp + if openmp: + self.source = '#include \n' + self.source + pyb_data_in, c_data_in = self.pyb11_backend.get_func_signature_pyb11( + self.input_func) + c_call_in = c_data_in[1] + pyb_data_out, c_data_out = self.pyb11_backend.get_func_signature_pyb11( + self.output_func) + c_call_out = c_data_out[1] + c_call_default = ['ary', 'N', 'neutral'] + c_internal_var = ['item', 'prev_item', 'last_item'] + predefined_vars = c_call_default + c_internal_var + + c_args_in_extra = [[], []] + pyb_args_in_extra = [[], []] + for i, var in enumerate(c_call_in[1:]): + if var not in predefined_vars: + c_args_in_extra[0].append(c_data_in[0][i + 1]) + c_args_in_extra[1].append(var) + pyb_args_in_extra[0].append(pyb_data_in[0][i + 1]) + pyb_args_in_extra[1].append(pyb_data_in[1][i + 1]) + c_args_out_extra = [[], []] + pyb_args_extra = [[], []] + for i, var in enumerate(c_call_out[1:]): + if var not in predefined_vars: + c_args_out_extra[0].append(c_data_out[0][i + 1]) + c_args_out_extra[1].append(var) + pyb_args_extra[0].append(pyb_data_out[0][i + 1]) + pyb_args_extra[1].append(pyb_data_out[1][i + 1]) + + c_args_in_extra_str = f", {','.join(c_args_in_extra[0])}" if c_args_in_extra[1] else "" + c_call_in_extra_str = f", {','.join(c_args_in_extra[1])}" if c_args_in_extra[1] else "" + + c_args_extra = c_args_out_extra.copy() + for i, var in enumerate(c_args_in_extra[1]): + if var not in c_args_extra[1]: + c_args_extra[0].append(c_args_in_extra[0][i]) + c_args_extra[1].append(var) + pyb_args_extra[0].append(pyb_args_in_extra[0][i]) + pyb_args_extra[1].append(pyb_args_in_extra[1][i]) + + if not hasattr(self.output_func, 'arg_keys'): + self.output_func.arg_keys = {} + self.output_func.arg_keys[self._get_backend_key( + )] = c_call_default + c_args_extra[1] + + c_args_extra_str = f", {', '.join(c_args_extra[0])}" if c_args_extra[1] else "" + c_call_extra_str = f", {', '.join(c_args_extra[1])}" if c_args_extra[1] else "" + pyb_args_extra_str = f", {', '.join(pyb_args_extra[0])}" if pyb_args_extra[1] else "" + pyb_call_extra_str = f", {', '.join(pyb_args_extra[1])}" if pyb_args_extra[1] else "" + + c_call_in_str = f"{self.input_func.__name__}({', '.join(c_call_in)})" + c_call_out_str = f"{self.output_func.__name__}({', '.join(c_call_out)})" + + template_scan = Template(c_backend.scan_c_template) + src_scan = template_scan.render( + scan_expr=self.scan_expr, + scan_input_expr_call=c_call_in_str, + scan_output_expr_call=c_call_out_str, + args_extra=c_args_extra_str, + args_in_extra=c_args_in_extra_str, + call_extra=c_call_extra_str, + call_in_extra=c_call_in_extra_str, + name=self.name, + type=self.type, + pyb_args=pyb_args_extra_str, + pyb_call=pyb_call_extra_str, + openmp=openmp + ) + self.all_source = self.source + src_scan + print(self.all_source) + # exit() + + extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] + mod = Cmodule(self.name, self.all_source, extra_inc_dir=[pybind11.get_include( + )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) + module = mod.load() + return getattr(module, self.name) + def _wrap_ocl_function(self, func, func_type=None, declarations=None): if func is not None: @@ -1234,6 +1468,11 @@ def __call__(self, **kwargs): self.c_func(*[c_args_dict[k] for k in output_arg_keys]) event.record() event.synchronize() + elif self.backend == 'c': + size = len(c_args_dict[output_arg_keys[0]]) + c_args_dict['N'] = size + c_args_dict['neutral'] = json.loads(self.neutral) + self.c_func(*[c_args_dict[k] for k in output_arg_keys]) class Scan(object): diff --git a/compyle/transpiler.py b/compyle/transpiler.py index 46663e6..8549c7a 100644 --- a/compyle/transpiler.py +++ b/compyle/transpiler.py @@ -8,7 +8,7 @@ from .config import get_config from .ast_utils import get_unknown_names_and_calls from .cython_generator import CythonGenerator, CodeGenerationError -from .translator import OpenCLConverter, CUDAConverter +from .translator import OpenCLConverter, CUDAConverter, CConverter from .ext_module import ExtModule from .extern import Extern, get_extern_code from .utils import getsourcelines @@ -187,6 +187,14 @@ def __init__(self, backend='cython', incl_cluda=True): #define max(x, y) fmax((double)(x), (double)(y)) ''') + elif backend == 'c': + self._cgen = CConverter() + self.header = dedent(''' + // c code for with PyBind11 binding + #include + #include + namespace py = pybind11; + ''') def _handle_symbol(self, name, value): backend = self.backend @@ -278,6 +286,11 @@ def add(self, obj, declarations=None): code = self._cgen.parse( obj, declarations=declarations.get(obj.__name__) if declarations else None) + elif self.backend == 'c': + code = self._cgen.parse( + obj, declarations=declarations.get(obj.__name__) + if declarations else None) + cb = CodeBlock(obj, code) self.blocks.append(cb) From 1292ceeca1d4dfd609699c9839ecf7a8fb8c334d Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Wed, 9 Mar 2022 09:21:30 +0530 Subject: [PATCH 02/16] c backend in ScanJit --- compyle/jit.py | 7 ++++++- compyle/parallel.py | 2 -- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/compyle/jit.py b/compyle/jit.py index 080fd42..7dd9aa4 100644 --- a/compyle/jit.py +++ b/compyle/jit.py @@ -5,7 +5,7 @@ import ast import importlib import warnings -import time +import json from pytools import memoize from .config import get_config from .cython_generator import CythonGenerator @@ -567,3 +567,8 @@ def __call__(self, **kwargs): c_func(*[c_args_dict[k] for k in output_arg_keys]) event.record() event.synchronize() + elif self.backend == 'c': + size = len(c_args_dict[output_arg_keys[0]]) + c_args_dict['N'] = size + c_args_dict['neutral'] = json.loads(self.neutral) + c_func(*[c_args_dict[k] for k in output_arg_keys]) diff --git a/compyle/parallel.py b/compyle/parallel.py index 599fdc0..4e072e6 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -1276,8 +1276,6 @@ def _generate_c_code(self, declarations=None): openmp=openmp ) self.all_source = self.source + src_scan - print(self.all_source) - # exit() extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] mod = Cmodule(self.name, self.all_source, extra_inc_dir=[pybind11.get_include( From 20bf6b03bbf93f79c0e159e5d443c9d03962f218 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Wed, 9 Mar 2022 13:54:20 +0530 Subject: [PATCH 03/16] working c backend --- compyle/c_backend.py | 10 ++-- compyle/cimport.py | 140 +++++++------------------------------------ compyle/jit.py | 2 + compyle/parallel.py | 35 +++-------- 4 files changed, 39 insertions(+), 148 deletions(-) diff --git a/compyle/c_backend.py b/compyle/c_backend.py index ea69e94..6ec49d4 100644 --- a/compyle/c_backend.py +++ b/compyle/c_backend.py @@ -23,8 +23,8 @@ def add_pybind11_func(self, node): doc = '' template = Template(pybind11_wrap_fn) src = template.render( - name = name, - doc = doc + name=name, + doc=doc ) return src @@ -44,7 +44,8 @@ def get_func_signature_pyb11(self, func): pyb11_type = self.ctype_to_pyb11(c_type) pyb11_args.append('{type} {arg}'.format(type=pyb11_type, arg=arg)) if c_type.endswith('*'): - pyb11_call.append('({ctype}){arg}.request().ptr'.format(arg = arg, ctype = c_type)) + pyb11_call.append( + '({ctype}){arg}.request().ptr'.format(arg=arg, ctype=c_type)) else: pyb11_call.append('{arg}'.format(arg=arg)) @@ -60,7 +61,6 @@ def _get_self_type(self): return KnownType('GLOBAL_MEM %s*' % self._class_name) - reduction_c_template = ''' template T combine(T a, T b){ @@ -244,4 +244,4 @@ def _get_self_type(self): return scan((${type}*) x.request().ptr, n, initial${pyb_call}); }); } -''' \ No newline at end of file +''' diff --git a/compyle/cimport.py b/compyle/cimport.py index bb3da89..d385647 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -25,97 +25,6 @@ from .capture_stream import CaptureMultipleStreams # noqa: 402 -_TAG = b"cmodule_compyle" -_FMT = struct.Struct("q" + str(len(_TAG)) + "s") - -logger = logging.getLogger(__name__) - - -def is_checksum_valid(module_data): - """ - Load the saved checksum from the extension file check if it matches the - checksum computed from current source files. - """ - deps, old_checksum = _load_checksum_trailer(module_data) - if old_checksum is None: - return False # Already logged error in load_checksum_trailer. - try: - return old_checksum == get_md5(module_data) - except OSError as e: - return False - - -def _load_checksum_trailer(module_data): - try: - with open(module_data["ext_path"], "rb") as f: - f.seek(-_FMT.size, 2) - json_len, tag = _FMT.unpack(f.read(_FMT.size)) - if tag != _TAG: - return None, None - f.seek(-(_FMT.size + json_len), 2) - json_s = f.read(json_len) - except FileNotFoundError: - return None, None - - try: - old_checksum = json.loads(json_s) - except ValueError: - return None, None - return old_checksum - -def _save_checksum_trailer(ext_path, cur_checksum): - # We can just append the checksum to the shared object; this is effectively - # legal (see e.g. https://stackoverflow.com/questions/10106447). - dump = json.dumps(cur_checksum).encode("ascii") - dump += _FMT.pack(len(dump), _TAG) - with open(ext_path, "ab") as file: - file.write(dump) - -def wget_tpnd_headers(): - import requests - baseurl = 'https://gitlab.inria.fr/tapenade/tapenade/-/raw/3.16/ADFirstAidKit/' - files = ['adBuffer.c', 'adBuffer.h', 'adStack.c', 'adStack.h'] - reqs = [requests.get(baseurl, file) for file in files] - saveloc = get_tpnd_obj_dir() - if not os.path.exists(saveloc): - os.mkdir(saveloc) - - for file, r in zip(files, reqs): - with open(join(saveloc, file), 'wb') as f: - f.write(r.content) - - -def get_tpnd_obj_dir(): - plat_dir = get_platform_dir() - root = expanduser(join('~', '.compyle', 'source', plat_dir)) - tpnd_dir = join(root, 'tapenade_src') - return tpnd_dir - - -def compile_tapenade_source(verbose=0): - try: - with CaptureMultipleStreams() as stream: - wget_tpnd_headers() - os.environ["CC"]='g++' - compiler = new_compiler(verbose=1) - customize_compiler(compiler) - compiler.compile([join(get_tpnd_obj_dir(), 'adBuffer.c')], output_dir=get_tpnd_obj_dir(), extra_preargs=['-c', '-fPIC']) - compiler.compile([join(get_tpnd_obj_dir(), 'adStack.c')], output_dir=get_tpnd_obj_dir(), extra_preargs=['-c', '-fPIC']) - objdir = join(get_tpnd_obj_dir(), get_tpnd_obj_dir()[1:]) - shutil.move(join(objdir, 'adBuffer.o'), join(get_tpnd_obj_dir(), 'adBuffer.o')) - shutil.move(join(objdir, 'adStack.o'), join(get_tpnd_obj_dir(), 'adStack.o')) - except (CompileError, LinkError): - hline = "*"*80 - print(hline + "\nERROR") - s_out = stream.get_output() - print(s_out[0]) - print(s_out[1]) - msg = "Compilation of tapenade source failed, please check "\ - "error messages above." - print(hline + "\n" + msg) - sys.exit(1) - - class Cmodule: def __init__(self, name, src, root=None, verbose=False, extra_inc_dir=[], extra_link_args=[], extra_compile_args=[]): self.name = name @@ -125,11 +34,10 @@ def __init__(self, name, src, root=None, verbose=False, extra_inc_dir=[], extra_ self.extra_inc_dir = extra_inc_dir self.extra_link_args = extra_link_args self.extra_compile_args = extra_compile_args - + self._setup_root(root) self._setup_filenames() - - + def _setup_root(self, root): if root is None: plat_dir = get_platform_dir() @@ -144,21 +52,20 @@ def _setup_root(self, root): os.makedirs(self.build_dir) except OSError: pass - + def _write_source(self): if not exists(self.src_path): with io.open(self.src_path, 'w', encoding='utf-8') as f: f.write(self.src) - def _setup_filenames(self): base = 'm_' + self.hash self.src_path = join(self.root, base + '.cpp') self.ext_path = join(self.root, self.name + get_ext_extension()) - + def is_build_needed(self): return True - + def build(self): if self.is_build_needed: ext = Extension(name=self.name, @@ -168,21 +75,21 @@ def build(self): extra_link_args=self.extra_link_args, extra_compile_args=self.extra_compile_args) args = [ - "build_ext", - "--build-lib=" + self.build_dir, - "--build-temp=" + self.build_dir, - "-v", - ] - - + "build_ext", + "--build-lib=" + self.build_dir, + "--build-temp=" + self.build_dir, + "-v", + ] + try: with CaptureMultipleStreams() as stream: setup(name=self.name, - ext_modules=[ext], - script_args=args, - cmdclass={"build_ext": build_ext.build_ext}) - shutil.move(join(self.build_dir, self.name + get_ext_extension()), self.ext_path) - + ext_modules=[ext], + script_args=args, + cmdclass={"build_ext": build_ext.build_ext}) + shutil.move(join(self.build_dir, self.name + + get_ext_extension()), self.ext_path) + except: hline = "*"*80 print(hline + "\nERROR") @@ -190,28 +97,27 @@ def build(self): print(s_out[0]) print(s_out[1]) msg = "Compilation of code failed, please check "\ - "error messages above." + "error messages above." print(hline + "\n" + msg) os.remove(self.src_path) sys.exit(1) - + def write_and_build(self): """Write source and build the extension module""" - if not exists(self.src_path): + if not (exists(self.src_path) and exists(self.ext_path)): self._write_source() self.build() else: self._message("Precompiled code from:", self.src_path) - + def load(self): self.write_and_build() spec = importlib.util.spec_from_file_location(self.name, self.ext_path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module - + def _message(self, *args): msg = ' '.join(args) - logger.info(msg) if self.verbose: - print(msg) \ No newline at end of file + print(msg) diff --git a/compyle/jit.py b/compyle/jit.py index 7dd9aa4..261d54f 100644 --- a/compyle/jit.py +++ b/compyle/jit.py @@ -368,6 +368,8 @@ def __call__(self, *args, **kw): c_func(*c_args, **kw) event.record() event.synchronize() + elif self.backend == 'c': + c_func(*c_args) class ReductionJIT(parallel.ReductionBase): diff --git a/compyle/parallel.py b/compyle/parallel.py index 4e072e6..052d2b3 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -37,14 +37,6 @@ } ''' -pyb11_setup_header = ''' -<% -cfg['compiler_args'] = ['-std=c++11', '-fopenmp'] -cfg['linker_args'] = ['-fopenmp'] -setup_pybind11(cfg) -%> -\n -''' elementwise_pyb11_template = ''' void ${name}(${arguments}){ %if openmp: @@ -539,10 +531,6 @@ def _generate(self, declarations=None): self.all_source = self.source return knl elif self.backend == 'c': - import cppimport - import os - import sys - self.pyb11_backend = c_backend.CBackend() py_data, c_data = self.pyb11_backend.get_func_signature_pyb11( self.func) @@ -577,18 +565,13 @@ def _generate(self, declarations=None): self.source = self.tp.get_code() if openmp: self.source = '#include \n' + self.source - self.all_source = pyb11_setup_header + \ - self.source + '\n' + src_elwise + '\n' + src_bind - - # print(self.all_source) - # exit() - cppfile = open(name + '.cpp', 'w') - cppfile.write(self.all_source) - cppfile.close() - sys.path.append(os.getcwd()) - fn = cppimport.imp(name) - knl = getattr(fn, name) - return knl + self.all_source = self.source + '\n' + src_elwise + '\n' + src_bind + + extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] + mod = Cmodule(name, self.all_source, extra_inc_dir=[pybind11.get_include( + )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) + module = mod.load() + return getattr(module, name) def _correct_opencl_address_space(self, c_data): code = self.tp.blocks[-1].code.splitlines() @@ -641,6 +624,7 @@ def __call__(self, *args, **kw): elif self.backend == 'c': self.c_func(*c_args) + class Elementwise(object): def __init__(self, func, backend=None): self._func = func @@ -1200,7 +1184,7 @@ def _generate_cython_code(self, declarations=None): self.tp.compile() self.all_source = self.tp.source return getattr(self.tp.mod, 'py_' + self.name) - + def _generate_c_code(self, declarations=None): self.pyb11_backend = c_backend.CBackend() self.tp.add(self.input_func, declarations=declarations) @@ -1283,7 +1267,6 @@ def _generate_c_code(self, declarations=None): module = mod.load() return getattr(module, self.name) - def _wrap_ocl_function(self, func, func_type=None, declarations=None): if func is not None: self.tp.add(func, declarations=declarations) From 61082baa1a1fb52e47a7cf78e3a6a5407a2345b4 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Thu, 17 Mar 2022 16:36:42 +0530 Subject: [PATCH 04/16] working c backend with tests --- compyle/array.py | 2 +- compyle/c_backend.py | 67 +++++++++++++---------- compyle/cimport.py | 46 +++++++--------- compyle/jit.py | 5 +- compyle/parallel.py | 97 +++++++++++++++++++-------------- compyle/tests/test_c_backend.py | 27 +++++++++ compyle/tests/test_cimport.py | 64 ++++++++++++++++++++++ compyle/tests/test_parallel.py | 61 ++++++++++++++++++++- compyle/transpiler.py | 4 +- 9 files changed, 272 insertions(+), 101 deletions(-) create mode 100644 compyle/tests/test_c_backend.py create mode 100644 compyle/tests/test_cimport.py diff --git a/compyle/array.py b/compyle/array.py index 172add4..bfab231 100644 --- a/compyle/array.py +++ b/compyle/array.py @@ -1023,7 +1023,7 @@ def get_buff(self, offset=0, length=0): return cu_bufint(self._data, nbytes, int(offset)) def get(self): - if self.backend == 'cython': + if self.backend == 'cython' or self.backend == 'c': return self.dev elif self.backend == 'opencl' or self.backend == 'cuda': return self.dev.get() diff --git a/compyle/c_backend.py b/compyle/c_backend.py index 6ec49d4..2d9b7b9 100644 --- a/compyle/c_backend.py +++ b/compyle/c_backend.py @@ -1,33 +1,15 @@ from textwrap import dedent -import pybind11 from .translator import CConverter from mako.template import Template from .translator import ocl_detect_type, OpenCLConverter, KnownType from .cython_generator import CythonGenerator, get_func_definition, getsourcelines -pybind11_wrap_fn = ''' -PYBIND11_MODULE(${name}, m) { - m.def("${name}", &${name}, "${doc}"); -} -''' - class CBackend(CythonGenerator): def __init__(self, detect_type=ocl_detect_type, known_types=None): super(CBackend, self).__init__() # self.function_address_space = 'WITHIN_KERNEL ' - def add_pybind11_func(self, node): - args = self._get_function_args(node) - name = node.body[0].name - doc = '' - template = Template(pybind11_wrap_fn) - src = template.render( - name=name, - doc=doc - ) - return src - def get_func_signature_pyb11(self, func): sourcelines = getsourcelines(func)[0] defn, lines = get_func_definition(sourcelines) @@ -61,6 +43,30 @@ def _get_self_type(self): return KnownType('GLOBAL_MEM %s*' % self._class_name) +elwise_c_pybind = ''' + +PYBIND11_MODULE(${modname}, m) { + + m.def("${modname}", [](${pyb11_args}){ + return ${name}(${pyb11_call}); + }); +} + +''' + +elwise_c_template = ''' + +void ${name}(${arguments}){ + %if openmp: + #pragma omp parallel for + %endif + for(size_t i = 0; i < SIZE; i++){ + ${operations}; + } +} + +''' + reduction_c_template = ''' template T combine(T a, T b){ @@ -131,10 +137,13 @@ def _get_self_type(self): } return ans; } +''' + +reduction_c_pybind = ''' PYBIND11_MODULE(${name}, m) { - m.def("${name}", [](long n, ${type} initial${pyb_args}){ - return reduce_all(n, initial${pyb_call}); + m.def("${name}", [](long n${pyb_args}){ + return reduce_all(n, (${type})${neutral}${pyb_call}); }); } @@ -168,19 +177,19 @@ def _get_self_type(self): T a, b, temp; temp = initial_val; - for (int i = 0; i < N - 1; i++){ + for (int i = 0; i < N; i++){ a = temp; b = ary[i]; out[i] = temp; temp = combine(a, b); } - out[N - 1] = temp; + out[N] = temp; } } template -void incl_scan( T* ary, int offset, int cur_buf_size, int N, T initial_val${args_extra}){ +void incl_scan( T* ary, int offset, int cur_buf_size, int N, T initial_val, T last_item${args_extra}){ if (N > 0){ T a, b, carry, prev_item, item; carry = initial_val; @@ -207,7 +216,7 @@ def _get_self_type(self): int ntiles = 1; %endif T* stage1_res = new T[ntiles]; - T* stage2_res = new T[ntiles]; + T* stage2_res = new T[ntiles + 1]; #pragma omp parallel { @@ -229,19 +238,19 @@ def _get_self_type(self): #pragma omp single excl_scan_wo_ip_exp(stage1_res, stage2_res, ntiles, initial_val); - incl_scan(ary, cur_start_idx, cur_tile_size, N, stage2_res[itile]${call_extra}); + incl_scan(ary, cur_start_idx, cur_tile_size, N, stage2_res[itile], stage2_res[ntiles]${call_extra}); } delete[] stage1_res; delete[] stage2_res; py::print(ary); } } - - +''' +scan_c_pybind = ''' PYBIND11_MODULE(${name}, m) { - m.def("${name}", [](py::array_t<${type}> x, long n, ${type} initial${pyb_args}){ - return scan((${type}*) x.request().ptr, n, initial${pyb_call}); + m.def("${name}", [](py::array_t<${type}> x, long n${pyb_args}){ + return scan((${type}*) x.request().ptr, n, (${type})${neutral}${pyb_call}); }); } ''' diff --git a/compyle/cimport.py b/compyle/cimport.py index d385647..513f0a2 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -1,35 +1,27 @@ import os -import hashlib -import json from stat import S_ISREG -import struct import io import importlib -import logging import shutil import sys +from filelock import FileLock, Timeout from os.path import exists, expanduser, isdir, join -from distutils.sysconfig import get_config_vars, customize_compiler -from distutils.util import get_platform - +import pybind11 from distutils.extension import Extension from distutils.command import build_ext from distutils.core import setup -from distutils.errors import CompileError, LinkError -from distutils.ccompiler import new_compiler, get_default_compiler -from webbrowser import get from .ext_module import get_platform_dir, get_md5, get_ext_extension from .capture_stream import CaptureMultipleStreams # noqa: 402 class Cmodule: - def __init__(self, name, src, root=None, verbose=False, extra_inc_dir=[], extra_link_args=[], extra_compile_args=[]): - self.name = name + def __init__(self, src, hash_fn, root=None, verbose=False, extra_inc_dir=[pybind11.get_include()], extra_link_args=[], extra_compile_args=[]): self.src = src - self.hash = get_md5(src) + self.hash = hash_fn + self.name = f'm_{self.hash}' self.verbose = verbose self.extra_inc_dir = extra_inc_dir self.extra_link_args = extra_link_args @@ -37,6 +29,7 @@ def __init__(self, name, src, root=None, verbose=False, extra_inc_dir=[], extra_ self._setup_root(root) self._setup_filenames() + self.lock = FileLock(self.lock_path, timeout=120) def _setup_root(self, root): if root is None: @@ -59,21 +52,20 @@ def _write_source(self): f.write(self.src) def _setup_filenames(self): - base = 'm_' + self.hash - self.src_path = join(self.root, base + '.cpp') + self.src_path = join(self.root, self.name + '.cpp') self.ext_path = join(self.root, self.name + get_ext_extension()) + self.lock_path = join(self.root, self.name + '.lock') def is_build_needed(self): - return True + return not exists(self.ext_path) def build(self): - if self.is_build_needed: - ext = Extension(name=self.name, - sources=[self.src_path], - language='c++', - include_dirs=self.extra_inc_dir, - extra_link_args=self.extra_link_args, - extra_compile_args=self.extra_compile_args) + ext = Extension(name=self.name, + sources=[self.src_path], + language='c++', + include_dirs=self.extra_inc_dir, + extra_link_args=self.extra_link_args, + extra_compile_args=self.extra_compile_args) args = [ "build_ext", "--build-lib=" + self.build_dir, @@ -99,14 +91,14 @@ def build(self): msg = "Compilation of code failed, please check "\ "error messages above." print(hline + "\n" + msg) - os.remove(self.src_path) sys.exit(1) def write_and_build(self): """Write source and build the extension module""" - if not (exists(self.src_path) and exists(self.ext_path)): - self._write_source() - self.build() + if self.is_build_needed(): + with self.lock: + self._write_source() + self.build() else: self._message("Precompiled code from:", self.src_path) diff --git a/compyle/jit.py b/compyle/jit.py index 261d54f..bf0ee4f 100644 --- a/compyle/jit.py +++ b/compyle/jit.py @@ -450,6 +450,10 @@ def __call__(self, *args, **kw): event.record() event.synchronize() return result.get() + elif self.backend == 'c': + size = len(c_args[0]) + c_args.insert(0, size) + return c_func(*c_args) class ScanJIT(parallel.ScanBase): @@ -572,5 +576,4 @@ def __call__(self, **kwargs): elif self.backend == 'c': size = len(c_args_dict[output_arg_keys[0]]) c_args_dict['N'] = size - c_args_dict['neutral'] = json.loads(self.neutral) c_func(*[c_args_dict[k] for k in output_arg_keys]) diff --git a/compyle/parallel.py b/compyle/parallel.py index 052d2b3..7b2bf4c 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -16,6 +16,7 @@ from mako.template import Template import numpy as np +import py import pybind11 from pyopencl.array import arange @@ -24,29 +25,11 @@ from .profile import profile from .cython_generator import get_parallel_range, CythonGenerator from .transpiler import Transpiler, convert_to_float_if_needed -from .types import dtype_to_ctype +from .types import TYPES, annotate, dtype_to_ctype +from .ext_module import get_md5 from . import array -pyb11_bind_elwise = ''' -PYBIND11_MODULE(${name}, m) { - - m.def("${name}", [](${pyb11_args}){ - return elwise_${name}(${pyb11_call}); - }); -} -''' - -elementwise_pyb11_template = ''' -void ${name}(${arguments}){ - %if openmp: - #pragma omp parallel for - %endif - for(size_t i = 0; i < SIZE; i++){ - ${operations}; - } -} -''' elementwise_cy_template = ''' from cython.parallel import parallel, prange @@ -547,7 +530,7 @@ def _generate(self, declarations=None): openmp = self._config.use_openmp - templete_elwise = Template(elementwise_pyb11_template) + templete_elwise = Template(c_backend.elwise_c_template) src_elwise = templete_elwise.render( name=self.name, arguments=arguments, @@ -555,23 +538,28 @@ def _generate(self, declarations=None): operations=expr ) - template = Template(pyb11_bind_elwise) + self.source = self.tp.get_code() + if openmp: + self.source = '#include \n' + self.source + self.all_source = self.source + '\n' + src_elwise + hash_fn = get_md5(self.all_source) + modname = f'm_{hash_fn}' + + template = Template(c_backend.elwise_c_pybind) src_bind = template.render( - name=name, + name=self.name, + modname=modname, pyb11_args=pyb11_args, pyb11_call=pyb11_call ) - self.source = self.tp.get_code() - if openmp: - self.source = '#include \n' + self.source - self.all_source = self.source + '\n' + src_elwise + '\n' + src_bind + self.all_source += src_bind extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(name, self.all_source, extra_inc_dir=[pybind11.get_include( + mod = Cmodule(self.all_source, hash_fn, extra_inc_dir=[pybind11.get_include( )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) module = mod.load() - return getattr(module, name) + return getattr(module, modname) def _correct_opencl_address_space(self, c_data): code = self.tp.blocks[-1].code.splitlines() @@ -736,6 +724,7 @@ def _generate(self, declarations=None): elif self.backend == 'c': self.pyb11_backend = c_backend.CBackend() if self.func is not None: + self.func.__annotations__['return'] = TYPES[self.type] self.tp.add(self.func, declarations=declarations) pyb_data, c_data = self.pyb11_backend.get_func_signature_pyb11( self.func) @@ -775,17 +764,27 @@ def _generate(self, declarations=None): red_expr=self.reduce_expr, name=self.name, type=self.type, - pyb_args=pyb_args_extra_str, - pyb_call=pyb_call_extra_str, openmp=openmp ) self.all_source = self.source + src_red + hash_fn = get_md5(self.all_source) + modname = f'm_{hash_fn}' + + template_pybind = Template(c_backend.reduction_c_pybind) + src_pybind = template_pybind.render( + name=modname, + type=self.type, + pyb_args=pyb_args_extra_str, + pyb_call=pyb_call_extra_str, + neutral=self.neutral, + ) + self.all_source += src_pybind extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.name, self.all_source, extra_inc_dir=[pybind11.get_include( + mod = Cmodule(self.all_source, hash_fn, extra_inc_dir=[pybind11.get_include( )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) module = mod.load() - return getattr(module, self.name) + return getattr(module, modname) elif self.backend == 'opencl': if self.func is not None: @@ -942,10 +941,8 @@ def __call__(self, *args): return result.get() elif self.backend == 'c': size = len(c_args[0]) - c_args.insert(0, json.loads(self.neutral)) c_args.insert(0, size) return self.c_func(*c_args) - pass class Reduction(object): @@ -1187,19 +1184,25 @@ def _generate_cython_code(self, declarations=None): def _generate_c_code(self, declarations=None): self.pyb11_backend = c_backend.CBackend() + if not self.input_func: + @annotate(i='int', ary=f'{self.type}p', return_=f'{self.type}') + def input_expr(i, ary): + return ary[i] + self.input_func = input_expr + self.tp.add(self.input_func, declarations=declarations) + pyb_data_in, c_data_in = self.pyb11_backend.get_func_signature_pyb11( + self.input_func) self.tp.add(self.output_func, declarations=declarations) self.source = self.tp.get_code() openmp = self._config.use_openmp if openmp: self.source = '#include \n' + self.source - pyb_data_in, c_data_in = self.pyb11_backend.get_func_signature_pyb11( - self.input_func) c_call_in = c_data_in[1] pyb_data_out, c_data_out = self.pyb11_backend.get_func_signature_pyb11( self.output_func) c_call_out = c_data_out[1] - c_call_default = ['ary', 'N', 'neutral'] + c_call_default = ['ary', 'N'] c_internal_var = ['item', 'prev_item', 'last_item'] predefined_vars = c_call_default + c_internal_var @@ -1260,12 +1263,25 @@ def _generate_c_code(self, declarations=None): openmp=openmp ) self.all_source = self.source + src_scan + hash_fn = get_md5(self.all_source) + modname = f'm_{hash_fn}' + + pybind_template = Template(c_backend.scan_c_pybind) + src_pybind = pybind_template.render( + name=modname, + type=self.type, + pyb_args=pyb_args_extra_str, + pyb_call=pyb_call_extra_str, + neutral=self.neutral + ) + + self.all_source += src_pybind extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.name, self.all_source, extra_inc_dir=[pybind11.get_include( + mod = Cmodule(self.all_source, hash_fn, extra_inc_dir=[pybind11.get_include( )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) module = mod.load() - return getattr(module, self.name) + return getattr(module, modname) def _wrap_ocl_function(self, func, func_type=None, declarations=None): if func is not None: @@ -1452,7 +1468,6 @@ def __call__(self, **kwargs): elif self.backend == 'c': size = len(c_args_dict[output_arg_keys[0]]) c_args_dict['N'] = size - c_args_dict['neutral'] = json.loads(self.neutral) self.c_func(*[c_args_dict[k] for k in output_arg_keys]) diff --git a/compyle/tests/test_c_backend.py b/compyle/tests/test_c_backend.py new file mode 100644 index 0000000..901fc86 --- /dev/null +++ b/compyle/tests/test_c_backend.py @@ -0,0 +1,27 @@ +import unittest +from unittest import TestCase +from ..c_backend import CBackend +from ..types import annotate +import numpy as np + +class TestCBackend(TestCase): + def test_get_func_signature(self): + cbackend = CBackend() + @annotate(x='int', y='intp', z='int', w='double') + def test_fn(x, y, z=2, w=3.0): + return x+y+z+w + (pyb11_args, pyb11_call), (c_args, c_call) = cbackend.get_func_signature(test_fn) + exp_pyb11_args = ['int x', 'int[:] y', 'int z', 'double w'] + exp_pyb11_call = ['x', '&y[0]', 'z', 'w'] + exp_c_args = ['int x', 'int* y', 'int z', 'double w'] + exp_c_call = ['x', 'y', 'z', 'w'] + + self.assertListEqual(pyb11_args, exp_pyb11_args) + self.assertListEqual(pyb11_call, exp_pyb11_call) + self.assertListEqual(c_args, exp_c_args) + self.assertListEqual(c_call, exp_c_call) + + +if __name__ == '__main__': + unittest.main() + \ No newline at end of file diff --git a/compyle/tests/test_cimport.py b/compyle/tests/test_cimport.py new file mode 100644 index 0000000..75b490f --- /dev/null +++ b/compyle/tests/test_cimport.py @@ -0,0 +1,64 @@ +from genericpath import exists +from ntpath import join +import tempfile +import unittest +from unittest import TestCase +import numpy as np +from os.path import exists, expanduser, isdir, join +import sys +import os +from mako.template import Template + + +from compyle.cimport import Cmodule +from compyle.types import annotate +from compyle.ext_module import get_platform_dir, get_md5, get_ext_extension + +dummy_module = ''' +#include +#include +namespace py = pybind11; + +void f(int n, int* x, int* y) +{ + for(int i = 0; i < n; i++){ + y[i] = (2 * x[i]); + } +} +''' +pybind = """ + +PYBIND11_MODULE(${name}, m) { + + m.def("${name}", [](py::array_t x, py::array_t y){ + return f(x.request().size, (int*)x.request().ptr, (int*)y.request().ptr); + }); +} +""" + + +class TestCmodule(TestCase): + def setUp(self): + self.root = tempfile.mkdtemp() + + def test_build(self): + hash_fn = get_md5(dummy_module) + name = f'm_{hash_fn}' + pyb_template = Template(pybind) + src_pybind = pyb_template.render(name=name) + + all_src = dummy_module + src_pybind + mod = Cmodule(all_src, hash_fn=hash_fn, root=self.root) + checksum = get_md5(dummy_module) + self.assertTrue(mod.is_build_needed()) + + mod.load() + self.assertTrue(exists(join(self.root, 'build'))) + self.assertTrue(exists(join(self.root, 'm_' + checksum + '.cpp'))) + self.assertTrue( + exists(join(self.root, f'{name}' + get_ext_extension()))) + self.assertFalse(mod.is_build_needed()) + + +if __name__ == '__main__': + unittest.main() diff --git a/compyle/tests/test_parallel.py b/compyle/tests/test_parallel.py index 8fed025..1601a26 100644 --- a/compyle/tests/test_parallel.py +++ b/compyle/tests/test_parallel.py @@ -19,6 +19,65 @@ def external(x): return x +class ParallelUtilsBaseC(object): + def test_elementwise_works_with_c(self): + self._check_simple_elementwise(backend='c') + + def test_elementwise_works_with_global_constant_c(self): + self._check_elementwise_with_constant(backend='c') + + def test_reduction_works_without_map_c(self): + self._check_simple_reduction(backend='c') + + def test_reduction_works_with_map_c(self): + self._check_reduction_with_map(backend='c') + + def test_reduction_works_with_external_func_c(self): + self._check_reduction_with_external_func(backend='c') + + def test_reduction_works_neutral_c(self): + self._check_reduction_min(backend='c') + + def test_scan_works_c(self): + self._test_scan(backend='c') + + def test_scan_works_c_parallel(self): + with use_config(use_openmp=True): + self._test_scan(backend='c') + + def test_large_scan_works_c_parallel(self): + with use_config(use_openmp=True): + self._test_large_scan(backend='c') + + def test_scan_works_with_external_func_c(self): + self._test_scan_with_external_func(backend='c') + + def test_scan_works_with_external_func_c_parallel(self): + with use_config(use_openmp=True): + self._test_scan_with_external_func(backend='c') + + def test_scan_last_item_c_parallel(self): + with use_config(use_openmp=True): + self._test_scan_last_item(backend='c') + + def test_scan_last_item_c_serial(self): + self._test_scan_last_item(backend='c') + + def test_unique_scan_c(self): + self._test_unique_scan(backend='c') + + def test_unique_scan_c_parallel(self): + with use_config(use_openmp=True): + self._test_unique_scan(backend='c') + + def test_repeated_scans_with_different_settings_c(self): + with use_config(use_openmp=False): + self._test_unique_scan(backend='c') + + with use_config(use_openmp=True): + self._test_unique_scan(backend='c') + + class ParallelUtilsBase(object): def test_elementwise_works_with_cython(self): self._check_simple_elementwise(backend='cython') @@ -221,7 +280,7 @@ def test_repeated_scans_with_different_settings(self): self._test_unique_scan(backend='cython') -class TestParallelUtils(ParallelUtilsBase, unittest.TestCase): +class TestParallelUtils(ParallelUtilsBase, ParallelUtilsBaseC, unittest.TestCase): def setUp(self): cfg = get_config() self._use_double = cfg.use_double diff --git a/compyle/transpiler.py b/compyle/transpiler.py index 8549c7a..2fd709f 100644 --- a/compyle/transpiler.py +++ b/compyle/transpiler.py @@ -194,6 +194,7 @@ def __init__(self, backend='cython', incl_cluda=True): #include #include namespace py = pybind11; + using namespace std; ''') def _handle_symbol(self, name, value): @@ -224,6 +225,8 @@ def _handle_symbol(self, name, value): return '#define {name} {value}'.format( name=name, value=value ) + elif self.backend == 'c': + return f"{ctype} {name} = {value};" def _get_comment(self): return '#' if self.backend == 'cython' else '//' @@ -291,7 +294,6 @@ def add(self, obj, declarations=None): obj, declarations=declarations.get(obj.__name__) if declarations else None) - cb = CodeBlock(obj, code) self.blocks.append(cb) From 333b37a87f8ca43af26a2c462f3416cf1f1807f6 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Thu, 17 Mar 2022 20:20:12 +0530 Subject: [PATCH 05/16] pep8 formatting --- compyle/array.py | 1 + compyle/c_backend.py | 46 +++++++++++++++++++++++++------------------- compyle/cimport.py | 9 +++++---- compyle/parallel.py | 45 ++++++++++++++++++++++++++++--------------- 4 files changed, 62 insertions(+), 39 deletions(-) diff --git a/compyle/array.py b/compyle/array.py index bfab231..12acd4f 100644 --- a/compyle/array.py +++ b/compyle/array.py @@ -526,6 +526,7 @@ def trapz(y, x=None, dx=1.0, backend=None): out = dot(d, sum_ar) * 0.5 return out + @annotate def where_elwise(i, condition, x, y, ans): if condition[i]: diff --git a/compyle/c_backend.py b/compyle/c_backend.py index 2d9b7b9..f7b09c6 100644 --- a/compyle/c_backend.py +++ b/compyle/c_backend.py @@ -1,8 +1,6 @@ -from textwrap import dedent -from .translator import CConverter -from mako.template import Template -from .translator import ocl_detect_type, OpenCLConverter, KnownType -from .cython_generator import CythonGenerator, get_func_definition, getsourcelines +from .translator import ocl_detect_type, KnownType +from .cython_generator import CythonGenerator, get_func_definition +from .cython_generator import getsourcelines class CBackend(CythonGenerator): @@ -27,7 +25,8 @@ def get_func_signature_pyb11(self, func): pyb11_args.append('{type} {arg}'.format(type=pyb11_type, arg=arg)) if c_type.endswith('*'): pyb11_call.append( - '({ctype}){arg}.request().ptr'.format(arg=arg, ctype=c_type)) + '({ctype}){arg}.request().ptr' + .format(arg=arg, ctype=c_type)) else: pyb11_call.append('{arg}'.format(arg=arg)) @@ -46,7 +45,7 @@ def _get_self_type(self): elwise_c_pybind = ''' PYBIND11_MODULE(${modname}, m) { - + m.def("${modname}", [](${pyb11_args}){ return ${name}(${pyb11_call}); }); @@ -61,7 +60,7 @@ def _get_self_type(self): #pragma omp parallel for %endif for(size_t i = 0; i < SIZE; i++){ - ${operations}; + ${operations}; } } @@ -123,11 +122,12 @@ def _get_self_type(self): %endif int last_tile = ntiles - 1; int tile_size = (N / ntiles); - int last_tile_size = N - tile_size * last_tile; - int cur_tile_size = itile == ntiles - 1 ? last_tile_size : tile_size; + int last_tile_sz = N - tile_size * last_tile; + int cur_tile_size = itile == ntiles - 1 ? last_tile_sz : tile_size; int cur_start_idx = itile * tile_size; - stage1_res[itile] = reduce(cur_start_idx, cur_tile_size, initial_val${call_extra}); + stage1_res[itile] = reduce(cur_start_idx, cur_tile_size, + initial_val${call_extra}); #pragma omp barrier #pragma omp single @@ -176,7 +176,7 @@ def _get_self_type(self): if (N > 0){ T a, b, temp; temp = initial_val; - + for (int i = 0; i < N; i++){ a = temp; b = ary[i]; @@ -189,7 +189,9 @@ def _get_self_type(self): template -void incl_scan( T* ary, int offset, int cur_buf_size, int N, T initial_val, T last_item${args_extra}){ +void incl_scan( T* ary, int offset, int cur_buf_size, int N, + T initial_val, T last_item${args_extra}) +{ if (N > 0){ T a, b, carry, prev_item, item; carry = initial_val; @@ -200,7 +202,7 @@ def _get_self_type(self): prev_item = carry; carry = combine(a, b); item = carry; - + ${scan_output_expr_call}; } } @@ -228,17 +230,20 @@ def _get_self_type(self): %endif int last_tile = ntiles - 1; int tile_size = (N / ntiles); - int last_tile_size = N - tile_size * last_tile; - int cur_tile_size = itile == ntiles - 1 ? last_tile_size : tile_size; + int last_tile_sz = N - tile_size * last_tile; + int cur_tile_size = itile == ntiles - 1 ? last_tile_sz : tile_size; int cur_start_idx = itile * tile_size; - stage1_res[itile] = reduce(ary, cur_start_idx, cur_tile_size, initial_val${call_in_extra}); + stage1_res[itile] = reduce(ary, cur_start_idx, cur_tile_size, + initial_val${call_in_extra}); #pragma omp barrier #pragma omp single - excl_scan_wo_ip_exp(stage1_res, stage2_res, ntiles, initial_val); + excl_scan_wo_ip_exp(stage1_res, stage2_res, + ntiles, initial_val); - incl_scan(ary, cur_start_idx, cur_tile_size, N, stage2_res[itile], stage2_res[ntiles]${call_extra}); + incl_scan(ary, cur_start_idx, cur_tile_size, N, + stage2_res[itile],stage2_res[ntiles]${call_extra}); } delete[] stage1_res; delete[] stage2_res; @@ -250,7 +255,8 @@ def _get_self_type(self): PYBIND11_MODULE(${name}, m) { m.def("${name}", [](py::array_t<${type}> x, long n${pyb_args}){ - return scan((${type}*) x.request().ptr, n, (${type})${neutral}${pyb_call}); + return scan((${type}*) x.request().ptr, n, + (${type})${neutral}${pyb_call}); }); } ''' diff --git a/compyle/cimport.py b/compyle/cimport.py index 513f0a2..475c796 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -1,10 +1,9 @@ import os -from stat import S_ISREG import io import importlib import shutil import sys -from filelock import FileLock, Timeout +from filelock import FileLock from os.path import exists, expanduser, isdir, join @@ -13,12 +12,14 @@ from distutils.command import build_ext from distutils.core import setup -from .ext_module import get_platform_dir, get_md5, get_ext_extension +from .ext_module import get_platform_dir, get_ext_extension from .capture_stream import CaptureMultipleStreams # noqa: 402 class Cmodule: - def __init__(self, src, hash_fn, root=None, verbose=False, extra_inc_dir=[pybind11.get_include()], extra_link_args=[], extra_compile_args=[]): + def __init__(self, src, hash_fn, root=None, verbose=False, + extra_inc_dir=[pybind11.get_include()], + extra_link_args=[], extra_compile_args=[]): self.src = src self.hash = hash_fn self.name = f'm_{self.hash}' diff --git a/compyle/parallel.py b/compyle/parallel.py index 7b2bf4c..2f1dc99 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -556,8 +556,10 @@ def _generate(self, declarations=None): self.all_source += src_bind extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.all_source, hash_fn, extra_inc_dir=[pybind11.get_include( - )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) + mod = Cmodule(self.all_source, hash_fn, + extra_inc_dir=[pybind11.get_include()], + extra_compile_args=extra_comp_args, + extra_link_args=extra_comp_args) module = mod.load() return getattr(module, modname) @@ -748,7 +750,8 @@ def _generate(self, declarations=None): else: c_args_extra_str = f", {self.type + '*'} in" c_call_extra_str = ", in" - pyb_args_extra_str = f", {self.pyb11_backend.ctype_to_pyb11(self.type + '*')} in" + arg_typ = self.pyb11_backend.ctype_to_pyb11(self.type + '*') + pyb_args_extra_str = f", {arg_typ} in" pyb_call_extra_str = f", ({self.type}*) in.request().ptr" map_expr = "in[i]" self.source = self.tp.get_code() @@ -781,8 +784,10 @@ def _generate(self, declarations=None): self.all_source += src_pybind extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.all_source, hash_fn, extra_inc_dir=[pybind11.get_include( - )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) + mod = Cmodule(self.all_source, hash_fn, + extra_inc_dir=[pybind11.get_include()], + extra_compile_args=extra_comp_args, + extra_link_args=extra_comp_args) module = mod.load() return getattr(module, modname) @@ -1223,8 +1228,10 @@ def input_expr(i, ary): pyb_args_extra[0].append(pyb_data_out[0][i + 1]) pyb_args_extra[1].append(pyb_data_out[1][i + 1]) - c_args_in_extra_str = f", {','.join(c_args_in_extra[0])}" if c_args_in_extra[1] else "" - c_call_in_extra_str = f", {','.join(c_args_in_extra[1])}" if c_args_in_extra[1] else "" + c0 = c_args_in_extra[0] + c1 = c_args_in_extra[1] + c_args_in_extra_str = f", {','.join(c0)}" if c1 else "" + c_call_in_extra_str = f", {','.join(c1)}" if c1 else "" c_args_extra = c_args_out_extra.copy() for i, var in enumerate(c_args_in_extra[1]): @@ -1239,13 +1246,19 @@ def input_expr(i, ary): self.output_func.arg_keys[self._get_backend_key( )] = c_call_default + c_args_extra[1] - c_args_extra_str = f", {', '.join(c_args_extra[0])}" if c_args_extra[1] else "" - c_call_extra_str = f", {', '.join(c_args_extra[1])}" if c_args_extra[1] else "" - pyb_args_extra_str = f", {', '.join(pyb_args_extra[0])}" if pyb_args_extra[1] else "" - pyb_call_extra_str = f", {', '.join(pyb_args_extra[1])}" if pyb_args_extra[1] else "" + c0 = c_args_extra[0] + c1 = c_args_extra[1] + p0 = pyb_args_extra[0] + p1 = pyb_args_extra[1] + c_args_extra_str = f", {', '.join(c0)}" if c1 else "" + c_call_extra_str = f", {', '.join(c1)}" if c1 else "" + pyb_args_extra_str = f", {', '.join(p0)}" if p1 else "" + pyb_call_extra_str = f", {', '.join(p1)}" if p1 else "" - c_call_in_str = f"{self.input_func.__name__}({', '.join(c_call_in)})" - c_call_out_str = f"{self.output_func.__name__}({', '.join(c_call_out)})" + ip_fname = self.input_func.__name__ + op_fname = self.output_func.__name__ + c_call_in_str = f"{ip_fname}({', '.join(c_call_in)})" + c_call_out_str = f"{op_fname}({', '.join(c_call_out)})" template_scan = Template(c_backend.scan_c_template) src_scan = template_scan.render( @@ -1278,8 +1291,10 @@ def input_expr(i, ary): self.all_source += src_pybind extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.all_source, hash_fn, extra_inc_dir=[pybind11.get_include( - )], extra_compile_args=extra_comp_args, extra_link_args=extra_comp_args) + mod = Cmodule(self.all_source, hash_fn, + extra_inc_dir=[pybind11.get_include()], + extra_compile_args=extra_comp_args, + extra_link_args=extra_comp_args) module = mod.load() return getattr(module, modname) From 7ac3bb6ad427c28979c3068f76cdbf791f41aa71 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Thu, 17 Mar 2022 22:00:55 +0530 Subject: [PATCH 06/16] pep8 modifications --- compyle/cimport.py | 3 ++- compyle/tests/test_c_backend.py | 10 ++++++---- compyle/tests/test_cimport.py | 3 ++- compyle/tests/test_parallel.py | 4 +++- compyle/transpiler.py | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/compyle/cimport.py b/compyle/cimport.py index 475c796..53d54dc 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -11,6 +11,7 @@ from distutils.extension import Extension from distutils.command import build_ext from distutils.core import setup +from distutils.errors import CompileError, LinkError from .ext_module import get_platform_dir, get_ext_extension from .capture_stream import CaptureMultipleStreams # noqa: 402 @@ -83,7 +84,7 @@ def build(self): shutil.move(join(self.build_dir, self.name + get_ext_extension()), self.ext_path) - except: + except(CompileError, LinkError): hline = "*"*80 print(hline + "\nERROR") s_out = stream.get_output() diff --git a/compyle/tests/test_c_backend.py b/compyle/tests/test_c_backend.py index 901fc86..06f68fc 100644 --- a/compyle/tests/test_c_backend.py +++ b/compyle/tests/test_c_backend.py @@ -4,24 +4,26 @@ from ..types import annotate import numpy as np + class TestCBackend(TestCase): def test_get_func_signature(self): cbackend = CBackend() + @annotate(x='int', y='intp', z='int', w='double') def test_fn(x, y, z=2, w=3.0): return x+y+z+w - (pyb11_args, pyb11_call), (c_args, c_call) = cbackend.get_func_signature(test_fn) + temp = cbackend.get_func_signature(test_fn) + (pyb11_args, pyb11_call), (c_args, c_call) = temp exp_pyb11_args = ['int x', 'int[:] y', 'int z', 'double w'] exp_pyb11_call = ['x', '&y[0]', 'z', 'w'] exp_c_args = ['int x', 'int* y', 'int z', 'double w'] exp_c_call = ['x', 'y', 'z', 'w'] - + self.assertListEqual(pyb11_args, exp_pyb11_args) self.assertListEqual(pyb11_call, exp_pyb11_call) self.assertListEqual(c_args, exp_c_args) self.assertListEqual(c_call, exp_c_call) - + if __name__ == '__main__': unittest.main() - \ No newline at end of file diff --git a/compyle/tests/test_cimport.py b/compyle/tests/test_cimport.py index 75b490f..9d0d5b9 100644 --- a/compyle/tests/test_cimport.py +++ b/compyle/tests/test_cimport.py @@ -31,7 +31,8 @@ PYBIND11_MODULE(${name}, m) { m.def("${name}", [](py::array_t x, py::array_t y){ - return f(x.request().size, (int*)x.request().ptr, (int*)y.request().ptr); + return f(x.request().size, (int*)x.request().ptr, + (int*)y.request().ptr); }); } """ diff --git a/compyle/tests/test_parallel.py b/compyle/tests/test_parallel.py index 1601a26..ce1667a 100644 --- a/compyle/tests/test_parallel.py +++ b/compyle/tests/test_parallel.py @@ -280,7 +280,9 @@ def test_repeated_scans_with_different_settings(self): self._test_unique_scan(backend='cython') -class TestParallelUtils(ParallelUtilsBase, ParallelUtilsBaseC, unittest.TestCase): +class TestParallelUtils(ParallelUtilsBase, + ParallelUtilsBaseC, + unittest.TestCase): def setUp(self): cfg = get_config() self._use_double = cfg.use_double diff --git a/compyle/transpiler.py b/compyle/transpiler.py index 2fd709f..bc59619 100644 --- a/compyle/transpiler.py +++ b/compyle/transpiler.py @@ -194,8 +194,8 @@ def __init__(self, backend='cython', incl_cluda=True): #include #include namespace py = pybind11; - using namespace std; - ''') + using namespace std; + ''') def _handle_symbol(self, name, value): backend = self.backend From 76e036118e7b4bfae40a52273977fe24ccbb014f Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Thu, 17 Mar 2022 22:06:22 +0530 Subject: [PATCH 07/16] updated requirements --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index db7fea3..ae429d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,5 @@ pytools cython numpy pytest +pybind11 +filelock \ No newline at end of file From 19a06d71f23c2bf621da7d9e77f91eab30172486 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Fri, 18 Mar 2022 19:54:54 +0530 Subject: [PATCH 08/16] remove unused imports --- compyle/array.py | 6 +----- compyle/parallel.py | 6 ------ 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/compyle/array.py b/compyle/array.py index 12acd4f..496eda8 100644 --- a/compyle/array.py +++ b/compyle/array.py @@ -1,11 +1,10 @@ import numpy as np import math import mako.template as mkt -import time from pytools import memoize, memoize_method from .config import get_config -from .types import (annotate, dtype_to_ctype, ctype_to_dtype, declare, +from .types import (annotate, dtype_to_ctype, declare, dtype_to_knowntype, knowntype_to_ctype) from .template import Template from .sort import radix_sort @@ -394,7 +393,6 @@ def linspace(start, stop, num, dtype=np.float64, backend='opencl', out = out * delta+start elif backend == 'cuda': import pycuda.gpuarray as gpuarray - import pycuda.autoinit if endpoint: delta = (stop-start)/(num-1) else: @@ -445,7 +443,6 @@ def diff(a, n, backend=None): backend = a.backend if backend == 'opencl' or backend == 'cuda': - from compyle.api import Elementwise binom_coeff = np.zeros(n+1) sign_fac = 1 if (n % 2 == 0) else -1 for i in range(n+1): @@ -873,7 +870,6 @@ def comparison_kernel(func, backend, ary_type, other_type): def comparison_template(func, other, arr, backend=None): if backend is None: backend = arr.backend - from compyle.parallel import Elementwise other_type = dtype_to_ctype(type(other)) ary_type = dtype_to_ctype(arr.dtype) + 'p' ans = empty(arr.length, dtype=np.int32, backend=arr.backend) diff --git a/compyle/parallel.py b/compyle/parallel.py index 2f1dc99..6ce3b33 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -8,17 +8,11 @@ from compyle import c_backend from functools import wraps -from inspect import getmodule -import operator -from re import TEMPLATE from textwrap import wrap -import json from mako.template import Template import numpy as np -import py import pybind11 -from pyopencl.array import arange from .cimport import Cmodule from .config import get_config From eca10d8c592d52dfd97e1025d53d2d822b24486d Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Wed, 30 Mar 2022 11:17:19 +0530 Subject: [PATCH 09/16] windows support --- compyle/c_backend.py | 3 ++- compyle/cimport.py | 12 ++++++++++-- compyle/parallel.py | 21 ++++++--------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/compyle/c_backend.py b/compyle/c_backend.py index f7b09c6..9ac6a04 100644 --- a/compyle/c_backend.py +++ b/compyle/c_backend.py @@ -219,8 +219,9 @@ def _get_self_type(self): %endif T* stage1_res = new T[ntiles]; T* stage2_res = new T[ntiles + 1]; - + %if openmp: #pragma omp parallel + %endif { // Step 1 - reducing each tile %if openmp: diff --git a/compyle/cimport.py b/compyle/cimport.py index 53d54dc..02ddbf6 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -13,18 +13,19 @@ from distutils.core import setup from distutils.errors import CompileError, LinkError -from .ext_module import get_platform_dir, get_ext_extension +from .ext_module import get_platform_dir, get_ext_extension, get_openmp_flags from .capture_stream import CaptureMultipleStreams # noqa: 402 class Cmodule: - def __init__(self, src, hash_fn, root=None, verbose=False, + def __init__(self, src, hash_fn, root=None, verbose=False, openmp=False, extra_inc_dir=[pybind11.get_include()], extra_link_args=[], extra_compile_args=[]): self.src = src self.hash = hash_fn self.name = f'm_{self.hash}' self.verbose = verbose + self.openmp = openmp self.extra_inc_dir = extra_inc_dir self.extra_link_args = extra_link_args self.extra_compile_args = extra_compile_args @@ -62,6 +63,7 @@ def is_build_needed(self): return not exists(self.ext_path) def build(self): + self._include_openmp() ext = Extension(name=self.name, sources=[self.src_path], language='c++', @@ -111,6 +113,12 @@ def load(self): spec.loader.exec_module(module) return module + def _include_openmp(self): + if self.openmp: + ec, el = get_openmp_flags() + self.extra_compile_args += ec + self.extra_link_args += el + def _message(self, *args): msg = ' '.join(args) if self.verbose: diff --git a/compyle/parallel.py b/compyle/parallel.py index 6ce3b33..e66a9ef 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -549,11 +549,8 @@ def _generate(self, declarations=None): self.all_source += src_bind - extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.all_source, hash_fn, - extra_inc_dir=[pybind11.get_include()], - extra_compile_args=extra_comp_args, - extra_link_args=extra_comp_args) + mod = Cmodule(self.all_source, hash_fn, openmp=openmp, + extra_inc_dir=[pybind11.get_include()]) module = mod.load() return getattr(module, modname) @@ -777,11 +774,8 @@ def _generate(self, declarations=None): ) self.all_source += src_pybind - extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.all_source, hash_fn, - extra_inc_dir=[pybind11.get_include()], - extra_compile_args=extra_comp_args, - extra_link_args=extra_comp_args) + mod = Cmodule(self.all_source, hash_fn, openmp=openmp, + extra_inc_dir=[pybind11.get_include()]) module = mod.load() return getattr(module, modname) @@ -1284,11 +1278,8 @@ def input_expr(i, ary): self.all_source += src_pybind - extra_comp_args = ["-fopenmp", "-fPIC"] if openmp else [] - mod = Cmodule(self.all_source, hash_fn, - extra_inc_dir=[pybind11.get_include()], - extra_compile_args=extra_comp_args, - extra_link_args=extra_comp_args) + mod = Cmodule(self.all_source, hash_fn, openmp=openmp, + extra_inc_dir=[pybind11.get_include()]) module = mod.load() return getattr(module, modname) From 758933523c0c98e749bf12ccc72192c8e94e5044 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Wed, 30 Mar 2022 16:17:47 +0530 Subject: [PATCH 10/16] catch systemexit --- compyle/c_backend.py | 9 +++++++-- compyle/cimport.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/compyle/c_backend.py b/compyle/c_backend.py index 9ac6a04..0f27922 100644 --- a/compyle/c_backend.py +++ b/compyle/c_backend.py @@ -111,8 +111,9 @@ def _get_self_type(self): int ntiles = 1; %endif T* stage1_res = new T[ntiles]; - - #pragma omp parallel + %if openmp: + #pragma omp parallel for + %endif { // Step 1 - reducing each tile %if openmp: @@ -128,9 +129,11 @@ def _get_self_type(self): stage1_res[itile] = reduce(cur_start_idx, cur_tile_size, initial_val${call_extra}); + %if openmp: #pragma omp barrier #pragma omp single + %endif ans = reduce_one_ar(0, ntiles, initial_val, stage1_res); } delete[] stage1_res; @@ -237,9 +240,11 @@ def _get_self_type(self): stage1_res[itile] = reduce(ary, cur_start_idx, cur_tile_size, initial_val${call_in_extra}); + %if openmp: #pragma omp barrier #pragma omp single + %endif excl_scan_wo_ip_exp(stage1_res, stage2_res, ntiles, initial_val); diff --git a/compyle/cimport.py b/compyle/cimport.py index 02ddbf6..d435a62 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -86,7 +86,7 @@ def build(self): shutil.move(join(self.build_dir, self.name + get_ext_extension()), self.ext_path) - except(CompileError, LinkError): + except(CompileError, LinkError, SystemExit): hline = "*"*80 print(hline + "\nERROR") s_out = stream.get_output() From cfd66a89f0bc19be42b5bef7c96f53cb282885d8 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Fri, 3 Jun 2022 14:46:14 +0530 Subject: [PATCH 11/16] update to use cpp11 --- compyle/cimport.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compyle/cimport.py b/compyle/cimport.py index d435a62..16b21f3 100644 --- a/compyle/cimport.py +++ b/compyle/cimport.py @@ -29,6 +29,7 @@ def __init__(self, src, hash_fn, root=None, verbose=False, openmp=False, self.extra_inc_dir = extra_inc_dir self.extra_link_args = extra_link_args self.extra_compile_args = extra_compile_args + self._use_cpp11() self._setup_root(root) self._setup_filenames() @@ -119,6 +120,9 @@ def _include_openmp(self): self.extra_compile_args += ec self.extra_link_args += el + def _use_cpp11(self): + self.extra_compile_args += ['-std=c++11'] + def _message(self, *args): msg = ' '.join(args) if self.verbose: From c710666892b1416c45062387680a3d0abc8b35b2 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Tue, 14 Jun 2022 13:33:34 +0530 Subject: [PATCH 12/16] added ccompile class --- compyle/c_backend.py | 76 ++++++++++++++++++++++++++++----- compyle/tests/test_c_backend.py | 18 +++++++- 2 files changed, 81 insertions(+), 13 deletions(-) diff --git a/compyle/c_backend.py b/compyle/c_backend.py index 0f27922..bdabff6 100644 --- a/compyle/c_backend.py +++ b/compyle/c_backend.py @@ -1,6 +1,27 @@ +from compyle.profile import profile from .translator import ocl_detect_type, KnownType from .cython_generator import CythonGenerator, get_func_definition from .cython_generator import getsourcelines +from mako.template import Template +from .ext_module import get_md5 +from .cimport import Cmodule +from .transpiler import Transpiler +from . import array + +import pybind11 +import numpy as np + + +elwise_c_pybind = ''' + +PYBIND11_MODULE(${modname}, m) { + + m.def("${modname}", [](${pyb11_args}){ + return ${name}(${pyb11_call}); + }); +} + +''' class CBackend(CythonGenerator): @@ -41,17 +62,50 @@ def ctype_to_pyb11(self, c_type): def _get_self_type(self): return KnownType('GLOBAL_MEM %s*' % self._class_name) - -elwise_c_pybind = ''' - -PYBIND11_MODULE(${modname}, m) { - - m.def("${modname}", [](${pyb11_args}){ - return ${name}(${pyb11_call}); - }); -} - -''' +class CCompile(CBackend): + def __init__(self, func): + super(CCompile, self).__init__() + self.func = func + self.src = "not yet generated" + self.tp = Transpiler(backend='c') + self.c_func = self._compile() + + def _compile(self): + self.tp.add(self.func) + self.src = self.tp.get_code() + + py_data, c_data = self.get_func_signature_pyb11(self.func) + + pyb11_args = ', '.join(py_data[0][:]) + pyb11_call = ', '.join(py_data[1][:]) + hash_fn = get_md5(self.src) + modname = f'm_{hash_fn}' + template = Template(elwise_c_pybind) + src_bind = template.render( + name=self.func.__name__, + modname=modname, + pyb11_args=pyb11_args, + pyb11_call=pyb11_call + ) + self.src += src_bind + + mod = Cmodule(self.src, hash_fn, openmp=False, + extra_inc_dir=[pybind11.get_include()]) + module = mod.load() + return getattr(module, modname) + + def _massage_arg(self, x): + if isinstance(x, array.Array): + return x.dev + elif isinstance(x, np.ndarray): + return x + else: + return np.asarray(x) + + @profile + def __call__(self, *args, **kwargs): + c_args = [self._massage_arg(x) for x in args] + self.c_func(*c_args) elwise_c_template = ''' diff --git a/compyle/tests/test_c_backend.py b/compyle/tests/test_c_backend.py index 06f68fc..8b1c82d 100644 --- a/compyle/tests/test_c_backend.py +++ b/compyle/tests/test_c_backend.py @@ -1,6 +1,6 @@ import unittest from unittest import TestCase -from ..c_backend import CBackend +from ..c_backend import CBackend, CCompile from ..types import annotate import numpy as np @@ -24,6 +24,20 @@ def test_fn(x, y, z=2, w=3.0): self.assertListEqual(c_args, exp_c_args) self.assertListEqual(c_call, exp_c_call) - +class TestCCompile(TestCase): + def test_compile(self): + @annotate(int='n, p', intp='x, y') + def get_pow(n, p, x, y): + for i in range(n): + y[i] = x[i]**p + c_get_pow = CCompile(get_pow) + n = 5 + p = 5 + x = np.ones(n, dtype=np.int32) * 2 + y = np.zeros(n, dtype=np.int32) + y_exp = np.ones(n, dtype=np.int32) * 32 + c_get_pow(n, p, x, y) + assert(np.all(y == y_exp)) + if __name__ == '__main__': unittest.main() From 643ae5b2efe87311859cda1bfc6f317c4ae1c624 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Tue, 8 Mar 2022 15:32:34 +0530 Subject: [PATCH 13/16] added c backend --- compyle/parallel.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/compyle/parallel.py b/compyle/parallel.py index e66a9ef..c075c53 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -8,7 +8,11 @@ from compyle import c_backend from functools import wraps +from inspect import getmodule +import operator +from re import TEMPLATE from textwrap import wrap +import json from mako.template import Template import numpy as np @@ -24,6 +28,33 @@ from . import array +pyb11_bind_elwise = ''' +PYBIND11_MODULE(${name}, m) { + + m.def("${name}", [](${pyb11_args}){ + return elwise_${name}(${pyb11_call}); + }); +} +''' + +pyb11_setup_header = ''' +<% +cfg['compiler_args'] = ['-std=c++11', '-fopenmp'] +cfg['linker_args'] = ['-fopenmp'] +setup_pybind11(cfg) +%> +\n +''' +elementwise_pyb11_template = ''' +void ${name}(${arguments}){ + %if openmp: + #pragma omp parallel for + %endif + for(size_t i = 0; i < SIZE; i++){ + ${operations}; + } +} +''' elementwise_cy_template = ''' from cython.parallel import parallel, prange @@ -605,7 +636,6 @@ def __call__(self, *args, **kw): elif self.backend == 'c': self.c_func(*c_args) - class Elementwise(object): def __init__(self, func, backend=None): self._func = func From 7ca1cf804f8785b84ffec48ad8f180c5d3eb58e3 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Wed, 9 Mar 2022 13:54:20 +0530 Subject: [PATCH 14/16] working c backend --- compyle/parallel.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/compyle/parallel.py b/compyle/parallel.py index c075c53..1303674 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -37,14 +37,6 @@ } ''' -pyb11_setup_header = ''' -<% -cfg['compiler_args'] = ['-std=c++11', '-fopenmp'] -cfg['linker_args'] = ['-fopenmp'] -setup_pybind11(cfg) -%> -\n -''' elementwise_pyb11_template = ''' void ${name}(${arguments}){ %if openmp: @@ -636,6 +628,7 @@ def __call__(self, *args, **kw): elif self.backend == 'c': self.c_func(*c_args) + class Elementwise(object): def __init__(self, func, backend=None): self._func = func From 6cec932105a0fbee3c1e352a438020eb7d5cc1d3 Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Thu, 17 Mar 2022 16:36:42 +0530 Subject: [PATCH 15/16] working c backend with tests --- compyle/parallel.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/compyle/parallel.py b/compyle/parallel.py index 1303674..c3eba33 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -16,6 +16,7 @@ from mako.template import Template import numpy as np +import py import pybind11 from .cimport import Cmodule @@ -28,25 +29,6 @@ from . import array -pyb11_bind_elwise = ''' -PYBIND11_MODULE(${name}, m) { - - m.def("${name}", [](${pyb11_args}){ - return elwise_${name}(${pyb11_call}); - }); -} -''' - -elementwise_pyb11_template = ''' -void ${name}(${arguments}){ - %if openmp: - #pragma omp parallel for - %endif - for(size_t i = 0; i < SIZE; i++){ - ${operations}; - } -} -''' elementwise_cy_template = ''' from cython.parallel import parallel, prange From 6a50f02e27fb266852a67e755efa611fdf4ca0ae Mon Sep 17 00:00:00 2001 From: Rohit Tembhare Date: Fri, 18 Mar 2022 19:54:54 +0530 Subject: [PATCH 16/16] remove unused imports --- compyle/parallel.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/compyle/parallel.py b/compyle/parallel.py index c3eba33..e66a9ef 100644 --- a/compyle/parallel.py +++ b/compyle/parallel.py @@ -8,15 +8,10 @@ from compyle import c_backend from functools import wraps -from inspect import getmodule -import operator -from re import TEMPLATE from textwrap import wrap -import json from mako.template import Template import numpy as np -import py import pybind11 from .cimport import Cmodule