11
11
12
12
from mako .template import Template
13
13
import numpy as np
14
- import time
15
14
16
15
from .config import get_config
17
16
from .profile import profile
@@ -412,6 +411,10 @@ def __init__(self, func, backend=None):
412
411
self ._config = get_config ()
413
412
self .cython_gen = CythonGenerator ()
414
413
self .queue = None
414
+ # This is the source generated for the user code.
415
+ self .source = '# Source not yet generated.'
416
+ # This is all the source code used for the elementwise.
417
+ self .all_source = '# Source not yet generated.'
415
418
self .c_func = self ._generate ()
416
419
417
420
def _generate (self , declarations = None ):
@@ -433,8 +436,12 @@ def _generate(self, declarations=None):
433
436
self .func , 'is_serial' , False ),
434
437
get_parallel_range = get_parallel_range
435
438
)
439
+ # This is the user code source.
440
+ self .source = self .tp .get_code ()
436
441
self .tp .add_code (src )
437
442
self .tp .compile ()
443
+ # All the source code for the elementwise
444
+ self .all_source = self .tp .source
438
445
return getattr (self .tp .mod , 'py_' + self .name [7 :])
439
446
elif self .backend == 'opencl' :
440
447
py_data , c_data = self .cython_gen .get_func_signature (self .func )
@@ -462,6 +469,10 @@ def _generate(self, declarations=None):
462
469
operation = expr ,
463
470
preamble = "\n " .join ([cluda_preamble , preamble ])
464
471
)
472
+ # only code we generate is saved here.
473
+ self .source = "\n " .join ([cluda_preamble , preamble ])
474
+ all_source = knl .get_kernel (False )[0 ].program .source
475
+ self .all_source = all_source or self .source
465
476
return knl
466
477
elif self .backend == 'cuda' :
467
478
py_data , c_data = self .cython_gen .get_func_signature (self .func )
@@ -487,6 +498,10 @@ def _generate(self, declarations=None):
487
498
operation = expr ,
488
499
preamble = "\n " .join ([cluda_preamble , preamble ])
489
500
)
501
+ # only code we generate is saved here.
502
+ self .source = cluda_preamble + preamble
503
+ # FIXME: it is difficult to get the sources from pycuda.
504
+ self .all_source = self .source
490
505
return knl
491
506
492
507
def _correct_opencl_address_space (self , c_data ):
@@ -551,6 +566,9 @@ def __init__(self, func, backend=None):
551
566
def __getattr__ (self , name ):
552
567
return getattr (self .elementwise , name )
553
568
569
+ def __dir__ (self ):
570
+ return sorted (dir (self .elementwise ) + ['elementwise' ])
571
+
554
572
def __call__ (self , * args , ** kwargs ):
555
573
self .elementwise (* args , ** kwargs )
556
574
@@ -588,6 +606,10 @@ def __init__(self, reduce_expr, map_func=None, dtype_out=np.float64,
588
606
self ._config = get_config ()
589
607
self .cython_gen = CythonGenerator ()
590
608
self .queue = None
609
+ # This is the source generated for the user code.
610
+ self .source = '# Source not yet generated.'
611
+ # This is all the source code used.
612
+ self .all_source = '# Source not yet generated.'
591
613
self .c_func = self ._generate ()
592
614
593
615
def _generate (self , declarations = None ):
@@ -621,8 +643,11 @@ def _generate(self, declarations=None):
621
643
openmp = self ._config .use_openmp ,
622
644
get_parallel_range = get_parallel_range
623
645
)
646
+ # This is the user code source.
647
+ self .source = self .tp .get_code ()
624
648
self .tp .add_code (src )
625
649
self .tp .compile ()
650
+ self .all_source = self .tp .source
626
651
return getattr (self .tp .mod , 'py_' + self .name )
627
652
elif self .backend == 'opencl' :
628
653
if self .func is not None :
@@ -661,6 +686,17 @@ def _generate(self, declarations=None):
661
686
arguments = arguments ,
662
687
preamble = "\n " .join ([cluda_preamble , preamble ])
663
688
)
689
+ # only code we generate is saved here.
690
+ self .source = "\n " .join ([cluda_preamble , preamble ])
691
+ if knl .stage_1_inf .source :
692
+ self .all_source = "\n " .join ([
693
+ "// ------ stage 1 -----" ,
694
+ knl .stage_1_inf .source ,
695
+ "// ------ stage 2 -----" ,
696
+ knl .stage_2_inf .source ,
697
+ ])
698
+ else :
699
+ self .all_source = self .source
664
700
return knl
665
701
elif self .backend == 'cuda' :
666
702
if self .func is not None :
@@ -697,6 +733,10 @@ def _generate(self, declarations=None):
697
733
arguments = arguments ,
698
734
preamble = "\n " .join ([cluda_preamble , preamble ])
699
735
)
736
+ # only code we generate is saved here.
737
+ self .source = cluda_preamble + preamble
738
+ # FIXME: it is difficult to get the sources from pycuda.
739
+ self .all_source = self .source
700
740
return knl
701
741
702
742
def _correct_return_type (self , c_data ):
@@ -780,6 +820,9 @@ def __init__(self, reduce_expr, map_func=None, dtype_out=np.float64,
780
820
neutral = neutral ,
781
821
backend = backend )
782
822
823
+ def __dir__ (self ):
824
+ return sorted (dir (self .reduction ) + ['reduction' ])
825
+
783
826
def __getattr__ (self , name ):
784
827
return getattr (self .reduction , name )
785
828
@@ -812,6 +855,10 @@ def __init__(self, input=None, output=None, scan_expr="a+b",
812
855
else :
813
856
self .neutral = neutral
814
857
self ._config = get_config ()
858
+ # This is the source generated for the user code.
859
+ self .source = '# Source not yet generated.'
860
+ # This is all the source code used for the scan.
861
+ self .all_source = '# Source not yet generated.'
815
862
self .cython_gen = CythonGenerator ()
816
863
self .queue = None
817
864
self .c_func = self ._generate ()
@@ -895,7 +942,6 @@ def _append_cython_arg_data(self, all_py_data, all_c_data, py_data,
895
942
all_c_data [1 ].extend (self ._filter_ignored (c_data [1 ], select ))
896
943
897
944
def _generate_cython_code (self , declarations = None ):
898
- name = self .name
899
945
all_py_data = [[], []]
900
946
all_c_data = [[], []]
901
947
@@ -911,7 +957,9 @@ def _generate_cython_code(self, declarations=None):
911
957
# Process segment function
912
958
use_segment = True if self .is_segment_func is not None else False
913
959
py_data , c_data , segment_expr = self ._wrap_cython_code (
914
- self .is_segment_func , func_type = 'segment' , declarations = declarations )
960
+ self .is_segment_func , func_type = 'segment' ,
961
+ declarations = declarations
962
+ )
915
963
self ._append_cython_arg_data (all_py_data , all_c_data , py_data , c_data )
916
964
917
965
# Process output expression
@@ -963,8 +1011,10 @@ def _generate_cython_code(self, declarations=None):
963
1011
is_segment_start_expr = segment_expr ,
964
1012
complex_map = self .complex_map
965
1013
)
1014
+ self .source = self .tp .get_code ()
966
1015
self .tp .add_code (src )
967
1016
self .tp .compile ()
1017
+ self .all_source = self .tp .source
968
1018
return getattr (self .tp .mod , 'py_' + self .name )
969
1019
970
1020
def _wrap_ocl_function (self , func , func_type = None , declarations = None ):
@@ -1053,6 +1103,18 @@ def _generate_opencl_kernel(self, declarations=None):
1053
1103
is_segment_start_expr = segment_expr ,
1054
1104
preamble = preamble
1055
1105
)
1106
+ self .source = preamble
1107
+ if knl .first_level_scan_info .kernel .program .source :
1108
+ self .all_source = '\n ' .join ([
1109
+ '// ----- Level 1 ------' ,
1110
+ knl .first_level_scan_info .kernel .program .source ,
1111
+ '// ----- Level 2 ------' ,
1112
+ knl .second_level_scan_info .kernel .program .source ,
1113
+ '// ----- Final output ------' ,
1114
+ knl .final_update_info .kernel .program .source ,
1115
+ ])
1116
+ else :
1117
+ self .all_source = self .source
1056
1118
return knl
1057
1119
1058
1120
def _generate_cuda_kernel (self , declarations = None ):
@@ -1073,6 +1135,9 @@ def _generate_cuda_kernel(self, declarations=None):
1073
1135
is_segment_start_expr = segment_expr ,
1074
1136
preamble = preamble
1075
1137
)
1138
+ self .source = preamble
1139
+ # FIXME: Difficult to get the pycuda sources
1140
+ self .all_source = self .source
1076
1141
return knl
1077
1142
1078
1143
def _add_address_space (self , arg ):
@@ -1113,11 +1178,13 @@ def _massage_arg(self, x):
1113
1178
def __call__ (self , ** kwargs ):
1114
1179
c_args_dict = {k : self ._massage_arg (x ) for k , x in kwargs .items ()}
1115
1180
if self ._get_backend_key () in self .output_func .arg_keys :
1116
- output_arg_keys = self .output_func .arg_keys [self ._get_backend_key ()]
1181
+ output_arg_keys = self .output_func .arg_keys [
1182
+ self ._get_backend_key ()
1183
+ ]
1117
1184
else :
1118
1185
raise ValueError ("No kernel arguments found for backend = %s, "
1119
- "use_openmp = %s, use_double = %s" %
1120
- self ._get_backend_key ())
1186
+ "use_openmp = %s, use_double = %s" %
1187
+ self ._get_backend_key ())
1121
1188
1122
1189
if self .backend == 'cython' :
1123
1190
size = len (c_args_dict [output_arg_keys [1 ]])
@@ -1165,6 +1232,9 @@ def __init__(self, input=None, output=None, scan_expr="a+b",
1165
1232
complex_map = complex_map ,
1166
1233
backend = backend )
1167
1234
1235
+ def __dir__ (self ):
1236
+ return sorted (dir (self .scan ) + ['scan' ])
1237
+
1168
1238
def __getattr__ (self , name ):
1169
1239
return getattr (self .scan , name )
1170
1240
0 commit comments