
MLPerf Tiny developments #503


Open
Wants to merge 117 commits into base: main
117 commits
89c3d85
Added QConv2DBatchnorm support.
nghielme Mar 8, 2021
0ae678d
Added QConv2DBatchnorm support.
nghielme Mar 8, 2021
0846766
Added support for QConv2DBatchnorm.
nghielme Mar 8, 2021
dade7de
Fixed problems emerged from PR discussion.
nghielme Mar 9, 2021
ae5afbf
Fixed problems emerged from PR discussion.
nghielme Mar 9, 2021
02f92e2
Fixed problems emerged from PR discussion.
nghielme Mar 9, 2021
b829039
Fixed problems emerged from PR discussion.
nghielme Mar 9, 2021
20d9a62
Some improvements + testing if weights are provided already folded.
nghielme Mar 10, 2021
6e6afab
Added support for `QConv2DBatchnorm` with `use_bias=False`.
nghielme Mar 12, 2021
7649a80
Removing the check on batch size. "" -> ''.
nghielme Mar 19, 2021
a1d86b8
Last updates before merging PR.
nghielme Mar 23, 2021
5e988c2
Merge branch 'fastmachinelearning:master' into master
nghielme Jun 24, 2021
aa246b8
HLS implementation of Concatenate for io_stream
vloncar Aug 6, 2021
bee6c05
Fix concatenate3d_2 for io_parallel
vloncar Aug 6, 2021
a0b4a00
Merge branch 'concat_io_stream' of https://github.com/vloncar/hls4ml …
nghielme Aug 9, 2021
33d1e91
Merge remote-tracking branch 'origin/pynq-pr' into vivado-accel-concat
nghielme Aug 9, 2021
810e2d5
Support overriding stream depth in config
vloncar Aug 10, 2021
59e3766
Added a first version of build_prj.tcl that is able to generate the .…
nghielme Sep 2, 2021
b72260c
Merge remote-tracking branch 'vlad/pynq_enet' into fifo_depth_opt
nghielme Sep 2, 2021
306e381
Fixed the assignment of the unassigned `inp` variable
nghielme Sep 2, 2021
da2c46a
Bump version
thesps Sep 24, 2021
707dba5
First version of the optimization flow automated. Tested with ENet mo…
nghielme Sep 27, 2021
ee4733b
Fixing minor issues
nghielme Sep 27, 2021
93cca6c
Fixing minor issues
nghielme Sep 27, 2021
14fa32d
Fixing minor issues
nghielme Sep 28, 2021
fe8519f
fix conv1d io_parallel resource (#403)
jmitrevs Sep 30, 2021
038c4b6
Fixing minor issues
nghielme Oct 1, 2021
0f915bf
Handling fifos not implemented in brams
nghielme Oct 1, 2021
c76ea08
Modified `name` to `cppname` for `InplaceVariable`
nghielme Oct 1, 2021
4614bfe
Added `exit` in build_prj.tcl file
nghielme Oct 1, 2021
c136ff8
`name` -> `cppname`
nghielme Oct 1, 2021
3d48d0d
Small fix
nghielme Oct 1, 2021
fd8a618
Parallelise pytests in CI
thesps Oct 5, 2021
f5acaca
Fix GlobalPooling1D Layers (#399)
jmduarte Oct 7, 2021
231fdcb
Set appropriate data type for quantized_relu activations
thesps Sep 29, 2021
b361265
Display unsigned types properly in profiling
thesps Sep 29, 2021
c0143a1
Some fixes
nghielme Oct 18, 2021
0ea8d88
Some fixes
nghielme Oct 18, 2021
7ed6fcc
Adding one dependency
nghielme Oct 19, 2021
92bf513
Fixing bugs in the `optimize_fifos_depth` function
nghielme Oct 20, 2021
888713b
Fixing bugs in `optimize_fifos_depth` function
nghielme Oct 21, 2021
ffc9845
Testing without `log_wave -r /`
nghielme Nov 8, 2021
0cfb79c
- Testing with all build parameters true by default
nghielme Nov 8, 2021
be7dd00
- Testing with all build parameters true by default
nghielme Nov 8, 2021
f10e0c3
fix batched multiple inputs
jmduarte Oct 11, 2021
d1d5eba
Fixed 'qkeras_mnist_dense' example build problem #423
siorpaes Oct 26, 2021
997c07b
Update for pyyaml 6.0 (#435)
thesps Nov 4, 2021
d0ff8ba
`axi_stream_driver` update (#420)
nghielme Nov 5, 2021
ba3902f
Reshape fixes: don't repack stream for flatten; remove final reshape …
jmduarte Nov 9, 2021
86e4397
Reorder loops in im2col_2d_cl given resource strategy issue. Reenable…
thesps Nov 9, 2021
36cd38c
Support applying Softmax over multidimensional tensors (#384)
vloncar Nov 11, 2021
9f5c249
Disable some unsupported layers
thesps Nov 9, 2021
ab076fc
Start integrating Vivado Accelerator AXI-m backend
Nov 26, 2021
fa4bd2b
build_prj.tcl fixed for vivadoaccelerator - there will be problems fo…
nghielme Nov 26, 2021
a1384cd
update example-models
nghielme Nov 26, 2021
8f64675
Setup AXI-m backend for PYNQ-Z1 and PYNQ-Z2
Nov 28, 2021
1a64d4b
Add C drivers to PYNQ-Z1
Nov 28, 2021
c1f4670
Move data.h generator into hls4ml
Nov 29, 2021
ca26680
Merge remote-tracking branch 'hls4ml/master' into fifo_depth_opt
nghielme Dec 1, 2021
aefe0c9
Copy directory even when the directory exists
Dec 2, 2021
77ff677
String compare with '==' rather than 'is'
Dec 11, 2021
a9aeb33
Add validation besides verification to the software application (Pynq…
Jan 3, 2022
d146994
Set stack/heap size in the linker script
Jan 5, 2022
08ee073
ATTENTION: force 1 the size of the scratchpads in the AXI-M wrapper (…
Jan 5, 2022
cd27c4c
Set software for ultra96-v2
Jan 7, 2022
feef1f9
init commit of QDenseBN
julesmuhizi Jan 11, 2022
97b14ea
Added changes needed to carry out the relu_merge optimization
anmeza Jan 16, 2022
c499f47
clean up code and currently testing if checking the merged relu flag …
oliviaweng Jan 18, 2022
04a382e
remove comments and extra brace
oliviaweng Jan 19, 2022
7164673
revert back to verbose template because it achieves maximum resource …
oliviaweng Jan 19, 2022
b0aa186
Merge pull request #1 from anmeza/relu_fuse
jmduarte Jan 24, 2022
c513c91
Merge branch 'fifo_depth_opt' into gdg/axi-m
jmduarte Jan 25, 2022
1ee5708
add back cppname
jmduarte Jan 25, 2022
4c7321a
added weight transponse and qkeras quantization support
julesmuhizi Jan 25, 2022
afd8249
patch-2 - added weight transponse and qkeras quantization support
julesmuhizi Jan 25, 2022
023321d
add c drivers for pynq-z2
jmduarte Jan 25, 2022
2e069ae
remove uncessary _add_supported_quantized_objects call
julesmuhizi Jan 26, 2022
15eb287
Add scripts and application for ARTY-A7 with VivadoAccelerator backend
Feb 9, 2022
cb7fc44
Cleanup timers code for ARTY-A7
Feb 9, 2022
d1dab8d
Merge pull request #5 from GiuseppeDiGuglielmo/gdg/axi-m
jmduarte Feb 9, 2022
fd4df8e
Merge pull request #2 from hls4ml-finn-mlperftiny/gdg/axi-m
jmduarte Feb 14, 2022
361628d
Integration of features for EEMBC power analysis.
Feb 16, 2022
17fc6f8
Merge pull request #7 from hls4ml-finn-mlperftiny/fifo_depth_opt
julesmuhizi Feb 18, 2022
a497085
try out jules's fix to relu_merge optimizer pass to work with dense-r…
oliviaweng Feb 21, 2022
c9daa06
have dense layer check merged_relu flag to see whether or not to set …
oliviaweng Feb 21, 2022
949bafd
fix how merged_relu flag is set for Dense and Conv2D in hls_layers.py
oliviaweng Feb 21, 2022
8e3df9e
debug print
oliviaweng Feb 21, 2022
f298517
add merged_relu attribute to Layer class
oliviaweng Feb 21, 2022
f6bf06a
remove debug prints and comments and DenseBatchnorm from relu_merge s…
oliviaweng Feb 21, 2022
9c08a24
Delete relu_merge_old.py
jmduarte Feb 22, 2022
77a2db1
Update core.py
jmduarte Feb 22, 2022
8b494a2
Update core.py
jmduarte Feb 22, 2022
a18a204
apply patches
jmduarte Feb 22, 2022
3590aa4
remove unnecessary patches
jmduarte Feb 22, 2022
4173797
Merge pull request #8 from anmeza/relu_fuse
jmduarte Feb 22, 2022
b2f499c
Merge pull request #10 from hls4ml-finn-mlperftiny/div_by_256_patch
jmduarte Feb 22, 2022
ad311fc
Merge pull request #4 from hls4ml-finn-mlperftiny/qdense_batchnorm
jmduarte Feb 22, 2022
532aff8
small fixes
jmduarte Feb 22, 2022
541acac
Merge pull request #6 from hls4ml-finn-mlperftiny/fifo_depth_opt_dev
jmduarte Feb 22, 2022
e48ae63
just pass hls_model
jmduarte Feb 22, 2022
c18064c
no need to pass keras model
jmduarte Feb 22, 2022
d8b435d
correct scope for arty
jmduarte Feb 24, 2022
8c51558
weird hack for now
jmduarte Feb 28, 2022
5f3b468
remove prints
jmduarte Mar 1, 2022
6c0d205
Merge pull request #11 from hls4ml-finn-mlperftiny/arty_fifo
jmduarte Mar 1, 2022
e808a05
Support overriding stream depth in config
vloncar Aug 10, 2021
e58a986
bug fixes
jmduarte Mar 2, 2022
6bbf88c
get correct type for data header
jmduarte Mar 2, 2022
7f209e2
smaller MCU
jmduarte Mar 2, 2022
c424ac1
hardcode name which works for current models :shrug:
jmduarte Mar 2, 2022
6e6730d
generalize with regex
jmduarte Mar 2, 2022
fb3def1
make eembc_power configurable
jmduarte Mar 3, 2022
ed3ffc7
Use SystemVerilog define to control EEMBC setup
Mar 4, 2022
7452d5a
Update axi_master_design.tcl
GiuseppeDiGuglielmo Mar 4, 2022
f4613f0
Update design_1_wrapper.v
GiuseppeDiGuglielmo Mar 4, 2022
9a411f0
Fix but w/ clock connection that stopped the QSPI boot
Mar 9, 2022
5c0ad77
Fix bug w/ clock connection that stopped the QSPI boot
Mar 9, 2022
2 changes: 1 addition & 1 deletion example-models
2 changes: 1 addition & 1 deletion hls4ml/__init__.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import

__version__ = '0.5.1'
__version__ = '0.6.0'

from hls4ml import converters
from hls4ml import report
2 changes: 1 addition & 1 deletion hls4ml/converters/keras/core.py
@@ -104,7 +104,7 @@ def parse_activation_layer(keras_layer, input_names, input_shapes, data_reader,

@keras_handler('BatchNormalization')
def parse_batchnorm_layer(keras_layer, input_names, input_shapes, data_reader, config):
assert('BatchNormalization' in keras_layer['class_name'] or 'QConv2DBatchnorm' in keras_layer['class_name'])
assert('BatchNormalization' in keras_layer['class_name'] or 'QConv2DBatchnorm' in keras_layer['class_name'] or 'QDenseBatchnorm' in keras_layer['class_name'])

layer = parse_default_keras_layer(keras_layer, input_names)

9 changes: 9 additions & 0 deletions hls4ml/converters/keras/qkeras_layers.py
@@ -110,3 +110,12 @@ def parse_qconv2dbatchnorm_layer(keras_layer, input_names, input_shapes, data_re
temp_shape = intermediate_shape
batch_layer, out_shape = parse_batchnorm_layer(keras_layer, input_names, temp_shape, data_reader, config)
return {**conv_layer, **batch_layer}, out_shape

@keras_handler('QDenseBatchnorm')
def parse_qdensebatchnorm_layer(keras_layer, input_names, input_shapes, data_reader, config):
intermediate_shape = list()
dense_layer, shape_qdense = parse_qdense_layer(keras_layer, input_names, input_shapes, data_reader, config)
intermediate_shape.append(shape_qdense)
temp_shape = intermediate_shape
batch_layer, out_shape = parse_batchnorm_layer(keras_layer, input_names, temp_shape, data_reader, config)
return {**dense_layer, **batch_layer}, out_shape
74 changes: 72 additions & 2 deletions hls4ml/model/hls_layers.py
@@ -199,6 +199,7 @@ def __init__(self, shape, dim_names, proxy, **kwargs):
self.shape = shape
self.dim_names = dim_names
self.type = proxy.type
self.cppname = proxy.name
self.name = proxy.name
self.size = proxy.size

@@ -365,6 +366,7 @@ def __init__(self, model, name, attributes, inputs, outputs=None):
self.set_attr('accum_t', accum_t.precision)
self.reuse_factor = self.model.config.get_reuse_factor(self)
self.target_cycles = self.model.config.get_target_cycles(self)
self.merged_relu = False

layer_config = self.model.config.get_layer_config(self)
for config_key, config_value in layer_config.items():
@@ -410,6 +412,10 @@ def get_output_variable(self, output_name=None):
else:
return next(iter(self.variables.values()))

def set_output_variable(self, output_name, output_value):
self.variables[output_name] = output_value


def get_weights(self, var_name=None):
if var_name:
return self.weights[var_name]
@@ -450,6 +456,8 @@ def make_array_variable(self, shape, dim_names, var_name='layer{index}_out', typ

def make_stream_variable(self, shape, dim_names, var_name='layer{index}_out', type_name='layer{index}_t', precision=None, depth=0):
pack_factor = self.model.config.get_layer_config_value(self, 'PackFactor', default=1)
if depth == 0:
depth = self.model.config.get_layer_config_value(self, 'StreamDepth', default=0)

return StreamVariable(shape, dim_names, var_name=var_name, type_name=type_name, precision=precision, n_pack=pack_factor, depth=depth, index=self.index)

@@ -541,6 +549,12 @@ def _default_config_params(self):
def get_layer_precision(self):
return self.precision

def get_merged_relu(self):
return self.merged_relu

def set_merged_relu(self, merged_relu):
self.merged_relu = merged_relu # Bool flag to set merged_relu

# myproject.cpp/h
def function_cpp(self):
raise NotImplementedError
@@ -589,7 +603,6 @@ def initialize(self):
out_name = self.outputs[0]
proxy = self.get_input_variable()
out = InplaceVariable(shape, dims, proxy, index=self.get_input_node().index)

self.variables[out_name] = out
self.model.register_output_variable(out_name, out)

@@ -646,9 +659,61 @@ def config_cpp(self):
params['nonzeros'] = self.get_weights('weight').nonzeros
params['product_type'] = self.model.config.backend.product_type(self.get_input_variable().type.precision, self.get_weights('weight').type.precision)
params['strategy'] = self.get_attr('strategy')

params['merged_relu'] = "true" if self.get_merged_relu() else "false"
params['out_t'] = self.get_output_variable().type.name
return self._config_template.format(**params)

class DenseBatchnorm(Dense):
def _get_folded_weights(self):
"""
Function to get the batchnorm folded weights.
This function converts the weights by folding batchnorm parameters into
the weight of QDense. The high-level equation:
W_fold = gamma * W / sqrt(variance + epsilon)
bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta
"""
kernel = self.model.get_weights_data(self.name, 'kernel')
bias = self.model.get_weights_data(self.name, 'bias')
if bias is None:
bias = 0

# get batchnorm weights and moving stats
gamma = self.model.get_weights_data(self.name, 'gamma')
beta = self.model.get_weights_data(self.name, 'beta')
moving_mean = self.model.get_weights_data(self.name, 'moving_mean')
moving_variance = self.model.get_weights_data(self.name, 'moving_variance')
# get the inversion factor so that we replace division by multiplication
inv = np.reciprocal(np.sqrt(moving_variance + self.get_attr('epsilon')))
if gamma is not None:
inv *= gamma

# wrap conv kernel and bias with bn parameters
folded_kernel = inv * kernel
folded_bias = inv * (bias - moving_mean) + beta

return [folded_kernel, folded_bias]

def initialize(self):
super(DenseBatchnorm, self).initialize()
folded_weights, folded_bias = self._get_folded_weights()
if self.model.config.is_resource_strategy(self) and self.model.config.backend.name in ['Vivado', 'VivadoAccelerator']:
self.weights['weight'].data_unquantized = np.transpose(folded_weights)
self.weights['weight'].data = self.get_attr('weight_quantizer')(self.weights['weight'].data_unquantized)

else:
self.weights['weight'].data_unquantized = folded_weights
self.weights['weight'].data = self.get_attr('weight_quantizer')(folded_weights)
self.weights['bias'].data_unquantized = folded_bias
bias_q = self.get_attr('bias_quantizer')
if bias_q is not None:
self.weights['bias'].data = bias_q(folded_bias)

def function_cpp(self):
return super(DenseBatchnorm, self).function_cpp()

def config_cpp(self):
return super(DenseBatchnorm, self).config_cpp()

class Conv1D(Layer):
def initialize(self):
if self.get_attr('data_format') == 'channels_last':
@@ -854,7 +919,9 @@ def initialize(self):
else:
shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']]
dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)]
self.attributes['intermediate_index'] = self.index
self.add_output_variable(shape, dims)
self.intermediate_op = self.get_output_variable()
self.add_weights(quantizer=self.get_attr('weight_quantizer'))
self.add_bias(quantizer=self.get_attr('bias_quantizer'))
if len(self.weights['weight'].data.shape) == 2: # This can happen if we assign weights of Dense layer to 1x1 Conv2D
@@ -921,6 +988,8 @@ def config_cpp(self):
mult_params['n_in'] = self.get_attr('n_chan') * self.get_attr('filt_height') * self.get_attr('filt_width')
mult_params['n_out'] = self.get_attr('n_filt')
mult_params['product_type'] = self.model.config.backend.product_type(self.get_input_variable().type.precision, self.get_weights('weight').type.precision)
mult_params['merged_relu'] = "true" if self.get_merged_relu() else "false"
mult_params['out_t'] = self.intermediate_op.type.name
mult_config = self._config_template[1].format(**mult_params)

return mult_config + '\n' + conv_config
@@ -1865,6 +1934,7 @@ def _get_transforms_config(self, params):
'BinaryDense' : Dense,
'TernaryDense' : Dense,
'QDense' : Dense,
'QDenseBatchnorm' : DenseBatchnorm,
'Conv1D' : Conv1D,
'QConv1D' : Conv1D,
'Conv2D' : Conv2D,
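For context on the folding used by DenseBatchnorm._get_folded_weights above, here is a minimal numpy check (illustrative values only, not part of the diff) of the docstring equations W_fold = gamma * W / sqrt(variance + epsilon) and bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(8, 4))          # toy batch: 4 inputs
W = rng.normal(size=(4, 3))          # dense kernel: 4 -> 3 units
b = rng.normal(size=(3,))

gamma = rng.normal(size=(3,))        # batchnorm parameters as stored by Keras
beta = rng.normal(size=(3,))
moving_mean = rng.normal(size=(3,))
moving_variance = rng.uniform(0.5, 2.0, size=(3,))
epsilon = 1e-3

# Folding, mirroring _get_folded_weights
inv = np.reciprocal(np.sqrt(moving_variance + epsilon)) * gamma
W_fold = inv * W                      # scales each output column
b_fold = inv * (b - moving_mean) + beta

# Reference: dense layer followed by inference-mode batchnorm
y_ref = gamma * ((x @ W + b) - moving_mean) / np.sqrt(moving_variance + epsilon) + beta
assert np.allclose(x @ W_fold + b_fold, y_ref)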
12 changes: 12 additions & 0 deletions hls4ml/model/hls_model.py
@@ -59,6 +59,18 @@ def get_project_name(self):
def get_output_dir(self):
return self.get_config_value('OutputDir')

def get_merged_relu(self, default=None):
hls_config = self.config['HLSConfig']

model_config = hls_config.get('Model', None)
key = 'MergedRelu'

if model_config is not None:
tempbool = model_config.get(key, default)
return tempbool

return default

def get_layer_config_value(self, layer, key, default=None):
hls_config = self.config['HLSConfig']

2 changes: 2 additions & 0 deletions hls4ml/model/optimizer/__init__.py
@@ -12,6 +12,7 @@
from hls4ml.model.optimizer.passes.conv_same_pad import InsertZeroPaddingBeforeConv2D
from hls4ml.model.optimizer.passes.pointwise import OptimizePointwiseConv
from hls4ml.model.optimizer.passes.clone import CloneOutput
from hls4ml.model.optimizer.passes.relu_merge import MergeRelu
from hls4ml.model.optimizer.passes.repack_stream import ReshapeStream, BroadcastStream, RemoveFinalReshape
from hls4ml.model.optimizer.passes.transpose_opt import RemoveUselessTranspose
from hls4ml.model.optimizer.passes.multi_dense import ReplaceMultidimensionalDenseWithConv
@@ -40,6 +41,7 @@
register_pass('conv2d_same_pad', InsertZeroPaddingBeforeConv2D)
register_pass('optimize_pointwise_conv', OptimizePointwiseConv)
register_pass('clone_output', CloneOutput)
register_pass('relu_merge', MergeRelu)
register_pass('remove_final_reshape', RemoveFinalReshape)
register_pass('reshape_stream', ReshapeStream)
register_pass('remove_useless_transpose', RemoveUselessTranspose)
48 changes: 48 additions & 0 deletions hls4ml/model/optimizer/passes/relu_merge.py
@@ -0,0 +1,48 @@
from hls4ml.model.optimizer import OptimizerPass

class MergeRelu(OptimizerPass):
def match(self, node):
supported_layers = ['Conv2D', 'Conv2DBatchnorm', 'Dense']
is_match = node.get_input_node().__class__.__name__ in supported_layers

# hls4ml names ReLU activations 'Activation'
is_match = is_match and (node.__class__.__name__ == 'Activation')
return is_match

def transform(self, model, node):
# Merge ReLU and Convolution/Dense layer
previous_node = node.get_input_node()
previous_node.index = node.index
previous_node.set_merged_relu(True) # Turn on merged_relu flag for this Conv/Dense layer
if 'Conv2D' in previous_node.__class__.__name__:
if previous_node.get_attr('data_format') == 'channels_last':
shape = [previous_node.attributes['out_height'], previous_node.attributes['out_width'], previous_node.attributes['n_filt']]
dims = ['OUT_HEIGHT_{}'.format(previous_node.index), 'OUT_WIDTH_{}'.format(previous_node.index), 'N_FILT_{}'.format(previous_node.index)]
else:
shape = [previous_node.attributes['n_filt'], previous_node.attributes['out_height'], previous_node.attributes['out_width']]
dims = ['N_FILT_{}'.format(previous_node.index), 'OUT_HEIGHT_{}'.format(previous_node.index), 'OUT_WIDTH_{}'.format(previous_node.index)]
activation_precision, _ = model.config.get_precision(node, var='result')
previous_node.add_output_variable(shape, dims, precision=activation_precision)
if not node.get_output_nodes():
print("WARNING: {} is the output layer! No rewiring performed.".format(node.name))
model.remove_node(node, rewire=False)
else:
model.remove_node(node, rewire=True)
return True
elif 'Dense' in previous_node.__class__.__name__:
shape = previous_node.get_input_variable().shape[:]
shape[-1] = previous_node.attributes['n_out']
if len(shape) > 1:
dims = ['N_LAYER_{}_{}'.format(i, previous_node.index) for i in range(1, len(shape) + 1)]
else:
dims = ['N_LAYER_{}'.format(previous_node.index)]
print('shape: {}'.format(shape))
print('dims: {}'.format(dims))
activation_precision, _ = model.config.get_precision(node, var='result')
previous_node.add_output_variable(shape, dims, precision=activation_precision)
if not node.get_output_nodes():
print("WARNING: {} is the output layer! No rewiring performed.".format(node.name))
model.remove_node(node, rewire=False)
else:
model.remove_node(node, rewire=True)
return True
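The relu_merge pass above fuses a ReLU Activation node into the preceding Conv2D, Conv2DBatchnorm or Dense layer: it sets that layer's merged_relu flag, re-creates its output variable with the activation's shape and result precision, and removes the Activation node, so the HLS templates see the merged_relu and out_t parameters added in hls_layers.py. The get_merged_relu accessor added in hls_model.py reads a model-level MergedRelu key; this diff does not show where that key is consumed, but a config enabling the behaviour would presumably look like the sketch below (keras_model is a placeholder; only the MergedRelu key is specific to this PR, the rest is the usual hls4ml conversion flow):

import hls4ml

config = hls4ml.utils.config_from_keras_model(keras_model, granularity='name')
config['Model']['MergedRelu'] = True   # model-level flag read by get_merged_relu

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    hls_config=config,
    output_dir='my-hls-prj',
    backend='VivadoAccelerator',
)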
98 changes: 98 additions & 0 deletions hls4ml/model/profiling.py
@@ -1,3 +1,6 @@
from pyDigitalWaveTools.vcd.parser import VcdParser

import hls4ml
from hls4ml.model.hls_model import HLSModel
from hls4ml.model.hls_layers import IntegerPrecisionType, FixedPrecisionType
import matplotlib.pyplot as plt
@@ -26,6 +29,101 @@
__torch_profiling_enabled__ = False


def optimize_fifos_depth(hls_model, init_large_fifo=True, reset=True, csim=True, synth=True,
cosim=True, validation=True, export=True, vsynth=True, **kwargs,):

cfg = hls_model.config.config.copy()
hls_config = cfg['HLSConfig']
out_dir = hls_model.config.get_output_dir()

values = []

def populate_values(name, data, depth):
values.append({'name': name, 'data': [], 'max': 0, 'depth': 0})
get_values = lambda x: int(x[1][1:], 2)
values[-1]['data'] = [get_values(x) for x in data]
values[-1]['max'] = max(values[-1]['data'])
values[-1]['depth'] = int(depth[1:], 2)

if not hls_config['Model']['FIFO_opt']:
raise Exception('To use this optimization you have to set `FIFO_opt` field to True in the HLS config')


# initialize all the fifos to 10000 so that they will be automatically implemented in BRAMs and so they will be
# profiled

if init_large_fifo:

for k,_ in hls_model.output_vars.items():
if k not in hls_config['LayerName']:
hls_config['LayerName'][k] = {'StreamDepth': 10000}
else:
hls_config['LayerName'][k]['StreamDepth'] = 10000

if hls_model.config.get_config_value('Backend') == 'VivadoAccelerator':
hls_config['LayerName']['in_local'] = {'StreamDepth' : 10000}
hls_config['LayerName']['out_local'] = {'StreamDepth': 10000}

cfg['OutputDir'] = out_dir + "_LARGE_FIFO"
cfg['HLSConfig'] = hls_config
hls_model = hls4ml.converters.keras_to_hls(cfg)


# run the build with FIFO_opt param set to 1 in order to generate the vcd file
hls_model.write()
hls_model.build(csim=True, cosim=True, synth=True, vsynth=False, export=False, validation=True)

with open(hls_model.config.get_output_dir() + '/' + hls_model.config.get_project_name() + '_prj' + '/solution1/sim/verilog/fifo_opt.vcd') as vcd_file:
vcd = VcdParser()
vcd.parse(vcd_file)
data = vcd.scope.toJson()

# wrapper fifos - useful only with VivadoAccelerator backend
if hls_model.config.get_config_value('Backend') == 'VivadoAccelerator':
for i in range(1, len(data['children'][0]['children'][0]['children'])):
populate_values(data['children'][0]['children'][0]['children'][i]['name'],
data['children'][0]['children'][0]['children'][i]['children'][0]['data'],
data['children'][0]['children'][0]['children'][i]['children'][1]['data'][0][1])

# model layers fifos
n_elem = len(data['children'][0]['children'][0]['children'][0]['children'])
for i in range(n_elem):
populate_values(data['children'][0]['children'][0]['children'][0]['children'][i]['name'],
data['children'][0]['children'][0]['children'][0]['children'][i]['children'][0]['data'],
data['children'][0]['children'][0]['children'][0]['children'][i]['children'][1]['data'][0][1])

maxs = [{'name': i['name'], 'max': i['max'], 'depth': i['depth']} for i in values]

with open(hls_model.config.get_output_dir() + '/max_depth.json', 'w') as f:
json.dump(maxs, f, indent=4)

new_config = cfg.copy()['HLSConfig']
new_config['Model']['FIFO_opt'] = 0
for k, v in hls_model.output_vars.items():
filtered_max = [x['max'] for x in maxs if v.cppname in x['name']]
if len(filtered_max) == 0:
continue
if len(filtered_max) > 1:
print('WARNING! Check names of FIFOs')
if k not in new_config['LayerName']:
new_config['LayerName'][k] = {'StreamDepth': filtered_max[0] + 1}
else:
new_config['LayerName'][k]['StreamDepth'] = filtered_max[0] + 1
for x in maxs:
if 'in_local' in x['name']:
new_config['LayerName']['in_local'] = {'StreamDepth': x['max'] + 1}
elif 'out_local' in x['name']:
new_config['LayerName']['out_local'] = {'StreamDepth': x['max'] + 1}

cfg['OutputDir'] = out_dir + '_FIFO_OPT'
cfg['HLSConfig'] = new_config
hls_model = hls4ml.converters.keras_to_hls(cfg)
hls_model.write()
hls_model.build(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth)
print('[hls4ml] - FIFO optimization completed')
return hls_model


def get_unoptimized_hlsmodel(model):
from hls4ml.converters import convert_from_config

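For reference, a minimal usage sketch of the new optimize_fifos_depth helper (cfg below is a placeholder for a Keras-derived hls4ml configuration dict, typically with io_type io_stream and a Vivado or VivadoAccelerator backend). The helper requires the model-level FIFO_opt flag, first rebuilds the project with every stream depth forced to 10000 (output directory suffixed _LARGE_FIFO), runs C simulation and co-simulation to record per-FIFO occupancy from fifo_opt.vcd, writes max_depth.json, and then rebuilds with each layer's StreamDepth set to the observed maximum plus one (directory suffixed _FIFO_OPT):

import hls4ml
from hls4ml.model.profiling import optimize_fifos_depth

cfg['HLSConfig']['Model']['FIFO_opt'] = 1        # required, see the check in the helper
hls_model = hls4ml.converters.keras_to_hls(cfg)  # the helper re-runs keras_to_hls on a copy of cfg

opt_model = optimize_fifos_depth(hls_model,
                                 init_large_fifo=True,
                                 reset=True, csim=True, synth=True,
                                 cosim=True, validation=True,
                                 export=True, vsynth=True)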