diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py
index bd85937d89..e9f3056108 100644
--- a/hls4ml/backends/fpga/fpga_backend.py
+++ b/hls4ml/backends/fpga/fpga_backend.py
@@ -24,6 +24,7 @@
     GarNetStack,
     GlobalPooling1D,
     GlobalPooling2D,
+    LayerNormalization,
     MatMul,
     Merge,
     Pooling1D,
@@ -71,6 +72,7 @@ def __init__(self, name):
             Dot,
             Conv,
             MatMul,
+            LayerNormalization,
         ]

         for layer in accum_layers:
diff --git a/hls4ml/backends/vivado/passes/core_templates.py b/hls4ml/backends/vivado/passes/core_templates.py
index 1393cdfb49..7f0ec23bd6 100644
--- a/hls4ml/backends/vivado/passes/core_templates.py
+++ b/hls4ml/backends/vivado/passes/core_templates.py
@@ -1,6 +1,15 @@
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
-from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax
+from hls4ml.model.layers import (
+    Activation,
+    BatchNormalization,
+    Dense,
+    HardActivation,
+    LayerNormalization,
+    ParametrizedActivation,
+    PReLU,
+    Softmax,
+)
 from hls4ml.model.optimizer.passes.hgq_proxy_model import UnaryLUT

 # Dense templates
@@ -121,6 +130,58 @@ def format(self, node):
         return self.template.format(**params)


+# LayerNormalization templates
+
+layernorm_config_template = """struct config{index} : nnet::layernorm_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned seq_len = {seq_len};
+    static const unsigned axis = {axis};
+    static const unsigned epsilon_power_of_10 = {epsilon_power_of_10};
+    static const unsigned table_range_power2 = {table_range_power2};
+    static const unsigned table_size = {table_size};
+    typedef {accum_t.name} accum_t;
+    typedef {bias_t.name} bias_t;
+    typedef {scale_t.name} scale_t;
+    typedef {table_t.name} table_t;
+    static const unsigned io_type = nnet::{iotype};
+    static const unsigned reuse_factor = {reuse};
+    template <class x_T, class y_T>
+    using product = nnet::product::{product_type}<x_T, y_T>;
+}};\n"""
+
+layernorm_function_template = 'nnet::layernormalize<{input_t}, {output_t}, {config}>({input}, {output}, {scale}, {bias});'
+
+layernorm_include_list = ['nnet_utils/nnet_layernorm.h']
+
+
+class LayerNormalizationConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(LayerNormalization)
+        self.template = layernorm_config_template
+
+    def format(self, node):
+        params = self._default_config_params(node)
+        params['n_in'] = node.get_input_variable().size_cpp()
+        params['product_type'] = get_backend('vivado').product_type(
+            node.get_input_variable().type.precision, node.get_weights('scale').type.precision
+        )
+
+        return self.template.format(**params)
+
+
+class LayerNormalizationFunctionTemplate(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__(LayerNormalization, include_header=layernorm_include_list)
+        self.template = layernorm_function_template
+
+    def format(self, node):
+        params = self._default_function_params(node)
+        params['scale'] = node.get_weights('scale').name
+        params['bias'] = node.get_weights('bias').name
+
+        return self.template.format(**params)
+
+
 # Activation templates

 activ_config_template = """struct {type}_config{index} : nnet::activ_config {{
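For readers unfamiliar with the templating step, here is a minimal sketch (not part of the diff) of how such a config struct gets rendered. The trimmed-down template and all parameter values below are made up for illustration; hls4ml fills the real `layernorm_config_template` with the layer's actual attributes and named types.

```python
from types import SimpleNamespace

# Trimmed stand-in for layernorm_config_template, showing the fill-in mechanics
template = (
    'struct config{index} : nnet::layernorm_config {{\n'
    '    static const unsigned n_in = {n_in};\n'
    '    static const unsigned seq_len = {seq_len};\n'
    '    typedef {accum_t.name} accum_t;\n'
    '    template <class x_T, class y_T>\n'
    '    using product = nnet::product::{product_type}<x_T, y_T>;\n'
    '}};\n'
)

# Hypothetical parameter values; in hls4ml these come from the layer's attributes
params = {
    'index': 2,
    'n_in': 80,  # seq_len * features = 10 * 8
    'seq_len': 10,
    'accum_t': SimpleNamespace(name='ap_fixed<14,4,AP_RND_CONV,AP_SAT>'),
    'product_type': 'mult',
}
print(template.format(**params))
```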
diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py
index 0a18d4503d..81deaebe5f 100644
--- a/hls4ml/backends/vivado/vivado_backend.py
+++ b/hls4ml/backends/vivado/vivado_backend.py
@@ -21,6 +21,7 @@
     GarNet,
     GarNetStack,
     Layer,
+    LayerNormalization,
     Pooling1D,
     Pooling2D,
     SeparableConv1D,
@@ -30,7 +31,7 @@
     TimeDistributed,
 )
 from hls4ml.model.optimizer import get_backend_passes, layer_optimizer
-from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, PackedType
+from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, PackedType, RoundingMode, SaturationMode
 from hls4ml.report import parse_vivado_report
 from hls4ml.utils import attribute_descriptions as descriptions
@@ -84,6 +85,32 @@ def _register_layer_attributes(self):
             )
             self.attribute_map[layer] = attrs

+        # Add LayerNorm attributes
+        ln_layers = [LayerNormalization]
+        for layer in ln_layers:
+            attrs = self.attribute_map.get(layer, [])
+            attrs.append(ConfigurableAttribute('table_range_power2', default=0, description=descriptions.table_range_power2))
+            attrs.append(ConfigurableAttribute('table_size', default=4096, description=descriptions.table_size))
+            attrs.append(
+                TypeAttribute(
+                    'table',
+                    default=FixedPrecisionType(
+                        8, 5, signed=False, rounding_mode=RoundingMode.RND_CONV, saturation_mode=SaturationMode.SAT
+                    ),
+                    description=descriptions.table_type,
+                )
+            )
+            attrs.append(
+                TypeAttribute(
+                    'accum',
+                    default=FixedPrecisionType(
+                        14, 4, signed=True, rounding_mode=RoundingMode.RND_CONV, saturation_mode=SaturationMode.SAT
+                    ),
+                    description=descriptions.accum_type,
+                )
+            )
+            self.attribute_map[layer] = attrs
+
         # Add TimeStepLoopParallelism to TimeDistributed
         attrs = self.attribute_map.get(TimeDistributed, [])
         attrs.append(
diff --git a/hls4ml/converters/keras/core.py b/hls4ml/converters/keras/core.py
index 637bb6d401..ceea920195 100644
--- a/hls4ml/converters/keras/core.py
+++ b/hls4ml/converters/keras/core.py
@@ -1,3 +1,5 @@
+import math
+
 from hls4ml.converters.keras_to_hls import get_weights_data, keras_handler, parse_default_keras_layer
 from hls4ml.model.quantizers import BinaryQuantizer, TernaryQuantizer
 from hls4ml.model.types import IntegerPrecisionType
@@ -129,6 +131,40 @@ def parse_batchnorm_layer(keras_layer, input_names, input_shapes, data_reader):
     return layer, [shape for shape in input_shapes[0]]


+@keras_handler('LayerNormalization')
+def parse_layernorm_layer(keras_layer, input_names, input_shapes, data_reader):
+    assert 'LayerNormalization' in keras_layer['class_name']
+
+    layer = parse_default_keras_layer(keras_layer, input_names)
+
+    in_size = 1
+    for dim in input_shapes[0][1:]:
+        in_size *= dim
+    layer['n_in'] = layer['n_out'] = in_size
+
+    if not ((len(input_shapes[0])) == 3):
+        raise Exception(
+            'input size is not currently supported by hls4ml; '
+            'only three-dimensional input (including batch dimension) is supported'
+        )
+    layer['seq_len'] = input_shapes[0][-2]
+
+    if not (keras_layer['config']['axis'][0] == 2):
+        raise Exception('assigning the axis is not currently supported by hls4ml; only axis 2 is supported')
+    layer['axis'] = keras_layer['config']['axis'][0]
+
+    layer['gamma_data'] = get_weights_data(data_reader, layer['name'], 'gamma')
+    layer['beta_data'] = get_weights_data(data_reader, layer['name'], 'beta')
+
+    if keras_layer['config']['epsilon'] <= 0:
+        raise Exception('epsilon must be positive')
+    layer['epsilon_power_of_10'] = -round(math.log10(keras_layer['config']['epsilon']))
+    if layer['epsilon_power_of_10'] <= 0:
+        raise Exception('epsilon must be less than 1e-1')
+
+    return layer, [shape for shape in input_shapes[0]]
+
+
 @keras_handler('Embedding')
 def parse_embedding_layer(keras_layer, input_names, input_shapes, data_reader):
     assert 'Embedding' in keras_layer['class_name']
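Both converters collapse the framework's epsilon into a positive integer power of ten, since the HLS lookup table (see nnet_layernorm.h further down) uses `10**-epsilon_power_of_10` as its lower bound. A standalone sketch of the mapping; note that `round()` snaps epsilon to the nearest decade, so for example 5e-3 maps to 2:

```python
import math

# Mirrors the converter logic: epsilon is snapped to the nearest decade and
# stored as a positive power of ten (Keras' default 1e-3 becomes 3).
for eps in (1e-3, 1e-2, 1e-4, 5e-3):
    power = -round(math.log10(eps))
    if power <= 0:
        raise Exception('epsilon must be less than 1e-1')
    print(f'epsilon={eps} -> epsilon_power_of_10={power}')
```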
diff --git a/hls4ml/converters/pytorch/core.py b/hls4ml/converters/pytorch/core.py
index 57c42f401f..c42604db08 100644
--- a/hls4ml/converters/pytorch/core.py
+++ b/hls4ml/converters/pytorch/core.py
@@ -1,3 +1,5 @@
+import math
+
 import numpy as np

 from hls4ml.converters.pytorch_to_hls import pytorch_handler
@@ -157,3 +159,39 @@ def parse_batchnorm_layer(operation, layer_name, input_names, input_shapes, node
     layer['n_filt'] = input_shapes[0][1]  # Always channel first for Pytorch

     return layer, [shape for shape in input_shapes[0]]
+
+
+@pytorch_handler('LayerNorm')
+def parse_layernorm_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config):
+    assert 'LayerNorm' in operation
+
+    layer = {}
+
+    layer['class_name'] = 'LayerNormalization'
+    layer['name'] = layer_name
+    layer['inputs'] = input_names
+
+    in_size = 1
+    for dim in input_shapes[0][1:]:
+        in_size *= dim
+    layer['n_in'] = layer['n_out'] = in_size
+
+    if not ((len(input_shapes[0])) == 3):
+        raise Exception(
+            'input size is not currently supported by hls4ml; '
+            'only three-dimensional input (including batch dimension) is supported'
+        )
+    layer['seq_len'] = input_shapes[0][-2]
+
+    layer['axis'] = 2
+
+    layer['gamma_data'] = class_object.weight.data.numpy()
+    layer['beta_data'] = class_object.bias.data.numpy()
+
+    if class_object.eps <= 0:
+        raise Exception('epsilon must be positive')
+    layer['epsilon_power_of_10'] = -round(math.log10(class_object.eps))
+    if layer['epsilon_power_of_10'] <= 0:
+        raise Exception('epsilon must be less than 1e-1')
+
+    return layer, [shape for shape in input_shapes[0]]
diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py
index 0efeaafa3d..ff33c70ed4 100644
--- a/hls4ml/model/layers.py
+++ b/hls4ml/model/layers.py
@@ -1058,6 +1058,31 @@ def add_bias(self, bias, quantizer=None, precision=None):
         self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer, precision=precision)


+class LayerNormalization(Layer):
+    _expected_attributes = [
+        Attribute('n_in'),
+        Attribute('seq_len'),
+        Attribute('axis', value_type=int, default=2),
+        Attribute('epsilon_power_of_10', value_type=int, default=3),
+        WeightAttribute('scale'),
+        WeightAttribute('bias'),
+        TypeAttribute('scale'),
+        TypeAttribute('bias'),
+    ]
+
+    def initialize(self):
+        inp = self.get_input_variable()
+        shape = inp.shape
+        dims = inp.dim_names
+        self.add_output_variable(shape, dims)
+
+        scale = self.get_attr('gamma_data')
+        bias = self.get_attr('beta_data')
+
+        self.add_weights_variable(name='scale', var_name='s{index}', data=scale)
+        self.add_weights_variable(name='bias', var_name='b{index}', data=bias)
+
+
 class Merge(Layer):
     def initialize(self):
         assert len(self.inputs) == 2
@@ -1710,6 +1735,7 @@ def initialize(self):
     'BatchNormOnnx': BatchNormOnnx,
     'LayerGroup': LayerGroup,
     'SymbolicExpression': SymbolicExpression,
+    'LayerNormalization': LayerNormalization,
     # TensorFlow-specific layers:
     'BiasAdd': BiasAdd,
 }
diff --git a/hls4ml/model/optimizer/passes/convert_to_channels_last.py b/hls4ml/model/optimizer/passes/convert_to_channels_last.py
index 6511a6967b..5668da3e4b 100644
--- a/hls4ml/model/optimizer/passes/convert_to_channels_last.py
+++ b/hls4ml/model/optimizer/passes/convert_to_channels_last.py
@@ -2,7 +2,7 @@
 # Based on https://github.com/fastmachinelearning/qonnx/blob/
 # 12c96a3ded06beacab08e0f554e4ed014476c0aa/src/qonnx/transformation/channels_last.py

-from hls4ml.model.layers import Concatenate, Dense, Input, Reshape, Transpose
+from hls4ml.model.layers import Concatenate, Dense, Input, LayerNormalization, Reshape, Transpose
 from hls4ml.model.optimizer import OptimizerPass
 from hls4ml.model.types import WeightVariable
@@ -44,6 +44,25 @@ def transform(self, model, node):
                 node.get_output_variable().shape = input_shape
                 dim_names = [f'N_INPUT_{i}_{node.index}' for i in range(1, len(input_shape) + 1)]
                 node.get_output_variable().dim_names = dim_names
+            elif (
+                isinstance(node, LayerNormalization)
+                and not model.config.config['HLSConfig']['Model']['ChannelsLastConversion'] == "off"
+            ):
+                # LayerNorm only works on the last dimension in PyTorch
+                perm = [1, 0]
+                pre_transpose = model.make_node(
+                    'Transpose', f'pre_transpose_for_{node.get_attr("name")}', {'perm': perm}, [node.get_input_node().name]
+                )
+                pre_transpose.channels_last_converted = True
+                model.insert_node(pre_transpose)
+
+                # If not the output layer, transpose again
+                if not node.get_attr('name') in model.outputs or model.config.config['HLSConfig']['Model']['TransposeOutputs']:
+                    post_transpose = model.make_node(
+                        'Transpose', f'post_transpose_for_{node.get_attr("name")}', {'perm': perm}, [node.name]
+                    )
+                    post_transpose.channels_last_converted = True
+                    model.insert_node(post_transpose)
             else:
                 # Transpose weight tensors
                 tensors = ['weight', 'depthwise', 'pointwise', 'zero_bias', 'scale', 'recurrent_weight']
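The channels-last pass above wraps LayerNormalization in a pair of `[1, 0]` transposes because `nn.LayerNorm` normalizes the last axis, and the channels-last conversion moves that axis. A NumPy sketch (illustrative only, not the PR's code) of why the transposes restore the original semantics:

```python
import numpy as np

def layernorm_lastdim(x, eps=1e-5):
    # Layer norm over the last axis, as nn.LayerNorm defines it
    return (x - x.mean(-1, keepdims=True)) / np.sqrt(x.var(-1, keepdims=True) + eps)

rng = np.random.default_rng(0)
x = rng.random((10, 8))  # (seq, features); PyTorch normalizes over the 8 features

x_cl = x.T                           # layout after the channels-last conversion
wrong = layernorm_lastdim(x_cl)      # would normalize over seq, not features
right = layernorm_lastdim(x_cl.T).T  # pre-transpose, normalize, post-transpose

assert not np.allclose(wrong.T, layernorm_lastdim(x))
assert np.allclose(right.T, layernorm_lastdim(x))
```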
diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py
index 919bc0c3c2..caa45991ad 100644
--- a/hls4ml/model/optimizer/passes/infer_precision.py
+++ b/hls4ml/model/optimizer/passes/infer_precision.py
@@ -51,7 +51,7 @@ def _infer_precision(self, node, types_to_infer):
         if node_class in ['Dense']:
             return self._infer_dense_precision(node, types_to_infer)

-        if node_class in ['BatchNormalization', 'ApplyAlpha']:
+        if node_class in ['BatchNormalization', 'ApplyAlpha', 'LayerNormalization']:
             return self._infer_bn_precision(node, types_to_infer)

         if node_class in ['Conv1D', 'Conv2D', 'PointwiseConv1D', 'PointwiseConv2D', 'Conv2DBatchnorm']:
diff --git a/hls4ml/model/profiling.py b/hls4ml/model/profiling.py
index acc4ccfa44..443c33d64c 100644
--- a/hls4ml/model/profiling.py
+++ b/hls4ml/model/profiling.py
@@ -285,6 +285,18 @@ def _keras_layer(layer):
     return layer.get_weights(), ['w', 'b']


+def _keras_layernorm(layer):
+    weights = layer.get_weights()
+
+    gamma = weights[0]
+    beta = weights[1]
+
+    scale = gamma
+    bias = beta
+
+    return [scale, bias], ['s', 'b']
+
+
 def _keras_lstm(layer):
     return layer.get_weights(), ['w', 'u', 'b']

@@ -294,6 +306,7 @@ def _keras_lstm(layer):
     {
         'BatchNormalization': _keras_batchnorm,
         'QBatchNormalization': _keras_batchnorm,
+        'LayerNormalization': _keras_layernorm,
         'LSTM': _keras_lstm,
         'QLSTM': _keras_lstm,
     },
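As a plain NumPy reference for the flattened data layout used by the HLS kernel introduced in nnet_layernorm.h just below: `n_in = seq_len * dim` elements, normalized independently at each sequence position. The helper name is mine, and folding epsilon under the square root is only an approximation of the kernel's table lower bound:

```python
import numpy as np

def layernormalize_ref(data, scale, bias, seq_len, eps=1e-3):
    # data holds n_in = seq_len * dim flattened values, like the
    # data[CONFIG_T::n_in] argument of nnet::layernormalize; each of the
    # seq_len positions is normalized over its dim features independently.
    dim = data.size // seq_len
    x = data.reshape(seq_len, dim)
    mu = x.mean(axis=1, keepdims=True)
    var = x.var(axis=1, keepdims=True)
    # The HLS kernel looks 1/sqrt up in a table whose lower bound is
    # 10**-epsilon_power_of_10; adding eps under the sqrt approximates that.
    return ((x - mu) / np.sqrt(var + eps) * scale + bias).reshape(-1)

rng = np.random.default_rng(0)
out = layernormalize_ref(rng.random(80), np.ones(8), np.zeros(8), seq_len=10)
print(out.shape)  # (80,)
```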
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_layernorm.h b/hls4ml/templates/vivado/nnet_utils/nnet_layernorm.h
new file mode 100644
index 0000000000..743ac09fc3
--- /dev/null
+++ b/hls4ml/templates/vivado/nnet_utils/nnet_layernorm.h
@@ -0,0 +1,139 @@
+#ifndef NNET_LAYERNORM_H_
+#define NNET_LAYERNORM_H_
+
+#include "hls_stream.h"
+#include "nnet_common.h"
+#include "nnet_dense.h"
+#include <math.h>
+
+#include "hls_math.h"
+
+namespace nnet {
+
+struct layernorm_config {
+    // Internal data type definitions
+    typedef float bias_t;
+    typedef float scale_t;
+    typedef float accum_t;
+    typedef float table_t;
+
+    // Layer Sizes
+    static const unsigned n_in = 20;
+    static const unsigned seq_len = 4;
+    static const unsigned axis = 2;
+    static const unsigned epsilon_power_of_10 = 3;
+    static const unsigned table_range_power2 = 0;
+    static const unsigned table_size = 1024;
+
+    // Resource reuse info
+    static const unsigned io_type = io_parallel;
+    static const unsigned reuse_factor = 1;
+
+    template <class x_T, class y_T> using product = nnet::product::mult<x_T, y_T>;
+};
+
+template <typename CONFIG_T, int N_TABLE> void init_invert_sqr_table(typename CONFIG_T::table_t table_out[N_TABLE]) {
+    // Inversion function:
+    //   result = 1/sqrt(x)
+    float min_val = pow(10.0f, -(int)CONFIG_T::epsilon_power_of_10);
+    float max_val = pow(2.0f, -(int)CONFIG_T::table_range_power2);
+    float step = max_val / (float)(N_TABLE);
+    for (int ii = 0; ii < N_TABLE; ii++) {
+        float in_val = min_val + step * ii;
+        table_out[ii] = (typename CONFIG_T::table_t)(1.0 / sqrt(in_val));
+    }
+}
+
+template <class data_T, class res_T, typename CONFIG_T>
+void layernorm_1d(data_T data[CONFIG_T::n_in / CONFIG_T::seq_len], res_T res[CONFIG_T::n_in / CONFIG_T::seq_len],
+                  typename CONFIG_T::scale_t scale[CONFIG_T::n_in / CONFIG_T::seq_len],
+                  typename CONFIG_T::bias_t bias[CONFIG_T::n_in / CONFIG_T::seq_len]) {
+    #pragma HLS PIPELINE II=CONFIG_T::reuse_factor
+    #pragma HLS ARRAY_PARTITION variable=data complete
+    #pragma HLS ARRAY_PARTITION variable=res complete
+    int inv_range_inv = (int)1 << CONFIG_T::table_range_power2;
+    typename CONFIG_T::table_t deno_inver = 0;
+#ifdef __HLS_SYN__
+    bool initialized = false;
+    typename CONFIG_T::table_t invert_sqr_table[CONFIG_T::table_size];
+#else
+    static bool initialized = false;
+    static typename CONFIG_T::table_t invert_sqr_table[CONFIG_T::table_size];
+#endif
+    if (!initialized) {
+        init_invert_sqr_table<CONFIG_T, CONFIG_T::table_size>(invert_sqr_table);
+        initialized = true;
+    }
+
+    static const unsigned dim = CONFIG_T::n_in / CONFIG_T::seq_len;
+    typename CONFIG_T::accum_t sum_cache = 0;
+    typename CONFIG_T::accum_t sum_cache2 = 0;
+    typename CONFIG_T::accum_t var, mean, diff;
+    typename CONFIG_T::accum_t data_diff[dim];
+
+    #pragma HLS ARRAY_PARTITION variable=data_diff complete
+
+    const typename CONFIG_T::accum_t k_inv = 1.0 / dim;
+
+LAYERNORM_1D_SUM:
+    for (int i = 0; i < dim; ++i) {
+        sum_cache += static_cast<typename CONFIG_T::accum_t>(data[i]);
+    }
+    mean = CONFIG_T::template product<typename CONFIG_T::accum_t, typename CONFIG_T::accum_t>::product(sum_cache, k_inv);
+
+LAYERNORM_1D_VAR:
+    for (int i = 0; i < dim; ++i) {
+        data_diff[i] = static_cast<typename CONFIG_T::accum_t>(data[i]) - mean;
+        diff = data_diff[i] * data_diff[i];
+        sum_cache2 += diff;
+    }
+    var = CONFIG_T::template product<typename CONFIG_T::accum_t, typename CONFIG_T::accum_t>::product(sum_cache2, k_inv);
+
+    int index = (var) * (CONFIG_T::table_size) * inv_range_inv;
+    if (index < 0)
+        index = 0;
+    if (index > CONFIG_T::table_size - 1)
+        index = CONFIG_T::table_size - 1;
+    deno_inver = invert_sqr_table[index];
+
+LAYERNORM_1D_RESULT:
+    for (int i = 0; i < dim; ++i) {
+        res[i] = data_diff[i] * deno_inver * scale[i] + bias[i];
+    }
+}
+
+template <class data_T, class res_T, typename CONFIG_T>
+void layernormalize(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in],
+                    typename CONFIG_T::scale_t scale[CONFIG_T::n_in / CONFIG_T::seq_len],
+                    typename CONFIG_T::bias_t bias[CONFIG_T::n_in / CONFIG_T::seq_len]) {
+    static const unsigned dim = CONFIG_T::n_in / CONFIG_T::seq_len;
+    data_T in_val[dim];
+    res_T outval[dim];
+    // Use a function_instantiate in case it helps to explicitly optimize unchanging weights/biases
+    #pragma HLS function_instantiate variable=scale,bias
+
+    #pragma HLS ARRAY_PARTITION variable=scale complete
+    #pragma HLS ARRAY_PARTITION variable=bias complete
+    #pragma HLS ARRAY_PARTITION variable=in_val complete
+    #pragma HLS ARRAY_PARTITION variable=outval complete
+
+LAYERNORM_SEQ_LOOP:
+    for (int j = 0; j < CONFIG_T::seq_len; ++j) {
+        #pragma HLS PIPELINE
+    LAYERNORM_LOAD:
+        for (int i = 0; i < dim; ++i) {
+            #pragma HLS UNROLL
+            in_val[i] = data[j * dim + i];
+        }
+        layernorm_1d<data_T, res_T, CONFIG_T>(in_val, outval, scale, bias);
+    LAYERNORM_STORE:
+        for (int i = 0; i < dim; ++i) {
+            #pragma HLS UNROLL
+            res[j * dim + i] = outval[i];
+        }
+    }
+}
+
+} // namespace nnet
+
+#endif
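The kernel avoids a divide and a square root in the datapath by precomputing 1/sqrt(x) into a table indexed by the variance. A standalone sketch of the same scheme (table construction mirrors `init_invert_sqr_table`, indexing mirrors `layernorm_1d`; variable names are mine):

```python
import numpy as np

# The table spans roughly (10**-eps_pow, 2**-range_pow2] and the variance is
# scaled into an index; out-of-range indices saturate at the table ends.
eps_pow, range_pow2, table_size = 3, 0, 1024

min_val = 10.0 ** -eps_pow
max_val = 2.0 ** -range_pow2
step = max_val / table_size
table = 1.0 / np.sqrt(min_val + step * np.arange(table_size))  # init_invert_sqr_table

def rsqrt_lut(var):
    index = int(var * table_size * (1 << range_pow2))  # same indexing as layernorm_1d
    return table[np.clip(index, 0, table_size - 1)]

for var in (0.01, 0.1, 0.5):
    print(f'var={var}: lut={rsqrt_lut(var):.4f}, exact={1.0 / np.sqrt(var):.4f}')
```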
diff --git a/hls4ml/utils/attribute_descriptions.py b/hls4ml/utils/attribute_descriptions.py
index 05653f8fdf..94f5788de7 100644
--- a/hls4ml/utils/attribute_descriptions.py
+++ b/hls4ml/utils/attribute_descriptions.py
@@ -50,6 +50,13 @@
     'usage at the expense of serialized computation and higher latency/II.'
 )

+# LayerNorm-related attributes
+
+table_range_power2 = (
+    'The negative power of 2 that represents the range of the lookup table, '
+    'e.g. a value of 1 would represent a range of 0.5.'
+)
+
 time_distributed_loop = (
     'Controls the amount and type of parallelism in the loop over time steps. If set to "off", no parallelism will be used. '
     'If set to "unroll", the loop will be unrolled. This may result in excessive resource use and cannot be used in '
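A possible rule of thumb for picking `table_range_power2`, stated here as an assumption rather than something the PR documents: the table index saturates once the variance reaches `2**-table_range_power2`, so choose the largest power whose range still covers the variances seen on representative data:

```python
import numpy as np

# Hypothetical per-position variances gathered from calibration data
variances = np.array([0.03, 0.12, 0.4])
max_var = variances.max()

# Largest p with 2**-p >= max_var; clamp at 0 since the attribute is unsigned
power = max(0, int(np.floor(-np.log2(max_var))))
print(f'max variance {max_var} -> table_range_power2 = {power}')  # -> 1 (range 0.5)
```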
diff --git a/test/pytest/test_layernorm.py b/test/pytest/test_layernorm.py
new file mode 100644
index 0000000000..082982f24a
--- /dev/null
+++ b/test/pytest/test_layernorm.py
@@ -0,0 +1,80 @@
+from pathlib import Path
+
+import numpy as np
+import pytest
+from tensorflow.keras.layers import LayerNormalization
+from tensorflow.keras.models import Sequential
+
+import hls4ml
+
+test_root_path = Path(__file__).parent
+
+in_shape = (10, 8)
+atol = 5e-2
+
+
+@pytest.fixture(scope='module')
+def data():
+    np.random.seed(0)
+    return np.random.rand(100, *in_shape)
+
+
+@pytest.fixture(scope='module')
+def model():
+    model = Sequential()
+    layer = LayerNormalization(input_shape=in_shape)
+    model.add(layer)
+    model.compile()
+
+    np.random.seed(0)
+    weights = np.random.normal(1.0, 0.1, size=(in_shape[-1],))
+    biases = np.random.normal(0.0, 0.1, size=(in_shape[-1],))
+    layer.set_weights([weights, biases])
+    return model
+
+
+@pytest.fixture(scope='module')
+def custom_epsilon_model():
+    model = Sequential()
+    layer = LayerNormalization(input_shape=in_shape, epsilon=1e-2)
+    model.add(layer)
+    model.compile()
+    return model
+
+
+@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
+def test_layernorm_parsing(custom_epsilon_model, backend):
+    custom_config = hls4ml.utils.config_from_keras_model(custom_epsilon_model, granularity='name', backend=backend)
+    custom_config['LayerName']['layer_normalization']['Precision']['accum'] = 'ap_fixed<10,4>'
+    custom_config['LayerName']['layer_normalization']['table_t'] = 'ap_fixed<12,5>'
+    custom_config['LayerName']['layer_normalization']['TableSize'] = 2048
+    custom_config['LayerName']['layer_normalization']['TableRangePower2'] = 1
+    output_dir = str(test_root_path / f'hls4mlprj_layernorm_config_{backend}_io_parallel')
+    hls_model = hls4ml.converters.convert_from_keras_model(
+        custom_epsilon_model, backend=backend, hls_config=custom_config, io_type='io_parallel', output_dir=output_dir
+    )
+    hls_model.compile()
+
+    # Check that custom configuration is picked up correctly
+    hls_layer = list(hls_model.get_layers())[1]  # 0 is input, 1 is LayerNorm
+    assert hls_layer.attributes['accum_t'].precision.definition_cpp() == 'ap_fixed<10,4>'
+    assert hls_layer.attributes['table_t'].precision.definition_cpp() == 'ap_fixed<12,5>'
+    assert hls_layer.attributes['table_size'] == 2048
+    assert hls_layer.attributes['table_range_power2'] == 1
+    assert hls_layer.attributes['epsilon_power_of_10'] == 2
+
+
+# Currently only Vivado/Vitis in io_parallel mode is supported
+@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
+def test_layernorm_accuracy(model, data, backend):
+    config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend)
+    output_dir = str(test_root_path / f'hls4mlprj_layernorm_{backend}_io_parallel')
+    hls_model = hls4ml.converters.convert_from_keras_model(
+        model, backend=backend, hls_config=config, io_type='io_parallel', output_dir=output_dir
+    )
+    hls_model.compile()
+
+    # Predict
+    y_keras = model.predict(data).flatten()
+    y_hls = hls_model.predict(data).flatten()
+    np.testing.assert_allclose(y_keras, y_hls, rtol=0, atol=atol, verbose=True)
diff --git a/test/pytest/test_layernorm_pytorch.py b/test/pytest/test_layernorm_pytorch.py
new file mode 100644
index 0000000000..ff41f2369a
--- /dev/null
+++ b/test/pytest/test_layernorm_pytorch.py
@@ -0,0 +1,79 @@
+from collections import OrderedDict
+from pathlib import Path
+
+import numpy as np
+import pytest
+import torch
+from torch import nn
+
+import hls4ml
+
+test_root_path = Path(__file__).parent
+
+in_shape = (10, 8)
+atol = 5e-2
+
+
+@pytest.fixture(scope='module')
+def data():
+    np.random.seed(0)
+    return np.random.rand(100, *in_shape)
+
+
+@pytest.fixture(scope='module')
+def model():
+    model = nn.Sequential(OrderedDict([('layer_normalization', nn.LayerNorm(in_shape[-1]))]))
+    model.eval()
+
+    with torch.no_grad():
+        torch.manual_seed(0)
+        nn.init.normal_(model[0].weight, mean=1.0, std=0.1)
+        nn.init.normal_(model[0].bias, mean=0.0, std=0.1)
+    return model
+
+
+@pytest.fixture(scope='module')
+def custom_epsilon_model():
+    model = nn.Sequential(OrderedDict([('layer_normalization', nn.LayerNorm(in_shape[-1], eps=1e-4))]))
+    model.eval()
+    return model
+
+
+@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
+def test_layernorm_parsing(custom_epsilon_model, backend):
+    custom_config = hls4ml.utils.config_from_pytorch_model(
+        custom_epsilon_model, in_shape, granularity='name', backend=backend, channels_last_conversion='off'
+    )
+    custom_config['LayerName']['layer_normalization']['Precision']['accum'] = 'ap_fixed<10,4>'
+    custom_config['LayerName']['layer_normalization']['table_t'] = 'ap_fixed<12,5>'
+    custom_config['LayerName']['layer_normalization']['TableSize'] = 2048
+    custom_config['LayerName']['layer_normalization']['TableRangePower2'] = 1
+    output_dir = str(test_root_path / f'hls4mlprj_layernorm_pytorch_config_{backend}_io_parallel')
+    hls_model = hls4ml.converters.convert_from_pytorch_model(
+        custom_epsilon_model, backend=backend, hls_config=custom_config, io_type='io_parallel', output_dir=output_dir
+    )
+    hls_model.compile()
+
+    # Check that custom configuration is picked up correctly
+    hls_layer = list(hls_model.get_layers())[1]  # 0 is input, 1 is LayerNorm
+    assert hls_layer.attributes['accum_t'].precision.definition_cpp() == 'ap_fixed<10,4>'
+    assert hls_layer.attributes['table_t'].precision.definition_cpp() == 'ap_fixed<12,5>'
+    assert hls_layer.attributes['table_size'] == 2048
+    assert hls_layer.attributes['table_range_power2'] == 1
+    assert hls_layer.attributes['epsilon_power_of_10'] == 4
+
+
+# Currently only Vivado/Vitis in io_parallel mode is supported
+@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
+def test_layernorm(model, data, backend):
+    config = hls4ml.utils.config_from_pytorch_model(model, in_shape, granularity='name', backend=backend)
+    output_dir = str(test_root_path / f'hls4mlprj_layernorm_pytorch_{backend}_io_parallel')
+    hls_model = hls4ml.converters.convert_from_pytorch_model(
+        model, backend=backend, hls_config=config, io_type='io_parallel', output_dir=output_dir
+    )
+    hls_model.compile()
+
+    # Predict
+    y_pytorch = model(torch.Tensor(data)).detach().numpy().flatten()
+    y_hls = hls_model.predict(data).flatten()
+    np.testing.assert_allclose(y_pytorch, y_hls, rtol=0, atol=atol, verbose=True)