diff --git a/.gitignore b/.gitignore index 8e68a1d37..2b72fd2b6 100644 --- a/.gitignore +++ b/.gitignore @@ -386,12 +386,7 @@ poetry.toml pyrightconfig.json ### VisualStudioCode ### -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -!.vscode/*.code-snippets +.vscode # Local History for Visual Studio Code .history/ diff --git a/awscrt/cbor.py b/awscrt/cbor.py new file mode 100644 index 000000000..559c16b6d --- /dev/null +++ b/awscrt/cbor.py @@ -0,0 +1,447 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. + +import _awscrt + +from awscrt import NativeResource +from enum import IntEnum +from typing import Callable, Any, Union + + +class AwsCborType(IntEnum): + # Corresponding to `enum aws_cbor_type` in aws/common/cbor.h. + # The members are handed to, and built from, the native binding as plain integers, + # so the numeric values must stay in sync with the C enum. + Unknown = 0 + UnsignedInt = 1 + NegativeInt = 2 + Float = 3 + Bytes = 4 + Text = 5 + ArrayStart = 6 + MapStart = 7 + Tag = 8 + Bool = 9 + Null = 10 + Undefined = 11 + Break = 12 + IndefBytes = 13 + IndefStr = 14 + IndefArray = 15 + IndefMap = 16 + + +class AwsCborEncoder(NativeResource): + """ Encoder for CBOR + This class is used to encode data into CBOR format. 
+ Typical usage of encoder: + - create an instance of AwsCborEncoder + - call write_* methods to write data into the encoder + - call get_encoded_data() to get the encoded data + - call reset() to clear the encoder for next use + """ + + def __init__(self): + super().__init__() + self._binding = _awscrt.cbor_encoder_new() + + def get_encoded_data(self) -> bytes: + """Return the current encoded data as bytes + + Returns: + bytes: The data encoded so far. + + NOTE(review): the native binding in source/cbor.c returns None (not b'') + when nothing has been encoded yet. + """ + return _awscrt.cbor_encoder_get_encoded_data(self._binding) + + def reset(self): + """Clear the current encoded data to empty bytes + """ + return _awscrt.cbor_encoder_reset(self._binding) + + def write_int(self, val: int): + """Write an int as cbor formatted, + val less than -2^64 will be encoded as Negative bignum for CBOR (Not implemented yet) + val between -2^64 to -1, inclusive, will be encoded as negative integer for CBOR + val between 0 to 2^64 - 1, inclusive, will be encoded as unsigned integer for CBOR + val greater than 2^64 - 1 will be encoded as Unsigned bignum for CBOR (Not implemented yet) + + Args: + val (int): value to be encoded and written to the encoded data. + """ + val_to_encode = val + if val < 0: + # For negative value, the value to encode is -1 - val. + val_to_encode = -1 - val + if val >= 0: + return _awscrt.cbor_encoder_write_uint(self._binding, val_to_encode) + else: + return _awscrt.cbor_encoder_write_negint(self._binding, val_to_encode) + + def write_float(self, val: float): + """Write a double as cbor formatted + If the val can be converted to an int without loss of precision, + it will be converted to int before being written as cbor formatted. + + Args: + val (float): value to be encoded and written to the encoded data. + """ + # Floating point numbers are usually implemented using double in C + return _awscrt.cbor_encoder_write_float(self._binding, val) + + def write_bytes(self, val: bytes): + """Write bytes as cbor formatted + + Args: + val (bytes): value to be encoded and written to the encoded data. 
+ """ + return _awscrt.cbor_encoder_write_bytes(self._binding, val) + + def write_text(self, val: str): + """Write text as cbor formatted + + Args: + val (str): value to be encoded and written to the encoded data. + """ + return _awscrt.cbor_encoder_write_text(self._binding, val) + + def write_array_start(self, number_entries: int): + """Add a start of array element, with the `number_entries` + for the number of cbor data items to be included in the array. + `number_entries` should be in 0 to 2^64 - 1, inclusive. + Otherwise, OverflowError will be raised. + + Args: + number_entries (int): number of entries in the array to be written + + Raises: + OverflowError: if `number_entries` does not fit in an unsigned 64-bit integer. + """ + # The count is sent to the native encoder as a uint64, whose maximum is 2**64 - 1. + if number_entries < 0 or number_entries > 2**64 - 1: + raise OverflowError("number_entries must be in the range 0 to 2^64 - 1") + + return _awscrt.cbor_encoder_write_array_start(self._binding, number_entries) + + def write_map_start(self, number_entries: int): + """Add a start of map element, with the `number_entries` + for the number of pair of cbor data items to be included in the map. + `number_entries` should be in 0 to 2^64 - 1, inclusive. + Otherwise, ValueError will be raised. + + Args: + number_entries (int): number of entries in the map to be written + + Raises: + ValueError: if `number_entries` does not fit in an unsigned 64-bit integer. + """ + # The count is sent to the native encoder as a uint64, whose maximum is 2**64 - 1. + if number_entries < 0 or number_entries > 2**64 - 1: + raise ValueError("number_entries must be in the range 0 to 2^64 - 1") + + return _awscrt.cbor_encoder_write_map_start(self._binding, number_entries) + + def write_tag(self, tag_number: int): + """Add a tag number. + Notes: it's user's responsibility to keep the integrity of the tagged value to follow the RFC8949 section 3.4 + + Args: + tag_number (int): the tag number, refer to RFC8949 section 3.4 for the valid tag number. + + Raises: + ValueError: if `tag_number` does not fit in an unsigned 64-bit integer. + """ + # The tag is sent to the native encoder as a uint64, whose maximum is 2**64 - 1. + if tag_number < 0 or tag_number > 2**64 - 1: + raise ValueError("tag_number must be in the range 0 to 2^64 - 1") + + return _awscrt.cbor_encoder_write_tag(self._binding, tag_number) + + def write_null(self): + """Add a simple value 22 as null. Refer to RFC8949 section 3.3 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.Null) + + def write_undefined(self): + """Add a simple value 23 as undefined. 
Refer to RFC8949 section 3.3 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.Undefined) + + def write_indef_array_start(self): + """Begin an indefinite-length array. Must be closed with write_break(). + Refer to RFC8949 section 3.2.2 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.IndefArray) + + def write_indef_map_start(self): + """Begin an indefinite-length map. Must be closed with write_break(). + Refer to RFC8949 section 3.2.2 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.IndefMap) + + def write_indef_bytes_start(self): + """Begin an indefinite-length byte string. Must be followed by definite-length + byte strings and closed with write_break(). + Refer to RFC8949 section 3.2.2 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.IndefBytes) + + def write_indef_text_start(self): + """Begin an indefinite-length text string. Must be followed by definite-length + text strings and closed with write_break(). + Refer to RFC8949 section 3.2.2 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.IndefStr) + + def write_break(self): + """Write a break code (0xFF) to close an indefinite-length item. + Refer to RFC8949 section 3.2.2 + """ + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.Break) + + def write_bool(self, val: bool): + """Add a simple value 20/21 as false/true. Refer to RFC8949 section 3.3 + + Args: + val (bool): value to be encoded; the native binding uses its truthiness to pick true or false. + """ + return _awscrt.cbor_encoder_write_bool(self._binding, val) + + def write_epoch_time(self, val: float): + """Helper to write second-based epoch time as cbor formatted + + Args: + val (float): second-based epoch time; the fractional part carries precision finer than 1 second. 
+ """ + # The epoch time is a tag 1, which is defined in RFC8949 section 3.4 + _awscrt.cbor_encoder_write_tag(self._binding, 1) + # write the epoch time as float, which will be encoded as small as possible without loss of precision. + return _awscrt.cbor_encoder_write_float(self._binding, val) + + def write_list(self, val: list): + """Generic helper API to write the whole list as cbor formatted. + Each element of the list is encoded the same way as `write_data_item` encodes it. + + Args: + val (list): list to be encoded and written to the encoded data. + """ + return _awscrt.cbor_encoder_write_py_list(self._binding, val) + + def write_dict(self, val: dict): + """Generic helper API to write the whole dict as cbor formatted. + Each key and value of the dict is encoded the same way as `write_data_item` encodes it. + + Args: + val (dict): dict to be encoded and written to the encoded data. + """ + return _awscrt.cbor_encoder_write_py_dict(self._binding, val) + + def write_data_item(self, data_item: Any): + """Generic API to write any supported type of data_item as cbor formatted. + Specifically, it will be based on the type of data_item to decide how to encode it. + The supported types of data_item are: + - int + - float + - bool + - bytes + - str + - list + - dict + - None (written as CBOR null) + + Args: + data_item (Any): any type of data_item. If the type is not supported to be converted to cbor format, ValueError will be raised. + """ + return _awscrt.cbor_encoder_write_data_item(self._binding, data_item) + + +class AwsCborDecoder(NativeResource): + """ Decoder for CBOR + This class is used to decode bytes of CBOR encoded data to python objects. + Typical usage of decoder: + - create an instance of AwsCborDecoder with bytes of cbor formatted data to be decoded + - call peek_next_type() to get the type of the next data item + - call the pop_next_*() matching the type of the next data item to decode it + - Until expected data decoded, call get_remaining_bytes_len() to check if there are any remaining bytes left. + - call reset_src() to reset the src data to be decoded, if needed. 
+ """ + + def __init__(self, src: bytes, on_epoch_time: Callable[[Union[int, float]], Any] = None, **kwargs): + """Create an instance of AwsCborDecoder with the src data to be decoded. + The src data should be a bytes of cbor formatted data. + + Args: + src (bytes): the bytes of cbor formatted data to be decoded. + on_epoch_time (Callable[[Union[int, float]], Any], optional): Optional callback invoked once tags + with id: 1, which is the epoch time, are encountered during decoding a data_item. + + The function should take the following arguments: + * `epoch_secs` (int | float): the seconds since epoch. + + The function should return + * `result` (Any): The PyObject the epoch time converted to. + **kwargs: accepted but not used by this constructor. + """ + super().__init__() + # Keep a reference to src: per the comment in the native binding, the python object + # is what keeps the src buffer alive while the native decoder reads from it. + self._src = src + self._binding = _awscrt.cbor_decoder_new(self, src) + self._on_epoch_time = on_epoch_time + + def _on_epoch_time_callback(self, epoch_secs: Union[int, float]) -> Any: + """Convert a decoded epoch time with the user callback, or return it unchanged when none was given. + + NOTE(review): presumably invoked by the native decoder for tag 1; the caller is not visible here. + """ + if self._on_epoch_time is not None: + return self._on_epoch_time(epoch_secs) + else: + # just default to the numeric type. + return epoch_secs + + def peek_next_type(self) -> AwsCborType: + """Return the AwsCborType of the next data item in the cbor formatted data + """ + return AwsCborType(_awscrt.cbor_decoder_peek_type(self._binding)) + + def get_remaining_bytes_len(self) -> int: + """Return the number of bytes not consumed yet of the src data. + """ + return _awscrt.cbor_decoder_get_remaining_bytes_len(self._binding) + + def get_remaining_bytes(self) -> bytes: + """Return the remaining bytes not consumed yet of the src data. + """ + remaining_length = _awscrt.cbor_decoder_get_remaining_bytes_len(self._binding) + # The guard matters: `self._src[-0:]` would return the whole buffer instead of nothing. + return self._src[-remaining_length:] if remaining_length > 0 else b'' + + def reset_src(self, src: bytes): + """Reset the src data to be decoded. + Note: the previous src data will be discarded. + Use `get_remaining_bytes` to fetch the remaining bytes if needed before invoking this function. 
+ """ + self._src = src + _awscrt.cbor_decoder_reset_src(self._binding, src) + + def consume_next_single_element(self): + """ + Consume the next single element, without the content that follows the element. + + As an example for the following cbor, this function will only consume the + 0xBF, "Start indefinite-length map", not any content of the map represented. + The next element to decode will start from 0x63: + + 0xbf6346756ef563416d7421ff + BF -- Start indefinite-length map + 63 -- First key, UTF-8 string length 3 + 46756e -- "Fun" + F5 -- First value, true + 63 -- Second key, UTF-8 string length 3 + 416d74 -- "Amt" + 21 -- Second value, -2 + FF -- "break" + """ + return _awscrt.cbor_decoder_consume_next_element(self._binding) + + def consume_next_whole_data_item(self): + """ + Consume the next data item, including all the content within the data item. + Specifically, it reads extra for the types listed below: + 1. `AwsCborType.IndefArray`, `AwsCborType.IndefMap`, `AwsCborType.IndefBytes` and `AwsCborType.IndefStr`. It reads until + the `AwsCborType.Break` is read. + 2. `AwsCborType.ArrayStart` and `AwsCborType.MapStart`. It reads the number of data items in the array/map. + 3. `AwsCborType.Tag`. It reads the one extra data item as the value of the tag. + + As an example for the following cbor, this function will consume all the data + as it's only one cbor data item, an indefinite map with 2 pairs: + + 0xbf6346756ef563416d7421ff + BF -- Start indefinite-length map + 63 -- First key, UTF-8 string length 3 + 46756e -- "Fun" + F5 -- First value, true + 63 -- Second key, UTF-8 string length 3 + 416d74 -- "Amt" + 21 -- Second value, -2 + FF -- "break" + """ + return _awscrt.cbor_decoder_consume_next_data_item(self._binding) + + def pop_next_unsigned_int(self) -> int: + """Return and consume the next data item as unsigned int if it's a `AwsCborType.UnsignedInt` + Otherwise, it will raise ValueError. 
+ """ + return _awscrt.cbor_decoder_pop_next_unsigned_int(self._binding) + + def pop_next_negative_int(self) -> int: + """Return and consume the next data item as negative int if it's a `AwsCborType.NegativeInt` + Otherwise, it will raise ValueError. + """ + val = _awscrt.cbor_decoder_pop_next_negative_int(self._binding) + # The native side returns the encoded unsigned argument n; the value it represents is -1 - n. + return -1 - val + + def pop_next_double(self) -> float: + """Return and consume the next data item as float if it's a `AwsCborType.Float` + Otherwise, it will raise ValueError. + """ + return _awscrt.cbor_decoder_pop_next_float(self._binding) + + def pop_next_bool(self) -> bool: + """Return and consume the next data item as bool if it's a `AwsCborType.Bool` + Otherwise, it will raise ValueError. + """ + return _awscrt.cbor_decoder_pop_next_boolean(self._binding) + + def pop_next_bytes(self) -> bytes: + """Return and consume the next data item as bytes if it's a `AwsCborType.Bytes` + Otherwise, it will raise ValueError. + """ + return _awscrt.cbor_decoder_pop_next_bytes(self._binding) + + def pop_next_text(self) -> str: + """Return and consume the next data item as text if it's a `AwsCborType.Text` + Otherwise, it will raise ValueError. + """ + return _awscrt.cbor_decoder_pop_next_text(self._binding) + + def pop_next_array_start(self) -> int: + """Return and consume the next data item as int if it's a `AwsCborType.ArrayStart` + Otherwise, it will raise ValueError. + The return value is the number of data items that follow as the content of the array. + + Notes: For indefinite-length, this function will fail with unexpected type. The designed way to + handle indefinite-length array is: + - Get `AwsCborType.IndefArray` from `peek_next_type` + - call `consume_next_single_element` to pop the indefinite-length start. + - Decode the next data item until `AwsCborType.Break` is read. 
+ """ + return _awscrt.cbor_decoder_pop_next_array_start(self._binding) + + def pop_next_map_start(self) -> int: + """Return and consume the next data item as int if it's a `AwsCborType.MapStart` + Otherwise, it will raise ValueError. + The return value is the number of paired data items that follow as the content of the map. + + Notes: For indefinite-length, this function will fail with unexpected type. + """ + return _awscrt.cbor_decoder_pop_next_map_start(self._binding) + + def pop_next_tag_val(self) -> int: + """Return and consume the next data item as int if it's a `AwsCborType.Tag` + Otherwise, it will raise ValueError. + + The return value is the tag ID. Refer to RFC8949 section 3.4 for the + data item that is expected to follow the tag id as its value. + """ + return _awscrt.cbor_decoder_pop_next_tag(self._binding) + + def pop_next_list(self) -> list: + """Return and consume the next data item as list if it's a `AwsCborType.ArrayStart` or `AwsCborType.IndefArray` + Otherwise, it will raise ValueError. + It consumes all the content of the array as `pop_next_data_item`. + """ + return _awscrt.cbor_decoder_pop_next_py_list(self._binding) + + def pop_next_map(self) -> dict: + """Return and consume the next data item as dict if it's a `AwsCborType.MapStart` or `AwsCborType.IndefMap` + Otherwise, it will raise ValueError. + It consumes all the content of the map as `pop_next_data_item`. + """ + return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) + + def pop_next_data_item(self) -> Any: + """Generic API to decode cbor formatted data to a python object. + This consumes the next data item and returns the decoded object. + The type of the python object will be based on the cbor data item type. 
+ A full map from the cbor data item type to the python object type is: + - `AwsCborType.UnsignedInt` or `AwsCborType.NegativeInt` -> int + - `AwsCborType.Float` -> float + - `AwsCborType.Bytes` or `AwsCborType.IndefBytes` -> bytes + - `AwsCborType.Text` or `AwsCborType.IndefStr` -> str + - `AwsCborType.Null` or `AwsCborType.Undefined` -> None + - `AwsCborType.Bool` -> bool + - `AwsCborType.ArrayStart` or `AwsCborType.IndefArray` and all the followed data items in the array -> list + - `AwsCborType.MapStart` or `AwsCborType.IndefMap` and all the followed data items in the map -> dict + - `AwsCborType.Tag`: For tag with id 1, as the epoch time, it invokes the _on_epoch_time for python to convert to expected type. + For the rest of the tags, an exception will be raised. + """ + return _awscrt.cbor_decoder_pop_next_data_item(self._binding) + + # def _pop_next_data_item_sdk(self) -> Any: + # """Helper function to decode cbor formatted data to a python object. + # It is based on `pop_next_data_item` with the following specific rules for SDKs: + # 1. If the content in a collection has None, it will be ignored + # - If a value in the list is None, the list will NOT include the None value. + # - If a value or key in the dict is None, the dict will NOT include the key/value pair. + # 2. For epoch time, it will be converted to datetime object. + # 3. All other tags will not be supported and raise error. + # """ + # return _awscrt.cbor_decoder_pop_next_data_item_sdk(self._binding) diff --git a/source/cbor.c b/source/cbor.c new file mode 100644 index 000000000..309d73d16 --- /dev/null +++ b/source/cbor.c @@ -0,0 +1,767 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. 
+ */ +#include "cbor.h" + +#include <aws/common/cbor.h> + +/******************************************************************************* + * ENCODE + ******************************************************************************/ + +static const char *s_capsule_name_cbor_encoder = "aws_cbor_encoder"; + +/* Returns the native encoder stored in the capsule, or NULL with a Python error set if the capsule does not match. */ +static struct aws_cbor_encoder *s_cbor_encoder_from_capsule(PyObject *py_capsule) { + return PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); +} + +/* Runs when GC destroys the capsule */ +static void s_cbor_encoder_capsule_destructor(PyObject *py_capsule) { + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); + aws_cbor_encoder_destroy(encoder); +} + +/* Creates a native encoder and returns it wrapped in a capsule that owns it. */ +PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args) { + (void)self; + (void)args; + struct aws_cbor_encoder *encoder = aws_cbor_encoder_new(aws_py_get_allocator()); + AWS_ASSERT(encoder != NULL); + PyObject *py_capsule = PyCapsule_New(encoder, s_capsule_name_cbor_encoder, s_cbor_encoder_capsule_destructor); + if (!py_capsule) { + /* No capsule means no destructor will ever run, so release the encoder here. */ + aws_cbor_encoder_destroy(encoder); + return NULL; + } + return py_capsule; +} + +/* Returns a copy of the bytes encoded so far (see the TODO below for the empty case). */ +PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_capsule = NULL; + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { + return NULL; + } + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); + if (!encoder) { + return NULL; + } + struct aws_byte_cursor encoded_data = aws_cbor_encoder_get_encoded_data(encoder); + if (encoded_data.len == 0) { + /* TODO: probably better to be empty instead of None?? 
*/ + Py_RETURN_NONE; + } + return PyBytes_FromStringAndSize((const char *)encoded_data.ptr, encoded_data.len); +} + +/** + * Generates s_cbor_encoder_write_pyobject_as_<field>(): converts py_object with py_conversion, + * returns NULL if the conversion raised, otherwise writes the value and returns None. + */ +#define S_ENCODER_WRITE_PYOBJECT(ctype, py_conversion, field) \ + static PyObject *s_cbor_encoder_write_pyobject_as_##field(struct aws_cbor_encoder *encoder, PyObject *py_object) { \ + ctype data = py_conversion(py_object); \ + if (PyErr_Occurred()) { \ + return NULL; \ + } \ + aws_cbor_encoder_write_##field(encoder, data); \ + Py_RETURN_NONE; \ + } + +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, uint) +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, negint) +S_ENCODER_WRITE_PYOBJECT(double, PyFloat_AsDouble, float) +S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pybytes, bytes) +S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pyunicode, text) +S_ENCODER_WRITE_PYOBJECT(bool, PyObject_IsTrue, bool) + +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, array_start) +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, map_start) +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, tag) + +static PyObject *s_cbor_encoder_write_pyobject(struct aws_cbor_encoder *encoder, PyObject *py_object); + +/** + * Writes a Python int as a CBOR unsigned or negative integer. + * Returns None on success, or NULL with a Python error set when the value needs more than 64 bits. + */ +static PyObject *s_cbor_encoder_write_pylong(struct aws_cbor_encoder *encoder, PyObject *py_object) { + long val = 0; + int overflow = 0; + + val = PyLong_AsLongAndOverflow(py_object, &overflow); + if (overflow == 0) { + /* -1 with no overflow is also how a conversion failure is reported. */ + if (val == -1 && PyErr_Occurred()) { + return NULL; + } + if (val >= 0) { + aws_cbor_encoder_write_uint(encoder, (uint64_t)val); + } else { + /* CBOR stores a negative n as -1 - n. Compute it as -(val + 1) so that LONG_MIN is never negated, which would be signed overflow. */ + uint64_t val_unsigned = (uint64_t)(-(val + 1)); + aws_cbor_encoder_write_negint(encoder, val_unsigned); + } + } else if (overflow < 0) { + /** + * The py object is negative and too small. + * Convert the value to -1 - value in python to encode. 
+ */ + PyObject *abs_val = PyNumber_Negative(py_object); + if (!abs_val) { + return NULL; + } + PyObject *one = PyLong_FromLong(1); + if (!one) { + Py_DECREF(abs_val); + return NULL; + } + /* -value - 1 */ + PyObject *result = PyNumber_Subtract(abs_val, one); + Py_DECREF(abs_val); + Py_DECREF(one); + + if (!result) { + return NULL; + } + /* Convert to uint64_t and check for overflow */ + uint64_t val_to_encode = PyLong_AsUnsignedLongLong(result); + Py_DECREF(result); + if (PyErr_Occurred()) { + /* Value is too large even for uint64_t */ + PyErr_SetString(PyExc_OverflowError, "The integer is too large, BigNumber is not supported yet."); + return NULL; + } + + aws_cbor_encoder_write_negint(encoder, val_to_encode); + } else { + uint64_t val_to_encode = PyLong_AsUnsignedLongLong(py_object); + + if (PyErr_Occurred()) { + /* Value is too large for uint64_t */ + PyErr_SetString(PyExc_OverflowError, "The integer is too large, BigNumber is not supported yet."); + return NULL; + } + + aws_cbor_encoder_write_uint(encoder, val_to_encode); + } + Py_RETURN_NONE; +} + +/** + * Writes a definite-length array header followed by every element of py_list. + * Returns None on success, or NULL with a Python error set as soon as one element fails to encode. + */ +static PyObject *s_cbor_encoder_write_pylist(struct aws_cbor_encoder *encoder, PyObject *py_list) { + Py_ssize_t size = PyList_Size(py_list); + if (size < 0) { + /* Not a list: the error is already set, and -1 must not be written as a length. */ + return NULL; + } + aws_cbor_encoder_write_array_start(encoder, (size_t)size); + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = PyList_GetItem(py_list, i); + if (!item) { + PyErr_SetString(PyExc_RuntimeError, "Failed to get item from list"); + return NULL; + } + PyObject *written = s_cbor_encoder_write_pyobject(encoder, item); + if (!written) { + return NULL; + } + /* The helper returns a new reference to None; release it. */ + Py_DECREF(written); + } + Py_RETURN_NONE; +} + +/** + * Writes a definite-length map header followed by every key/value pair of py_dict. + * Returns None on success, or NULL with a Python error set as soon as one key or value fails to encode. + */ +static PyObject *s_cbor_encoder_write_pydict(struct aws_cbor_encoder *encoder, PyObject *py_dict) { + Py_ssize_t size = PyDict_Size(py_dict); + if (size < 0) { + /* Not a dict: the error is already set, and -1 must not be written as a length. */ + return NULL; + } + aws_cbor_encoder_write_map_start(encoder, (size_t)size); + PyObject *key = NULL; + PyObject *value = NULL; + Py_ssize_t pos = 0; + + while (PyDict_Next(py_dict, &pos, &key, &value)) { + PyObject *written = s_cbor_encoder_write_pyobject(encoder, key); + if (!written) { + return NULL; + } + Py_DECREF(written); + written = s_cbor_encoder_write_pyobject(encoder, value); + if (!written) { + return NULL; + } + Py_DECREF(written); + } + 
Py_RETURN_NONE; +} + +/** + * Dispatches on the Python type of py_object and writes it as the matching CBOR data item. + * Returns None on success, or NULL with a Python error set (ValueError for an unsupported type). + */ +static PyObject *s_cbor_encoder_write_pyobject(struct aws_cbor_encoder *encoder, PyObject *py_object) { + + /** + * TODO: timestamp <-> datetime?? Decimal fraction <-> decimal?? + */ + if (PyLong_CheckExact(py_object)) { + return s_cbor_encoder_write_pylong(encoder, py_object); + } else if (PyFloat_CheckExact(py_object)) { + return s_cbor_encoder_write_pyobject_as_float(encoder, py_object); + } else if (PyBool_Check(py_object)) { + return s_cbor_encoder_write_pyobject_as_bool(encoder, py_object); + } else if (PyBytes_CheckExact(py_object)) { + return s_cbor_encoder_write_pyobject_as_bytes(encoder, py_object); + } else if (PyUnicode_Check(py_object)) { + /* Allow subclasses of `str` */ + return s_cbor_encoder_write_pyobject_as_text(encoder, py_object); + } else if (PyList_Check(py_object)) { + /* Write py_list, allow subclasses of `list` */ + return s_cbor_encoder_write_pylist(encoder, py_object); + } else if (PyDict_Check(py_object)) { + /* Write py_dict, allow subclasses of `dict` */ + return s_cbor_encoder_write_pydict(encoder, py_object); + } else if (py_object == Py_None) { + aws_cbor_encoder_write_null(encoder); + } else { + PyErr_Format(PyExc_ValueError, "Not supported type %R", (PyObject *)Py_TYPE(py_object)); + /* Must return NULL: returning a value while an error is set makes the interpreter raise SystemError instead. */ + return NULL; + } + + Py_RETURN_NONE; +} + +/*********************************** BINDINGS ***********************************************/ + +/* Generates aws_py_cbor_encoder_write_<field>(capsule, object), which unpacks the arguments and forwards to encoder_fn. */ +#define ENCODER_WRITE(field, encoder_fn) \ + PyObject *aws_py_cbor_encoder_write_##field(PyObject *self, PyObject *args) { \ + (void)self; \ + PyObject *py_object; \ + PyObject *py_capsule; \ + if (!PyArg_ParseTuple(args, "OO", &py_capsule, &py_object)) { \ + return NULL; \ + } \ + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); \ + if (!encoder) { \ + return NULL; \ + } \ + return encoder_fn(encoder, py_object); \ + } + +ENCODER_WRITE(uint, s_cbor_encoder_write_pyobject_as_uint) +ENCODER_WRITE(negint, s_cbor_encoder_write_pyobject_as_negint) +ENCODER_WRITE(float, 
s_cbor_encoder_write_pyobject_as_float) +ENCODER_WRITE(bytes, s_cbor_encoder_write_pyobject_as_bytes) +ENCODER_WRITE(text, s_cbor_encoder_write_pyobject_as_text) +ENCODER_WRITE(bool, s_cbor_encoder_write_pyobject_as_bool) +ENCODER_WRITE(array_start, s_cbor_encoder_write_pyobject_as_array_start) +ENCODER_WRITE(map_start, s_cbor_encoder_write_pyobject_as_map_start) +ENCODER_WRITE(tag, s_cbor_encoder_write_pyobject_as_tag) +ENCODER_WRITE(py_list, s_cbor_encoder_write_pylist) +ENCODER_WRITE(py_dict, s_cbor_encoder_write_pydict) +ENCODER_WRITE(data_item, s_cbor_encoder_write_pyobject) + +/** + * Writes an element that carries no payload (null, undefined, indefinite-length starts, break). + * The element is selected by the aws_cbor_type value passed from python; any other value raises ValueError. + */ +PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_capsule = NULL; + Py_ssize_t type_enum = AWS_CBOR_TYPE_UNKNOWN; + if (!PyArg_ParseTuple(args, "On", &py_capsule, &type_enum)) { + return NULL; + } + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); + if (!encoder) { + return NULL; + } + switch (type_enum) { + case AWS_CBOR_TYPE_NULL: + aws_cbor_encoder_write_null(encoder); + break; + case AWS_CBOR_TYPE_UNDEFINED: + aws_cbor_encoder_write_undefined(encoder); + break; + case AWS_CBOR_TYPE_INDEF_ARRAY_START: + aws_cbor_encoder_write_indef_array_start(encoder); + break; + case AWS_CBOR_TYPE_INDEF_MAP_START: + aws_cbor_encoder_write_indef_map_start(encoder); + break; + case AWS_CBOR_TYPE_INDEF_BYTES_START: + aws_cbor_encoder_write_indef_bytes_start(encoder); + break; + case AWS_CBOR_TYPE_INDEF_TEXT_START: + aws_cbor_encoder_write_indef_text_start(encoder); + break; + case AWS_CBOR_TYPE_BREAK: + aws_cbor_encoder_write_break(encoder); + break; + default: + PyErr_Format(PyExc_ValueError, "Not supported simple type"); + return NULL; + } + Py_RETURN_NONE; +} + +/******************************************************************************* + * DECODE + ******************************************************************************/ + +struct decoder_binding { + struct aws_cbor_decoder *native; + + /* Decoder 
has simple lifetime, no async/multi-thread allowed. */ + /* Stored without taking a reference (see aws_py_cbor_decoder_new). */ + PyObject *self_py; +}; + +static const char *s_capsule_name_cbor_decoder = "aws_cbor_decoder"; + +/* Returns the native decoder held by the capsule's binding, or NULL with a Python error set if the capsule does not match. */ +static struct aws_cbor_decoder *s_cbor_decoder_from_capsule(PyObject *py_capsule) { + struct decoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_decoder); + if (!binding) { + return NULL; + } + return binding->native; +} +/* Runs when GC destroys the capsule */ +static void s_cbor_decoder_capsule_destructor(PyObject *py_capsule) { + struct decoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_decoder); + aws_cbor_decoder_destroy(binding->native); + aws_mem_release(aws_py_get_allocator(), binding); +} + +/** + * Creates a native decoder over the buffer passed from python and returns a capsule owning the binding. + * The buffer is referenced through a cursor, not copied. + */ +PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { + (void)self; + /* The python object will keep the src alive from python. */ + PyObject *py_self = NULL; + struct aws_byte_cursor src = {0}; /* s# */ + + if (!PyArg_ParseTuple(args, "Os#", &py_self, &src.ptr, &src.len)) { + return NULL; + } + struct decoder_binding *binding = aws_mem_calloc(aws_py_get_allocator(), 1, sizeof(struct decoder_binding)); + binding->native = aws_cbor_decoder_new(aws_py_get_allocator(), src); + AWS_ASSERT(binding->native != NULL); + PyObject *py_capsule = PyCapsule_New(binding, s_capsule_name_cbor_decoder, s_cbor_decoder_capsule_destructor); + if (!py_capsule) { + /* No capsule means no destructor will ever run, so release everything here. */ + aws_cbor_decoder_destroy(binding->native); + aws_mem_release(aws_py_get_allocator(), binding); + return NULL; + } + /* The binding and the py_object have the same life time */ + binding->self_py = py_self; + return py_capsule; +} + +/* Parses a single capsule argument and returns its native decoder, or NULL with a Python error set. */ +static struct aws_cbor_decoder *s_get_decoder_from_py_arg(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_capsule = NULL; + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { + return NULL; + } + return s_cbor_decoder_from_capsule(py_capsule); +} + +#define S_POP_NEXT_TO_PYOBJECT(ctype, field, py_conversion) \ + static PyObject 
*s_cbor_decoder_pop_next_##field##_to_pyobject(struct aws_cbor_decoder *decoder) { \ + ctype out_val; \ + if (aws_cbor_decoder_pop_next_##field(decoder, &out_val)) { \ + return PyErr_AwsLastError(); \ + } \ + return py_conversion(out_val); \ + } + +/* Same as S_POP_NEXT_TO_PYOBJECT, for values popped as a byte cursor and converted from a pointer to it. */ +#define S_POP_NEXT_TO_PYOBJECT_CURSOR(field, py_conversion) \ + static PyObject *s_cbor_decoder_pop_next_##field##_to_pyobject(struct aws_cbor_decoder *decoder) { \ + struct aws_byte_cursor out_val; \ + if (aws_cbor_decoder_pop_next_##field(decoder, &out_val)) { \ + return PyErr_AwsLastError(); \ + } \ + return py_conversion(&out_val); \ + } + +S_POP_NEXT_TO_PYOBJECT(uint64_t, unsigned_int_val, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, negative_int_val, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(double, float_val, PyFloat_FromDouble) +S_POP_NEXT_TO_PYOBJECT(bool, boolean_val, PyBool_FromLong) +S_POP_NEXT_TO_PYOBJECT_CURSOR(bytes_val, PyBytes_FromAwsByteCursor) +S_POP_NEXT_TO_PYOBJECT_CURSOR(text_val, PyUnicode_FromAwsByteCursor) +S_POP_NEXT_TO_PYOBJECT(uint64_t, array_start, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, map_start, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, tag_val, PyLong_FromUnsignedLongLong) + +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct decoder_binding *binding); + +/** + * helper to convert next data item to py_list + * Returns a new list, or NULL with a Python error set. + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct decoder_binding *binding) { + struct aws_cbor_decoder *decoder = binding->native; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + PyObject *array = NULL; + PyObject *item = NULL; + switch (out_type) { + case AWS_CBOR_TYPE_ARRAY_START: { + uint64_t num_array_item = 0; + /* Report a failed pop instead of building a list from a count that was never filled in. */ + if (aws_cbor_decoder_pop_next_array_start(decoder, &num_array_item)) { + return PyErr_AwsLastError(); + } + if (num_array_item > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "number of 
array is too large to fit."); + return NULL; + } + array = PyList_New((Py_ssize_t)num_array_item); + if (!array) { + return NULL; + } + for (size_t i = 0; i < num_array_item; ++i) { + item = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + if (!item) { + goto error; + } + PyList_SetItem(array, i, item); /* Steals reference to item */ + } + return array; + } + case AWS_CBOR_TYPE_INDEF_ARRAY_START: { + array = PyList_New(0); + if (!array) { + return NULL; + } + /* Consume the inf array start, then look at what follows; fail explicitly if either step fails. */ + if (aws_cbor_decoder_consume_next_single_element(decoder) || aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; + } + while (out_type != AWS_CBOR_TYPE_BREAK) { + item = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + if (!item) { + goto error; + } + if (PyList_Append(array, item) == -1) { + /* Append did not take a reference, so the item is still ours to release. */ + Py_DECREF(item); + goto error; + } + /* Append will not steal the reference, deref here. */ + Py_DECREF(item); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; + } + } + /* Consume the break element */ + aws_cbor_decoder_consume_next_single_element(decoder); + return array; + } + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } +error: + /* Releasing the list also releases every item already stored in it. */ + if (array) { + Py_DECREF(array); + } + return NULL; +} + +/** + * helper to convert next data item to py_dict + * Returns a new dict, or NULL with a Python error set. + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct decoder_binding *binding) { + struct aws_cbor_decoder *decoder = binding->native; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + PyObject *dict = NULL; + PyObject *key = NULL; + PyObject *value = NULL; + switch (out_type) { + case AWS_CBOR_TYPE_MAP_START: { + uint64_t num_item = 0; + /* Report a failed pop instead of decoding with a count that was never filled in. */ + if (aws_cbor_decoder_pop_next_map_start(decoder, &num_item)) { + return PyErr_AwsLastError(); + } + if (num_item > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "number of dict is too large to fit."); + return NULL; + } + dict = PyDict_New(); + 
if (!dict) { + return NULL; + } + for (size_t i = 0; i < num_item; ++i) { + key = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + value = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + if (!key || !value) { + goto error; + } + if (PyDict_SetItem(dict, key, value) == -1) { + goto error; + } + Py_DECREF(key); + Py_DECREF(value); + } + return dict; + } + case AWS_CBOR_TYPE_INDEF_MAP_START: { + dict = PyDict_New(); + if (!dict) { + return NULL; + } + /* Consume the inf array start */ + aws_cbor_decoder_consume_next_single_element(decoder); + aws_cbor_decoder_peek_type(decoder, &out_type); + while (out_type != AWS_CBOR_TYPE_BREAK) { + key = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + value = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + if (!key || !value) { + goto error; + } + if (PyDict_SetItem(dict, key, value) == -1) { + goto error; + } + Py_DECREF(key); + Py_DECREF(value); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; + } + } + /* Consume the break element */ + aws_cbor_decoder_consume_next_single_element(decoder); + return dict; + } + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } +error: + if (dict) { + Py_DECREF(dict); + } + if (key) { + Py_DECREF(key); + } + if (value) { + Py_DECREF(value); + } + return NULL; +} + +/** + * helper to get the next inf byte + */ +static PyObject *s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(struct aws_cbor_decoder *decoder) { + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + if (out_type != AWS_CBOR_TYPE_INDEF_BYTES_START) { + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + /* consume the bytes start element */ + aws_cbor_decoder_consume_next_single_element(decoder); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } 
+ /* Empty bytes */ + PyObject *result = PyBytes_FromStringAndSize(NULL, 0); + while (out_type != AWS_CBOR_TYPE_BREAK) { + PyObject *next_part = s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); + if (!next_part) { + Py_DECREF(result); + return NULL; + } + /* The reference to the old value of bytes will be stolen and next_part will be del. */ + PyBytes_ConcatAndDel(&result, next_part); + if (!result) { + return NULL; + } + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + } + /* Consume the break element */ + aws_cbor_decoder_consume_next_single_element(decoder); + return result; +} + +/** + * helper to get the next inf string + */ +static PyObject *s_cbor_decoder_pop_next_inf_string_to_py_str(struct aws_cbor_decoder *decoder) { + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + if (out_type != AWS_CBOR_TYPE_INDEF_TEXT_START) { + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + /* consume the bytes start element */ + aws_cbor_decoder_consume_next_single_element(decoder); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + /* Empty string */ + PyObject *result = PyUnicode_FromStringAndSize(NULL, 0); + while (out_type != AWS_CBOR_TYPE_BREAK) { + PyObject *next_part = s_cbor_decoder_pop_next_text_val_to_pyobject(decoder); + if (!next_part) { + Py_DECREF(result); + return NULL; + } + /* Returns a new reference and keep the arguments unchanged. 
*/ + PyObject *concat_val = PyUnicode_Concat(result, next_part); + Py_DECREF(result); + Py_DECREF(next_part); + if (!concat_val) { + return NULL; + } + result = concat_val; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + } + /* Consume the break element */ + aws_cbor_decoder_consume_next_single_element(decoder); + return result; +} + +/** + * Generic helper to convert a cbor encoded data to PyObject + */ +static PyObject *s_cbor_decoder_pop_next_tag_to_pyobject(struct decoder_binding *binding) { + struct aws_cbor_decoder *decoder = binding->native; + uint64_t out_tag_val = 0; + if (aws_cbor_decoder_pop_next_tag_val(decoder, &out_tag_val)) { + return PyErr_AwsLastError(); + } + /* TODO: implement those tags */ + switch (out_tag_val) { + case AWS_CBOR_TAG_EPOCH_TIME: { + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + if (out_type == AWS_CBOR_TYPE_FLOAT || out_type == AWS_CBOR_TYPE_UINT || out_type == AWS_CBOR_TYPE_NEGINT) { + PyObject *val = s_cbor_decoder_pop_next_data_item_to_pyobject(binding); + PyObject *result = PyObject_CallMethod(binding->self_py, "_on_epoch_time_callback", "(O)", val); + Py_DECREF(val); + return result; + } else { + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + } + case AWS_CBOR_TAG_UNSIGNED_BIGNUM: + case AWS_CBOR_TAG_NEGATIVE_BIGNUM: + case AWS_CBOR_TAG_DECIMAL_FRACTION: + default: + PyErr_Format(PyExc_ValueError, "Unsupported tag value: %" PRIu64 ".", out_tag_val); + return NULL; + } + Py_RETURN_NONE; +} + +/** + * Generic helper to convert a cbor encoded data to PyObject + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct decoder_binding *binding) { + struct aws_cbor_decoder *decoder = binding->native; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return 
PyErr_AwsLastError(); + } + switch (out_type) { + case AWS_CBOR_TYPE_UINT: + return s_cbor_decoder_pop_next_unsigned_int_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_NEGINT: { + /* The value from native code is -1 - val. */ + PyObject *minus_one = PyLong_FromLong(-1); + if (!minus_one) { + return NULL; + } + PyObject *val = s_cbor_decoder_pop_next_negative_int_val_to_pyobject(decoder); + if (!val) { + Py_DECREF(minus_one); + return NULL; + } + PyObject *ret_val = PyNumber_Subtract(minus_one, val); + Py_DECREF(minus_one); + Py_DECREF(val); + return ret_val; + } + case AWS_CBOR_TYPE_FLOAT: + return s_cbor_decoder_pop_next_float_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BYTES: + return s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_TEXT: + return s_cbor_decoder_pop_next_text_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BOOL: + return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_NULL: + /* fall through */ + case AWS_CBOR_TYPE_UNDEFINED: + aws_cbor_decoder_consume_next_single_element(decoder); + Py_RETURN_NONE; + case AWS_CBOR_TYPE_MAP_START: + /* fall through */ + case AWS_CBOR_TYPE_INDEF_MAP_START: + return s_cbor_decoder_pop_next_data_item_to_py_dict(binding); + case AWS_CBOR_TYPE_ARRAY_START: + /* fall through */ + case AWS_CBOR_TYPE_INDEF_ARRAY_START: + return s_cbor_decoder_pop_next_data_item_to_py_list(binding); + case AWS_CBOR_TYPE_INDEF_BYTES_START: + return s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(decoder); + case AWS_CBOR_TYPE_INDEF_TEXT_START: + return s_cbor_decoder_pop_next_inf_string_to_py_str(decoder); + case AWS_CBOR_TYPE_TAG: + return s_cbor_decoder_pop_next_tag_to_pyobject(binding); + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + return NULL; +} + +/*********************************** BINDINGS ***********************************************/ + +PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args) { + 
struct aws_cbor_decoder *decoder = s_get_decoder_from_py_arg(self, args); + if (!decoder) { + return NULL; + } + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + return PyLong_FromSize_t(out_type); +} + +PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args) { + struct aws_cbor_decoder *decoder = s_get_decoder_from_py_arg(self, args); + if (!decoder) { + return NULL; + } + size_t remaining_len = aws_cbor_decoder_get_remaining_length(decoder); + return PyLong_FromSize_t(remaining_len); +} + +PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args) { + struct aws_cbor_decoder *decoder = s_get_decoder_from_py_arg(self, args); + if (!decoder) { + return NULL; + } + if (aws_cbor_decoder_consume_next_single_element(decoder)) { + return PyErr_AwsLastError(); + } + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args) { + struct aws_cbor_decoder *decoder = s_get_decoder_from_py_arg(self, args); + if (!decoder) { + return NULL; + } + if (aws_cbor_decoder_consume_next_whole_data_item(decoder)) { + return PyErr_AwsLastError(); + } + Py_RETURN_NONE; +} + +#define DECODER_POP(field, decoder_fn) \ + PyObject *aws_py_cbor_decoder_pop_next_##field(PyObject *self, PyObject *args) { \ + struct aws_cbor_decoder *decoder = s_get_decoder_from_py_arg(self, args); \ + if (!decoder) { \ + return NULL; \ + } \ + return decoder_fn(decoder); \ + } + +#define DECODER_POP_WITH_BINDING(field, decoder_binding_fn) \ + PyObject *aws_py_cbor_decoder_pop_next_##field(PyObject *self, PyObject *args) { \ + (void)self; \ + PyObject *py_capsule = NULL; \ + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { \ + return NULL; \ + } \ + struct decoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_decoder); \ + if (!binding) { \ + return NULL; \ + } \ + return 
decoder_binding_fn(binding); \ + } + +DECODER_POP(unsigned_int, s_cbor_decoder_pop_next_unsigned_int_val_to_pyobject) +DECODER_POP(negative_int, s_cbor_decoder_pop_next_negative_int_val_to_pyobject) +DECODER_POP(float, s_cbor_decoder_pop_next_float_val_to_pyobject) +DECODER_POP(boolean, s_cbor_decoder_pop_next_boolean_val_to_pyobject) +DECODER_POP(bytes, s_cbor_decoder_pop_next_bytes_val_to_pyobject) +DECODER_POP(text, s_cbor_decoder_pop_next_text_val_to_pyobject) +DECODER_POP(tag, s_cbor_decoder_pop_next_tag_val_to_pyobject) +DECODER_POP(array_start, s_cbor_decoder_pop_next_array_start_to_pyobject) +DECODER_POP(map_start, s_cbor_decoder_pop_next_map_start_to_pyobject) + +DECODER_POP_WITH_BINDING(py_list, s_cbor_decoder_pop_next_data_item_to_py_list) +DECODER_POP_WITH_BINDING(py_dict, s_cbor_decoder_pop_next_data_item_to_py_dict) +DECODER_POP_WITH_BINDING(data_item, s_cbor_decoder_pop_next_data_item_to_pyobject) diff --git a/source/cbor.h b/source/cbor.h new file mode 100644 index 000000000..b196bc135 --- /dev/null +++ b/source/cbor.h @@ -0,0 +1,56 @@ +#ifndef AWS_CRT_PYTHON_CBOR_H +#define AWS_CRT_PYTHON_CBOR_H +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. 
+ */ +#include "module.h" + +/******************************************************************************* + * ENCODE + ******************************************************************************/ + +PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args); + +PyObject *aws_py_cbor_encoder_write_uint(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_negint(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_text(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_bool(PyObject *self, PyObject *args); + +/* Encode the types without value needed. The arg is the type to encode. 
*/ +PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args); + +PyObject *aws_py_cbor_encoder_write_py_list(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_py_dict(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_data_item(PyObject *self, PyObject *args); + +/******************************************************************************* + * DECODE + ******************************************************************************/ + +PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_unsigned_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_negative_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_float(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_boolean(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_bytes(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_text(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_tag(PyObject *self, PyObject *args); + +PyObject *aws_py_cbor_decoder_pop_next_py_list(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_py_dict(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args); + +#endif /* AWS_CRT_PYTHON_CBOR_H */ diff --git a/source/module.c b/source/module.c index 4ad2bd002..cfed91793 
100644 --- a/source/module.c +++ b/source/module.c @@ -5,6 +5,7 @@ #include "module.h" #include "auth.h" +#include "cbor.h" #include "checksums.h" #include "common.h" #include "crypto.h" @@ -117,6 +118,14 @@ PyObject *PyUnicode_FromAwsString(const struct aws_string *aws_str) { return PyUnicode_FromStringAndSize(aws_string_c_str(aws_str), aws_str->len); } +PyObject *PyBytes_FromAwsByteCursor(const struct aws_byte_cursor *cursor) { + if (cursor->len > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "Cursor exceeds PY_SSIZE_T_MAX"); + return NULL; + } + return PyBytes_FromStringAndSize((const char *)cursor->ptr, (Py_ssize_t)cursor->len); +} + uint32_t PyObject_GetAttrAsUint32(PyObject *o, const char *class_name, const char *attr_name) { uint32_t result = UINT32_MAX; @@ -899,6 +908,41 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(websocket_increment_read_window, METH_VARARGS), AWS_PY_METHOD_DEF(websocket_create_handshake_request, METH_VARARGS), + /* CBOR Encode */ + AWS_PY_METHOD_DEF(cbor_encoder_new, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_get_encoded_data, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_uint, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_negint, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_float, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_bytes, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_text, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_array_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_map_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_tag, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_bool, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_simple_types, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_py_list, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_py_dict, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_data_item, METH_VARARGS), + + /* CBOR Decode */ + AWS_PY_METHOD_DEF(cbor_decoder_new, 
METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_peek_type, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_remaining_bytes_len, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_consume_next_element, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_consume_next_data_item, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_unsigned_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_negative_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_float, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_boolean, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_bytes, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_text, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_array_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_map_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_tag, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_py_list, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_py_dict, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_data_item, METH_VARARGS), {NULL, NULL, 0, NULL}, }; diff --git a/source/module.h b/source/module.h index f626cb1e5..2f9dd217e 100644 --- a/source/module.h +++ b/source/module.h @@ -30,6 +30,7 @@ enum aws_crt_python_errors { /* AWS Specific Helpers */ PyObject *PyUnicode_FromAwsByteCursor(const struct aws_byte_cursor *cursor); PyObject *PyUnicode_FromAwsString(const struct aws_string *aws_str); +PyObject *PyBytes_FromAwsByteCursor(const struct aws_byte_cursor *cursor); /* Return the named attribute, converted to the specified type. 
* If conversion cannot occur a python exception is set (check PyExc_Occurred()) */ diff --git a/test/resources/decode-error-tests.json b/test/resources/decode-error-tests.json new file mode 100644 index 000000000..d32f2308d --- /dev/null +++ b/test/resources/decode-error-tests.json @@ -0,0 +1,282 @@ +[ + { + "description": "TestDecode_InvalidArgument - map/2 - arg len 2 greater than remaining buf len", + "input": "b900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/1 - arg len 1 greater than remaining buf len", + "input": "d8", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - major7/float64 - incomplete float64 at end of buf", + "input": "fb00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/4 - arg len 4 greater than remaining buf len", + "input": "3a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/8 - arg len 8 greater than remaining buf len", + "input": "3b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/4 - arg len 4 greater than remaining buf len", + "input": "7a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - map/1 - arg len 1 greater than remaining buf len", + "input": "b8", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - map/4 - arg len 4 greater than remaining buf len", + "input": "ba000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/2 - arg len 2 greater than remaining buf len", + "input": "d900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/1 - arg len 1 greater than remaining buf len", + "input": "18", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/1 - arg len 1 greater than remaining buf len", + "input": "78", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/8 - arg len 8 greater than 
remaining buf len", + "input": "7b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/2 - arg len 2 greater than remaining buf len", + "input": "7900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/2 - arg len 2 greater than remaining buf len", + "input": "9900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/1 - arg len 1 greater than remaining buf len", + "input": "58", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/4 - arg len 4 greater than remaining buf len", + "input": "5a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/8 - arg len 8 greater than remaining buf len", + "input": "5b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/? - unexpected minor value 31", + "input": "3f", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/8 - arg len 8 greater than remaining buf len", + "input": "db00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/2 - arg len 2 greater than remaining buf len", + "input": "1900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/8 - arg len 8 greater than remaining buf len", + "input": "1b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/2 - arg len 2 greater than remaining buf len", + "input": "3900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/1 - arg len 1 greater than remaining buf len", + "input": "38", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/8 - arg len 8 greater than remaining buf len", + "input": "9b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/4 - arg len 4 greater than remaining buf len", + "input": "da000000", + "error": true + }, + { + "description": 
"TestDecode_InvalidArgument - major7/float32 - incomplete float32 at end of buf", + "input": "fa000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/4 - arg len 4 greater than remaining buf len", + "input": "1a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/2 - arg len 2 greater than remaining buf len", + "input": "5900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/4 - arg len 4 greater than remaining buf len", + "input": "9a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/? - unexpected minor value 31", + "input": "df", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - major7/? - unexpected minor value 31", + "input": "ff", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/? - unexpected minor value 31", + "input": "1f", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/1 - arg len 1 greater than remaining buf len", + "input": "98", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - map/8 - arg len 8 greater than remaining buf len", + "input": "bb00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidList - [] / eof after head - unexpected end of payload", + "input": "81", + "error": true + }, + { + "description": "TestDecode_InvalidList - [] / invalid item - arg len 1 greater than remaining buf len", + "input": "8118", + "error": true + }, + { + "description": "TestDecode_InvalidList - [_ ] / no break - expected break marker", + "input": "9f", + "error": true + }, + { + "description": "TestDecode_InvalidList - [_ ] / invalid item - arg len 1 greater than remaining buf len", + "input": "9f18", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {} / invalid key - slice len 1 greater than remaining buf len", + "input": "a17801", + "error": true + }, + { + "description": "TestDecode_InvalidMap - 
{} / invalid value - arg len 1 greater than remaining buf len", + "input": "a163666f6f18", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {_ } / no break - expected break marker", + "input": "bf", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {_ } / invalid key - slice len 1 greater than remaining buf len", + "input": "bf7801", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {_ } / invalid value - arg len 1 greater than remaining buf len", + "input": "bf63666f6f18", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {} / eof after head - unexpected end of payload", + "input": "a1", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/1, not enough bytes - slice len 1 greater than remaining buf len", + "input": "5801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, nested indefinite - nested indefinite slice", + "input": "5f5f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, no break - expected break marker", + "input": "7f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, nested indefinite - nested indefinite slice", + "input": "7f7f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, invalid nested definite - decode subslice: slice len 1 greater than remaining buf len", + "input": "7f7801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, no break - expected break marker", + "input": "5f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, invalid nested major - unexpected major type 3 in indefinite slice", + "input": "5f60", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, invalid nested definite - decode subslice: slice len 1 greater than remaining buf len", + "input": "5f5801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/1, not 
enough bytes - slice len 1 greater than remaining buf len", + "input": "7801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, invalid nested major - unexpected major type 2 in indefinite slice", + "input": "7f40", + "error": true + }, + { + "description": "TestDecode_InvalidTag - invalid value - arg len 1 greater than remaining buf len", + "input": "c118", + "error": true + }, + { + "description": "TestDecode_InvalidTag - eof - unexpected end of payload", + "input": "c1", + "error": true + } +] \ No newline at end of file diff --git a/test/resources/decode-success-tests.json b/test/resources/decode-success-tests.json new file mode 100644 index 000000000..86c6d32b3 --- /dev/null +++ b/test/resources/decode-success-tests.json @@ -0,0 +1,1528 @@ +[ + { + "description": "atomic - uint/0/max", + "input": "17", + "expect": { + "uint": 23 + } + }, + { + "description": "atomic - uint/2/min", + "input": "190000", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - uint/8/min", + "input": "1b0000000000000000", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - negint/1/min", + "input": "3800", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - negint/2/min", + "input": "390000", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - false", + "input": "f4", + "expect": { + "bool": false + } + }, + { + "description": "atomic - uint/1/min", + "input": "1800", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - negint/8/min", + "input": "3b0000000000000000", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - float64/+Inf", + "input": "fb7ff0000000000000", + "expect": { + "float64": 9218868437227405312 + } + }, + { + "description": "atomic - uint/4/min", + "input": "1a00000000", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - null", + "input": "f6", + "expect": { + "null": {} + } + }, + { + "description": "atomic - negint/2/max", + 
"input": "39ffff", + "expect": { + "negint": -65536 + } + }, + { + "description": "atomic - negint/8/max", + "input": "3bfffffffffffffffe", + "expect": { + "negint": -18446744073709551615 + } + }, + { + "description": "atomic - float32/1.625", + "input": "fa3fd00000", + "expect": { + "float32": 1070596096 + } + }, + { + "description": "atomic - uint/0/min", + "input": "00", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - uint/1/max", + "input": "18ff", + "expect": { + "uint": 255 + } + }, + { + "description": "atomic - uint/8/max", + "input": "1bffffffffffffffff", + "expect": { + "uint": 18446744073709551615 + } + }, + { + "description": "atomic - negint/1/max", + "input": "38ff", + "expect": { + "negint": -256 + } + }, + { + "description": "atomic - negint/4/min", + "input": "3a00000000", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - float64/1.625", + "input": "fb3ffa000000000000", + "expect": { + "float64": 4609997168567123968 + } + }, + { + "description": "atomic - uint/2/max", + "input": "19ffff", + "expect": { + "uint": 65535 + } + }, + { + "description": "atomic - negint/0/max", + "input": "37", + "expect": { + "negint": -24 + } + }, + { + "description": "atomic - negint/4/max", + "input": "3affffffff", + "expect": { + "negint": -4294967296 + } + }, + { + "description": "atomic - uint/4/max", + "input": "1affffffff", + "expect": { + "uint": 4294967295 + } + }, + { + "description": "atomic - negint/0/min", + "input": "20", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - true", + "input": "f5", + "expect": { + "bool": true + } + }, + { + "description": "atomic - float32/+Inf", + "input": "fa7f800000", + "expect": { + "float32": 2139095040 + } + }, + { + "description": "definite slice - len = 0", + "input": "40", + "expect": { + "bytestring": [] + } + }, + { + "description": "definite slice - len \u003e 0", + "input": "43666f6f", + "expect": { + "bytestring": [ + 102, + 111, + 111 + ] + } + }, + { + 
"description": "definite string - len = 0", + "input": "60", + "expect": { + "string": "" + } + }, + { + "description": "definite string - len \u003e 0", + "input": "63666f6f", + "expect": { + "string": "foo" + } + }, + { + "description": "indefinite slice - len = 0", + "input": "5fff", + "expect": { + "bytestring": [] + } + }, + { + "description": "indefinite slice - len = 0, explicit", + "input": "5f40ff", + "expect": { + "bytestring": [] + } + }, + { + "description": "indefinite slice - len = 0, len \u003e 0", + "input": "5f4043666f6fff", + "expect": { + "bytestring": [ + 102, + 111, + 111 + ] + } + }, + { + "description": "indefinite slice - len \u003e 0, len = 0", + "input": "5f43666f6f40ff", + "expect": { + "bytestring": [ + 102, + 111, + 111 + ] + } + }, + { + "description": "indefinite slice - len \u003e 0, len \u003e 0", + "input": "5f43666f6f43666f6fff", + "expect": { + "bytestring": [ + 102, + 111, + 111, + 102, + 111, + 111 + ] + } + }, + { + "description": "indefinite string - len = 0", + "input": "7fff", + "expect": { + "string": "" + } + }, + { + "description": "indefinite string - len = 0, explicit", + "input": "7f60ff", + "expect": { + "string": "" + } + }, + { + "description": "indefinite string - len = 0, len \u003e 0", + "input": "7f6063666f6fff", + "expect": { + "string": "foo" + } + }, + { + "description": "indefinite string - len \u003e 0, len = 0", + "input": "7f63666f6f60ff", + "expect": { + "string": "foo" + } + }, + { + "description": "indefinite string - len \u003e 0, len \u003e 0", + "input": "7f63666f6f63666f6fff", + "expect": { + "string": "foofoo" + } + }, + { + "description": "list - [float64]", + "input": "81fb7ff0000000000000", + "expect": { + "list": [ + { + "float64": 9218868437227405312 + } + ] + } + }, + { + "description": "list - [_ negint/4/min]", + "input": "9f3a00000000ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [uint/1/min]", + "input": "811800", + "expect": { + "list": 
[ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ uint/4/min]", + "input": "9f1a00000000ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [uint/0/max]", + "input": "8117", + "expect": { + "list": [ + { + "uint": 23 + } + ] + } + }, + { + "description": "list - [uint/1/max]", + "input": "8118ff", + "expect": { + "list": [ + { + "uint": 255 + } + ] + } + }, + { + "description": "list - [negint/2/min]", + "input": "81390000", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/8/min]", + "input": "813b0000000000000000", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ uint/2/min]", + "input": "9f190000ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [uint/0/min]", + "input": "8100", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [negint/0/min]", + "input": "8120", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/0/max]", + "input": "8137", + "expect": { + "list": [ + { + "negint": -24 + } + ] + } + }, + { + "description": "list - [negint/1/min]", + "input": "813800", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/1/max]", + "input": "8138ff", + "expect": { + "list": [ + { + "negint": -256 + } + ] + } + }, + { + "description": "list - [negint/4/max]", + "input": "813affffffff", + "expect": { + "list": [ + { + "negint": -4294967296 + } + ] + } + }, + { + "description": "list - [_ uint/4/max]", + "input": "9f1affffffffff", + "expect": { + "list": [ + { + "uint": 4294967295 + } + ] + } + }, + { + "description": "list - [_ negint/0/max]", + "input": "9f37ff", + "expect": { + "list": [ + { + "negint": -24 + } + ] + } + }, + { + "description": "list - [uint/2/min]", + "input": "81190000", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } 
+ }, + { + "description": "list - [_ false]", + "input": "9ff4ff", + "expect": { + "list": [ + { + "bool": false + } + ] + } + }, + { + "description": "list - [_ float32]", + "input": "9ffa7f800000ff", + "expect": { + "list": [ + { + "float32": 2139095040 + } + ] + } + }, + { + "description": "list - [_ negint/1/max]", + "input": "9f38ffff", + "expect": { + "list": [ + { + "negint": -256 + } + ] + } + }, + { + "description": "list - [uint/8/max]", + "input": "811bffffffffffffffff", + "expect": { + "list": [ + { + "uint": 18446744073709551615 + } + ] + } + }, + { + "description": "list - [negint/4/min]", + "input": "813a00000000", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/8/max]", + "input": "813bfffffffffffffffe", + "expect": { + "list": [ + { + "negint": -18446744073709551615 + } + ] + } + }, + { + "description": "list - [_ negint/2/min]", + "input": "9f390000ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ negint/4/max]", + "input": "9f3affffffffff", + "expect": { + "list": [ + { + "negint": -4294967296 + } + ] + } + }, + { + "description": "list - [_ true]", + "input": "9ff5ff", + "expect": { + "list": [ + { + "bool": true + } + ] + } + }, + { + "description": "list - [_ null]", + "input": "9ff6ff", + "expect": { + "list": [ + { + "null": {} + } + ] + } + }, + { + "description": "list - [uint/8/min]", + "input": "811b0000000000000000", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [null]", + "input": "81f6", + "expect": { + "list": [ + { + "null": {} + } + ] + } + }, + { + "description": "list - [_ uint/1/min]", + "input": "9f1800ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ uint/1/max]", + "input": "9f18ffff", + "expect": { + "list": [ + { + "uint": 255 + } + ] + } + }, + { + "description": "list - [_ uint/2/max]", + "input": "9f19ffffff", + "expect": { + "list": [ 
+ { + "uint": 65535 + } + ] + } + }, + { + "description": "list - [_ uint/8/min]", + "input": "9f1b0000000000000000ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ negint/8/min]", + "input": "9f3b0000000000000000ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ float64]", + "input": "9ffb7ff0000000000000ff", + "expect": { + "list": [ + { + "float64": 9218868437227405312 + } + ] + } + }, + { + "description": "list - [uint/4/min]", + "input": "811a00000000", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [true]", + "input": "81f5", + "expect": { + "list": [ + { + "bool": true + } + ] + } + }, + { + "description": "list - [float32]", + "input": "81fa7f800000", + "expect": { + "list": [ + { + "float32": 2139095040 + } + ] + } + }, + { + "description": "list - [_ uint/0/min]", + "input": "9f00ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ uint/0/max]", + "input": "9f17ff", + "expect": { + "list": [ + { + "uint": 23 + } + ] + } + }, + { + "description": "list - [_ uint/8/max]", + "input": "9f1bffffffffffffffffff", + "expect": { + "list": [ + { + "uint": 18446744073709551615 + } + ] + } + }, + { + "description": "list - [_ negint/1/min]", + "input": "9f3800ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ negint/2/max]", + "input": "9f39ffffff", + "expect": { + "list": [ + { + "negint": -65536 + } + ] + } + }, + { + "description": "list - [uint/2/max]", + "input": "8119ffff", + "expect": { + "list": [ + { + "uint": 65535 + } + ] + } + }, + { + "description": "list - [negint/2/max]", + "input": "8139ffff", + "expect": { + "list": [ + { + "negint": -65536 + } + ] + } + }, + { + "description": "list - [false]", + "input": "81f4", + "expect": { + "list": [ + { + "bool": false + } + ] + } + }, + { + "description": "list - [_ negint/0/min]", + 
"input": "9f20ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ negint/8/max]", + "input": "9f3bfffffffffffffffeff", + "expect": { + "list": [ + { + "negint": -18446744073709551615 + } + ] + } + }, + { + "description": "list - [uint/4/max]", + "input": "811affffffff", + "expect": { + "list": [ + { + "uint": 4294967295 + } + ] + } + }, + { + "description": "map - {uint/0/min}", + "input": "a163666f6f00", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {uint/4/max}", + "input": "a163666f6f1affffffff", + "expect": { + "map": { + "foo": { + "uint": 4294967295 + } + } + } + }, + { + "description": "map - {negint/0/min}", + "input": "a163666f6f20", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ float32}", + "input": "bf63666f6ffa7f800000ff", + "expect": { + "map": { + "foo": { + "float32": 2139095040 + } + } + } + }, + { + "description": "map - {false}", + "input": "a163666f6ff4", + "expect": { + "map": { + "foo": { + "bool": false + } + } + } + }, + { + "description": "map - {float32}", + "input": "a163666f6ffa7f800000", + "expect": { + "map": { + "foo": { + "float32": 2139095040 + } + } + } + }, + { + "description": "map - {_ uint/0/max}", + "input": "bf63666f6f17ff", + "expect": { + "map": { + "foo": { + "uint": 23 + } + } + } + }, + { + "description": "map - {_ negint/2/min}", + "input": "bf63666f6f390000ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ false}", + "input": "bf63666f6ff4ff", + "expect": { + "map": { + "foo": { + "bool": false + } + } + } + }, + { + "description": "map - {uint/8/min}", + "input": "a163666f6f1b0000000000000000", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ negint/0/max}", + "input": "bf63666f6f37ff", + "expect": { + "map": { + "foo": { + "negint": -24 + } + } + } + }, + { + "description": 
"map - {_ null}", + "input": "bf63666f6ff6ff", + "expect": { + "map": { + "foo": { + "null": {} + } + } + } + }, + { + "description": "map - {uint/1/min}", + "input": "a163666f6f1800", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/1/min}", + "input": "bf63666f6f1800ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/8/max}", + "input": "bf63666f6f1bffffffffffffffffff", + "expect": { + "map": { + "foo": { + "uint": 18446744073709551615 + } + } + } + }, + { + "description": "map - {_ negint/0/min}", + "input": "bf63666f6f20ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ negint/1/min}", + "input": "bf63666f6f3800ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ negint/1/max}", + "input": "bf63666f6f38ffff", + "expect": { + "map": { + "foo": { + "negint": -256 + } + } + } + }, + { + "description": "map - {_ negint/2/max}", + "input": "bf63666f6f39ffffff", + "expect": { + "map": { + "foo": { + "negint": -65536 + } + } + } + }, + { + "description": "map - {_ negint/4/min}", + "input": "bf63666f6f3a00000000ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ true}", + "input": "bf63666f6ff5ff", + "expect": { + "map": { + "foo": { + "bool": true + } + } + } + }, + { + "description": "map - {uint/2/max}", + "input": "a163666f6f19ffff", + "expect": { + "map": { + "foo": { + "uint": 65535 + } + } + } + }, + { + "description": "map - {uint/8/max}", + "input": "a163666f6f1bffffffffffffffff", + "expect": { + "map": { + "foo": { + "uint": 18446744073709551615 + } + } + } + }, + { + "description": "map - {negint/0/max}", + "input": "a163666f6f37", + "expect": { + "map": { + "foo": { + "negint": -24 + } + } + } + }, + { + "description": "map - {negint/1/max}", + "input": "a163666f6f38ff", + "expect": { + "map": { + 
"foo": { + "negint": -256 + } + } + } + }, + { + "description": "map - {negint/2/max}", + "input": "a163666f6f39ffff", + "expect": { + "map": { + "foo": { + "negint": -65536 + } + } + } + }, + { + "description": "map - {negint/4/min}", + "input": "a163666f6f3a00000000", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {negint/8/max}", + "input": "a163666f6f3bfffffffffffffffe", + "expect": { + "map": { + "foo": { + "negint": -18446744073709551615 + } + } + } + }, + { + "description": "map - {float64}", + "input": "a163666f6ffb7ff0000000000000", + "expect": { + "map": { + "foo": { + "float64": 9218868437227405312 + } + } + } + }, + { + "description": "map - {_ uint/0/min}", + "input": "bf63666f6f00ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/4/min}", + "input": "bf63666f6f1a00000000ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/8/min}", + "input": "bf63666f6f1b0000000000000000ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {uint/1/max}", + "input": "a163666f6f18ff", + "expect": { + "map": { + "foo": { + "uint": 255 + } + } + } + }, + { + "description": "map - {negint/2/min}", + "input": "a163666f6f390000", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {negint/8/min}", + "input": "a163666f6f3b0000000000000000", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {true}", + "input": "a163666f6ff5", + "expect": { + "map": { + "foo": { + "bool": true + } + } + } + }, + { + "description": "map - {_ uint/2/min}", + "input": "bf63666f6f190000ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ negint/8/min}", + "input": "bf63666f6f3b0000000000000000ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + 
{ + "description": "map - {_ negint/8/max}", + "input": "bf63666f6f3bfffffffffffffffeff", + "expect": { + "map": { + "foo": { + "negint": -18446744073709551615 + } + } + } + }, + { + "description": "map - {uint/0/max}", + "input": "a163666f6f17", + "expect": { + "map": { + "foo": { + "uint": 23 + } + } + } + }, + { + "description": "map - {negint/4/max}", + "input": "a163666f6f3affffffff", + "expect": { + "map": { + "foo": { + "negint": -4294967296 + } + } + } + }, + { + "description": "map - {null}", + "input": "a163666f6ff6", + "expect": { + "map": { + "foo": { + "null": {} + } + } + } + }, + { + "description": "map - {_ uint/4/max}", + "input": "bf63666f6f1affffffffff", + "expect": { + "map": { + "foo": { + "uint": 4294967295 + } + } + } + }, + { + "description": "map - {_ float64}", + "input": "bf63666f6ffb7ff0000000000000ff", + "expect": { + "map": { + "foo": { + "float64": 9218868437227405312 + } + } + } + }, + { + "description": "map - {uint/2/min}", + "input": "a163666f6f190000", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {uint/4/min}", + "input": "a163666f6f1a00000000", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {negint/1/min}", + "input": "a163666f6f3800", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ uint/1/max}", + "input": "bf63666f6f18ffff", + "expect": { + "map": { + "foo": { + "uint": 255 + } + } + } + }, + { + "description": "map - {_ uint/2/max}", + "input": "bf63666f6f19ffffff", + "expect": { + "map": { + "foo": { + "uint": 65535 + } + } + } + }, + { + "description": "map - {_ negint/4/max}", + "input": "bf63666f6f3affffffffff", + "expect": { + "map": { + "foo": { + "negint": -4294967296 + } + } + } + }, + { + "description": "tag - 0/min", + "input": "c074323030332D31322D31335431383A33303A30325A", + "expect": { + "tag": { + "id": 0, + "value": { + "string": "2003-12-13T18:30:02Z" + } + } + } + }, 
+ { + "description": "tag - 1/min", + "input": "d80074323030332D31322D31335431383A33303A30325A", + "expect": { + "tag": { + "id": 0, + "value": { + "string": "2003-12-13T18:30:02Z" + } + } + } + }, + { + "description": "tag - 1/max", + "input": "d8ff01", + "expect": { + "tag": { + "id": 255, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 4/min", + "input": "da0000000074323030332D31322D31335431383A33303A30325A", + "expect": { + "tag": { + "id": 0, + "value": { + "string": "2003-12-13T18:30:02Z" + } + } + } + }, + { + "description": "tag - 8/min", + "input": "db000000000000000074323030332D31322D31335431383A33303A30325A", + "expect": { + "tag": { + "id": 0, + "value": { + "string": "2003-12-13T18:30:02Z" + } + } + } + }, + { + "description": "tag - 0/max", + "input": "d701", + "expect": { + "tag": { + "id": 23, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 2/min", + "input": "d9000074323030332D31322D31335431383A33303A30325A", + "expect": { + "tag": { + "id": 0, + "value": { + "string": "2003-12-13T18:30:02Z" + } + } + } + }, + { + "description": "tag - 2/max", + "input": "d9fffe01", + "expect": { + "tag": { + "id": 65534, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 4/max", + "input": "dafffffffe01", + "expect": { + "tag": { + "id": 4294967294, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 8/max", + "input": "dbfffffffffffffffe01", + "expect": { + "tag": { + "id": 18446744073709551614, + "value": { + "uint": 1 + } + } + } + } +] \ No newline at end of file diff --git a/test/test_cbor.py b/test/test_cbor.py new file mode 100644 index 000000000..3ab65b569 --- /dev/null +++ b/test/test_cbor.py @@ -0,0 +1,263 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. 
from test import NativeResourceTest
from awscrt.cbor import *
import json
import struct
import os
import datetime


class TestCBOR(NativeResourceTest):
    """Round-trip and fixture-driven tests for AwsCborEncoder / AwsCborDecoder."""

    def test_cbor_encode_decode_int(self):
        """Integers at the 64-bit boundaries survive an encode/decode round trip."""
        encoder = AwsCborEncoder()

        # Passing a float where an int is required must be rejected.
        with self.assertRaises(TypeError):
            encoder.write_int(1.1)

        val_to_write = [-100, 100, 2**64 - 1, -2**64]
        for val in val_to_write:
            encoder.write_int(val)

        decoder = AwsCborDecoder(encoder.get_encoded_data())

        # The first encoded value is negative, so reading it as unsigned must fail.
        with self.assertRaises(RuntimeError):
            decoder.pop_next_unsigned_int()

        # Every value is still expected to be readable after the failed pop above.
        for val in val_to_write:
            self.assertEqual(decoder.pop_next_data_item(), val)

        self.assertEqual(decoder.get_remaining_bytes_len(), 0)

    def test_cbor_encode_decode_data_item(self):
        """A nested dict/list/scalar structure round-trips through write_data_item."""
        encoder = AwsCborEncoder()
        numerics = [-100.12, 100.0, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0]
        another_map = {
            # "bignum": 2**65,  # TODO: big number are not supported from C impl yet.
            # "negative bignum": -2**75,
            2**6: [1, 2, 3],
            -2**6: [1, ["2", b"3"], {"most complicated": numerics}, 2**6, -2**7],
        }
        val_to_write = {
            "mytest": b"write_test",
            b"test_more": another_map,
            2: {
                2.3: ["a", "b", "c"],
            },
            "empty map": {},
            "empty array": [],
            "True": True,
            "False": False,
            "empty str": "",
            "empty bytes": b"",
        }
        encoder.write_data_item(val_to_write)

        decoder = AwsCborDecoder(encoder.get_encoded_data())

        # Bound to a local so the decoded value is easy to inspect in a debugger.
        decoded = decoder.pop_next_data_item()
        self.assertEqual(val_to_write, decoded)

    def test_cbor_encode_decode_indef(self):
        """Indefinite-length array, map, text and bytes decode to their joined values."""
        encoder = AwsCborEncoder()
        numerics = [-100.12, 100.0, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0]
        another_map = {
            2**6: [1, 2, 3],
            -2**6: [1, ["2", b"3"], {"most complicated": numerics}, 2**6, -2**7],
        }

        encoder.write_indef_array_start()
        for item in numerics:
            encoder.write_data_item(item)
        encoder.write_break()

        encoder.write_indef_map_start()
        for key, value in another_map.items():
            encoder.write_data_item(key)
            encoder.write_data_item(value)
        encoder.write_break()

        text1 = "test"
        text2 = "text"
        encoder.write_indef_text_start()
        encoder.write_text(text1)
        encoder.write_text(text2)
        encoder.write_break()

        bytes1 = b"test"
        bytes2 = b"bytes"
        encoder.write_indef_bytes_start()
        encoder.write_bytes(bytes1)
        encoder.write_bytes(bytes2)
        encoder.write_break()

        decoder = AwsCborDecoder(encoder.get_encoded_data())
        self.assertEqual(numerics, decoder.pop_next_data_item())
        self.assertEqual(another_map, decoder.pop_next_data_item())
        # Chunked text / bytes are expected back as a single concatenated value.
        self.assertEqual(text1 + text2, decoder.pop_next_data_item())
        self.assertEqual(bytes1 + bytes2, decoder.pop_next_data_item())
        self.assertEqual(0, decoder.get_remaining_bytes_len())

    def test_cbor_encode_decode_epoch_time(self):
        """Tag 1 yields the raw number without a handler, and the handler's result with one."""
        time_stamp_secs = 100.1  # some random time

        encoder = AwsCborEncoder()
        encoder.write_tag(1)  # tag 1: epoch-based date/time
        encoder.write_float(time_stamp_secs)

        def on_epoch_time(epoch_secs):
            return datetime.datetime.fromtimestamp(epoch_secs)

        # Without a handler for epoch time, the decoder just returns the numeric.
        decoder = AwsCborDecoder(encoder.get_encoded_data())
        self.assertEqual(time_stamp_secs, decoder.pop_next_data_item())

        # With the handler, the decoded value is whatever the handler returned.
        decoder = AwsCborDecoder(encoder.get_encoded_data(), on_epoch_time)
        self.assertEqual(
            datetime.datetime.fromtimestamp(time_stamp_secs),
            decoder.pop_next_data_item(),
        )

    def test_cbor_encode_decode_unexpected_tag(self):
        """Decoding tag 0 followed by a float is expected to raise."""
        time_stamp_secs = 100.1  # some random time

        encoder = AwsCborEncoder()
        # Tag 0 is the date/time *string* tag (RFC 8949), not epoch time;
        # here it deliberately wraps a float.
        encoder.write_tag(0)
        encoder.write_float(time_stamp_secs)

        def on_epoch_time(epoch_secs):
            return datetime.datetime.fromtimestamp(epoch_secs)

        decoder = AwsCborDecoder(encoder.get_encoded_data(), on_epoch_time)
        # The concrete exception type is not pinned by this test; any Exception passes.
        with self.assertRaises(Exception):
            decoder.pop_next_data_item()

    def _ieee754_bits_to_float(self, bits):
        """Reinterpret a 32-bit unsigned integer bit pattern as a big-endian float."""
        return struct.unpack('>f', struct.pack('>I', bits))[0]

    def _ieee754_bits_to_double(self, bits):
        """Reinterpret a 64-bit unsigned integer bit pattern as a big-endian double."""
        return struct.unpack('>d', struct.pack('>Q', bits))[0]

    def _convert_expect(self, expect):
        """Convert a fixture ``expect`` node into the Python value to compare against.

        Dicts are recognised by a single type key ('uint', 'negint', 'bool',
        'float32', 'float64', 'null', 'bytestring', 'string', 'list', 'map').
        'list' and 'map' are converted recursively. Anything else, including
        a 'tag' expectation or a non-dict, is returned unchanged.
        """
        if not isinstance(expect, dict):
            return expect
        if 'uint' in expect:
            return expect['uint']
        if 'negint' in expect:
            return expect['negint']
        if 'bool' in expect:
            return expect['bool']
        if 'float32' in expect:
            # Floats are stored in the fixtures as raw IEEE-754 bit patterns.
            return self._ieee754_bits_to_float(expect['float32'])
        if 'float64' in expect:
            return self._ieee754_bits_to_double(expect['float64'])
        if 'null' in expect:
            return None
        if 'bytestring' in expect:
            return bytes(expect['bytestring'])
        if 'string' in expect:
            return expect['string']
        if 'list' in expect:
            return [self._convert_expect(item) for item in expect['list']]
        if 'map' in expect:
            return {k: self._convert_expect(v) for k, v in expect['map'].items()}
        return expect

    def _pop_next_item_in_fixture_shape(self, decoder):
        """Pop the next item from ``decoder``, shaped like a fixture expectation.

        Non-tag items are returned as decoded. Tagged items are returned as
        ``{"tag": {"id": ..., "value": {<type>: ...}}}``.
        """
        next_type = decoder.peek_next_type()
        if next_type != AwsCborType.Tag:
            return decoder.pop_next_data_item()

        # TODO: we don't support parse the tag to python type yet.
        # hard code the tag cases to the expected format.
        tag_id = decoder.pop_next_tag_val()
        tag_value_type = "string" if tag_id == 0 else "uint"
        tag_data = decoder.pop_next_data_item()
        return {
            "tag": {
                "id": tag_id,
                "value": {
                    tag_value_type: tag_data,
                },
            },
        }

    def test_cbor_decode_success(self):
        """Test CBOR decoding using test cases from JSON file"""
        current_dir = os.path.dirname(os.path.abspath(__file__))
        test_file = os.path.join(current_dir, 'resources', 'decode-success-tests.json')
        with open(test_file, 'r', encoding='utf-8') as f:
            test_cases = json.load(f)

        for case in test_cases:
            description = case.get("description", "No description")
            input_hex = case.get("input")

            with self.subTest(description=description):
                # Converted inside the sub-test so a bad fixture fails only this case.
                expected = self._convert_expect(case.get("expect"))

                # Convert hex input to bytes
                try:
                    bytes_input = bytes.fromhex(input_hex)
                except ValueError as e:
                    self.fail(f"Failed to convert hex input: {e}")

                # Decode the CBOR data. Only decoding is guarded here, so a value
                # mismatch below is reported as a mismatch, not a decode failure.
                try:
                    decoder = AwsCborDecoder(bytes_input)
                    decoded_data = self._pop_next_item_in_fixture_shape(decoder)
                except Exception as e:
                    self.fail(f"Failed to decode CBOR data: {e}")

                self.assertEqual(
                    decoded_data,
                    expected,
                    f"Failed case '{description}'\nDecoded: {decoded_data}\nExpected: {expected}"
                )

    def test_cbor_decode_errors(self):
        """Test CBOR decoding error cases from JSON file"""
        current_dir = os.path.dirname(os.path.abspath(__file__))
        test_file = os.path.join(current_dir, 'resources', 'decode-error-tests.json')

        with open(test_file, 'r', encoding='utf-8') as f:
            test_cases = json.load(f)

        for case in test_cases:
            description = case.get("description", "No description")
            input_hex = case.get("input")

            with self.subTest(description=description):
                # Convert hex input to bytes
                try:
                    bytes_input = bytes.fromhex(input_hex)
                except ValueError as e:
                    self.fail(f"Failed to convert hex input: {e}")

                # Decode the CBOR data - should raise an exception
                decoder = AwsCborDecoder(bytes_input)

                with self.assertRaises(RuntimeError):
                    self._pop_next_item_in_fixture_shape(decoder)