From 5c5694dc87e2a752809ee40bac6604fd3a8a275e Mon Sep 17 00:00:00 2001 From: rimio Date: Mon, 26 Feb 2018 00:26:03 +0200 Subject: Partial decoder (WIP) --- CMakeLists.txt | 43 +++++ LICENSE | 2 +- bin/.gitignore | 2 + include/ecbor.h | 179 +++++++++++++++++++ lib/.gitignore | 2 + src/ecbor.c | 8 + src/ecbor_decoder.c | 489 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/ecbor_encoder.c | 8 + src/ecbor_internal.h | 26 +++ 9 files changed, 758 insertions(+), 1 deletion(-) create mode 100644 CMakeLists.txt create mode 100644 bin/.gitignore create mode 100644 include/ecbor.h create mode 100644 lib/.gitignore create mode 100644 src/ecbor.c create mode 100644 src/ecbor_decoder.c create mode 100644 src/ecbor_encoder.c create mode 100644 src/ecbor_internal.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..957588a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required (VERSION 3.1) + +project (ecbor LANGUAGES C) + +# Compiler setup +set (CMAKE_C_STANDARD 99) + +add_compile_options (-Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition) + +# Output directory setup +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/lib") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/lib") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bin") + +# Directory setup +set (SRC_DIR "${PROJECT_SOURCE_DIR}/src") +set (INCLUDE_DIR "${PROJECT_SOURCE_DIR}/include") + +include_directories (${INCLUDE_DIR} ${SOURCE_DIR}) + +# Source setup +set (LIB_INCLUDES + "${PROJECT_SOURCE_DIR}/include/ecbor.h" +) + +set (LIB_SOURCES + "${SRC_DIR}/ecbor.c" + "${SRC_DIR}/ecbor_encoder.c" + "${SRC_DIR}/ecbor_decoder.c" +) + +set (TOOL_SOURCES + # TODO +) + +# Targets +add_library (${PROJECT_NAME}_shared SHARED ${LIB_SOURCES}) +add_library (${PROJECT_NAME}_static STATIC ${LIB_SOURCES}) + +set_target_properties (${PROJECT_NAME}_shared PROPERTIES OUTPUT_NAME ${PROJECT_NAME}) +set_target_properties (${PROJECT_NAME}_static PROPERTIES OUTPUT_NAME ${PROJECT_NAME}) + +target_compile_options (${PROJECT_NAME}_static PRIVATE -nostdlib) \ No newline at end of file diff --git a/LICENSE b/LICENSE index e25529a..0f2ba03 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 Vasile Vilvoiu +Copyright (c) 2018 Vasile Vilvoiu Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/bin/.gitignore b/bin/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/bin/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/include/ecbor.h b/include/ecbor.h new file mode 100644 index 0000000..a4aed39 --- /dev/null +++ b/include/ecbor.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2018 Vasile Vilvoiu + * + * libecbor is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef _ECBOR_H_ +#define _ECBOR_H_ + +#include + +/* + * Error codes + */ +typedef enum { + /* no error */ + ECBOR_OK = 0, + ECBOR_ERR_UNKNOWN = 1, /* shouldn't happen, hopefully */ + + /* internal (i.e. misuse) errors */ + ECBOR_ERR_NULL_CONTEXT = 10, + ECBOR_ERR_NULL_INPUT_BUFFER = 11, + ECBOR_ERR_NULL_OUTPUT_BUFFER = 12, + ECBOR_ERR_NULL_ITEM_BUFFER = 13, + + ECBOR_ERR_NULL_ITEM = 20, + + ECBOR_ERR_WRONG_MODE = 30, + + /* bounds errors */ + ECBOR_ERR_INVALID_END_OF_BUFFER = 50, + ECBOR_ERR_END_OF_NODE_BUFFER = 51, + + /* syntax errors */ + ECBOR_ERR_CURRENTLY_NOT_SUPPORTED = 100, + ECBOR_ERR_INVALID_ADDITIONAL = 101, + ECBOR_ERR_INVALID_CHUNK_MAJOR_TYPE = 102, + ECBOR_ERR_NESTET_INDEFINITE_STRING = 103, + ECBOR_ERR_INVALID_KEY_VALUE_PAIR = 104, + ECBOR_ERR_INVALID_STOP_CODE = 105, + + /* control codes */ + ECBOR_END_OF_BUFFER = 200, + ECBOR_END_OF_INDEFINITE = 201, +} ecbor_error_t; + +/* + * Implementation limits + */ + +/* + * CBOR major types + */ +typedef enum { + ECBOR_MT_UNDEFINED = -1, + ECBOR_MT_UINT = 0, + ECBOR_MT_NINT = 1, + ECBOR_MT_BSTR = 2, + ECBOR_MT_STR = 3, + ECBOR_MT_ARRAY = 4, + ECBOR_MT_MAP = 5, + ECBOR_MT_TAG = 6, + ECBOR_MT_SPECIAL = 7, +} ecbor_major_type_t; + +/* + * CBOR Item + */ +typedef struct { + /* major type of item */ + ecbor_major_type_t major_type; + + /* value */ + union { + uint64_t uinteger; + int64_t integer; + const uint8_t *string; + const uint8_t *items; + } value; + + /* storage size of value, in bytes */ + uint64_t size; + + /* number of chunks in value, in case it is indefinite */ + uint64_t n_chunks; + + /* non-zero if size is indefinite (for strings, maps and arrays) */ + uint8_t is_indefinite; +} ecbor_item_t; + +/* + * CBOR tree node + */ +typedef struct ecbor_node ecbor_node_t; +struct ecbor_node { + ecbor_item_t item; + ecbor_node_t *parent; + ecbor_node_t *next; /* next in array or map */ + uint64_t index; /* index in array or map */ +}; + +/* + * Mode + */ +typedef enum { + ECBOR_MODE_DECODE = 0, + ECBOR_MODE_DECODE_STREAMED = 1, + ECBOR_MODE_DECODE_TREE = 2, + ECBOR_MODE_ENCODE = 3 +} ecbor_mode_t; + +/* + * CBOR parsing context + */ +typedef struct { + /* mode of operation for this context */ + ecbor_mode_t mode; + + /* output buffer position */ + uint8_t *out_position; + + /* remaining bytes */ + unsigned int bytes_left; +} ecbor_encode_context_t; + +typedef struct { + /* mode of operation for this context */ + ecbor_mode_t mode; + + /* input buffer position */ + const uint8_t *in_position; + + /* remaining bytes */ + uint64_t bytes_left; + + /* node buffer */ + ecbor_node_t *nodes; + + /* capacity of item buffer (in items) */ + unsigned int node_capacity; + + /* number of used items so far */ + unsigned int n_nodes; +} ecbor_decode_context_t; + + +/* + * Initialization routines + */ +extern ecbor_error_t +ecbor_initialize_decode (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size); + +extern ecbor_error_t +ecbor_initialize_decode_streamed (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size); + +extern ecbor_error_t +ecbor_initialize_decode_tree (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size, + ecbor_node_t *node_buffer, + uint64_t node_capacity); + + +/* + * Decoding routines + */ +extern ecbor_error_t +ecbor_decode (ecbor_decode_context_t *context, ecbor_item_t *item); + +/* + * Encoding routines + */ + +#endif \ No newline at end of file diff --git a/lib/.gitignore b/lib/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/lib/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/src/ecbor.c b/src/ecbor.c new file mode 100644 index 0000000..11984a6 --- /dev/null +++ b/src/ecbor.c @@ -0,0 +1,8 @@ +/* + * Copyright (c) 2018 Vasile Vilvoiu + * + * libecbor is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include "ecbor.h" diff --git a/src/ecbor_decoder.c b/src/ecbor_decoder.c new file mode 100644 index 0000000..ccc8196 --- /dev/null +++ b/src/ecbor_decoder.c @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2018 Vasile Vilvoiu + * + * libecbor is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include "ecbor.h" +#include "ecbor_internal.h" + +static ecbor_error_t +ecbor_initialize_decode_internal (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size) +{ + if (!context) { + return ECBOR_ERR_NULL_CONTEXT; + } + if (!buffer) { + return ECBOR_ERR_NULL_INPUT_BUFFER; + } + + context->in_position = buffer; + context->bytes_left = buffer_size; + + return ECBOR_OK; +} + +ecbor_error_t +ecbor_initialize_decode (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size) +{ + ecbor_error_t rc = + ecbor_initialize_decode_internal (context, buffer, buffer_size); + + if (rc != ECBOR_OK) { + return rc; + } + + context->mode = ECBOR_MODE_DECODE; + context->nodes = NULL; + context->node_capacity = 0; + context->n_nodes = 0; + + return ECBOR_OK; +} + +ecbor_error_t +ecbor_initialize_decode_streamed (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size) +{ + ecbor_error_t rc = + ecbor_initialize_decode_internal (context, buffer, buffer_size); + + if (rc != ECBOR_OK) { + return rc; + } + + context->mode = ECBOR_MODE_DECODE_STREAMED; + context->nodes = NULL; + context->node_capacity = 0; + context->n_nodes = 0; + + return ECBOR_OK; +} + +ecbor_error_t +ecbor_initialize_decode_tree (ecbor_decode_context_t *context, + const uint8_t *buffer, + uint64_t buffer_size, + ecbor_node_t *node_buffer, + uint64_t node_capacity) +{ + ecbor_error_t rc = + ecbor_initialize_decode_internal (context, buffer, buffer_size); + + if (rc != ECBOR_OK) { + return rc; + } + + if (!node_buffer) { + return ECBOR_ERR_NULL_ITEM_BUFFER; + } + + context->mode = ECBOR_MODE_DECODE_TREE; + context->nodes = node_buffer; + context->node_capacity = node_capacity; + context->n_nodes = 0; + + return ECBOR_OK; +} + +static inline ecbor_error_t +ecbor_decode_uint (ecbor_decode_context_t *context, + uint64_t *value, + uint64_t *size, + uint8_t additional) +{ + if (additional < 24) { + /* value stored in additional information */ + (*value) = additional; + (*size) = 1; + } else { + /* compute storage size */ + (*size) = (0x1 << (additional - ECBOR_ADDITIONAL_1BYTE)); + if (context->bytes_left < (*size)) { + return ECBOR_ERR_INVALID_END_OF_BUFFER; + } + + /* read actual value */ + switch (additional) { + case ECBOR_ADDITIONAL_1BYTE: + (*value) = *((uint8_t *) context->in_position); + break; + + case ECBOR_ADDITIONAL_2BYTE: + (*value) = *((uint16_t *) context->in_position); + break; + + case ECBOR_ADDITIONAL_4BYTE: + (*value) = *((uint32_t *) context->in_position); + break; + + case ECBOR_ADDITIONAL_8BYTE: + (*value) = *((uint64_t *) context->in_position); + break; + + default: + return ECBOR_ERR_INVALID_ADDITIONAL; + } + + /* advance buffer */ + context->in_position += (*size); + context->bytes_left -= (*size); + } + + return ECBOR_OK; +} + +static ecbor_error_t +ecbor_decode_next_internal (ecbor_decode_context_t *context, + ecbor_item_t *item, + int8_t is_chunk, + ecbor_major_type_t chunk_mtype) +{ + unsigned int additional; + + if (context->bytes_left == 0) { + return ECBOR_END_OF_BUFFER; + } + if (context->mode != ECBOR_MODE_DECODE + && context->mode != ECBOR_MODE_DECODE_STREAMED) { + /* only allow known modes of operation; junk in will generate + undefined behaviour */ + return ECBOR_ERR_WRONG_MODE; + } + + /* clear item, just so we do not leave garbage on partial read */ + item->major_type = ECBOR_MT_UNDEFINED; + item->size = 0; + item->n_chunks = 0; + item->is_indefinite = false; + + /* extract major type (most significant three bits) and additional info */ + item->major_type = (*context->in_position >> 5) & 0x07; + additional = (*context->in_position & 0x1f); + context->in_position ++; context->bytes_left --; + + /* check mandatory major type (in case we are reading string chunks); + we do not want to continue parsing a malformed indefinite string and + potentially explode the stack with subsequent calls */ + if (is_chunk && chunk_mtype != item->major_type) { + if (item->major_type == ECBOR_MT_SPECIAL + && additional == ECBOR_ADDITIONAL_INDEFINITE) { + /* this is a valid stop code, pass it directly; note that this branch is + only taken when inside an indefinite string */ + return ECBOR_END_OF_INDEFINITE; + } else { + /* this is not a stop code, and the item has the wrong major type */ + return ECBOR_ERR_INVALID_CHUNK_MAJOR_TYPE; + } + } + + /* handle type */ + switch (item->major_type) { + + /* + * Integer types + */ + case ECBOR_MT_UINT: + return ecbor_decode_uint (context, &item->value.uinteger, &item->size, + additional); + + case ECBOR_MT_NINT: + { + /* read negative value as unsigned */ + ecbor_error_t rc = ecbor_decode_uint (context, &item->value.uinteger, + &item->size, additional); + if (rc != ECBOR_OK) { + return rc; + } + + /* parse as negative */ + item->value.integer = (-1) - item->value.uinteger; + } + break; + + /* + * String types + */ + case ECBOR_MT_BSTR: + case ECBOR_MT_STR: + /* keep buffer pointer from current pointer */ + item->value.string = context->in_position; + + /* discriminate between definite and indefinite strings; we do not treat + * indefinite strings as we do indefinite maps and arrays, in the sense + * that we do not allow the user to manually walk each chunk + */ + if (additional == ECBOR_ADDITIONAL_INDEFINITE) { + ecbor_item_t chunk; + ecbor_error_t rc; + + /* mark accordingly */ + item->is_indefinite = true; + + /* do not allow nested indefinite length strings */ + if (is_chunk) { + return ECBOR_ERR_NESTET_INDEFINITE_STRING; + } + + /* indefinite lenght string; read through all blocks to compute size */ + item->value.string = context->in_position; + + while (true) { + /* read next chunk */ + rc = ecbor_decode_next_internal (context, &chunk, true, + item->major_type); + if (rc != ECBOR_OK) { + if (rc == ECBOR_END_OF_INDEFINITE) { + /* stop code found, break from loop */ + break; + } else if (rc == ECBOR_END_OF_BUFFER) { + /* treat a valid end of buffer as invalid since we did not yet + find the stop code */ + return ECBOR_ERR_INVALID_END_OF_BUFFER; + } else if (rc != ECBOR_OK) { + /* other error */ + return rc; + } + } + + /* add chunk size to item size */ + item->size += chunk.size; + item->n_chunks ++; + } + } else { + uint64_t foo; + + /* read size of buffer */ + ecbor_error_t rc = ecbor_decode_uint (context, &item->size, &foo, + additional); + if (rc != ECBOR_OK) { + return rc; + } + + /* advance */ + if (context->bytes_left < item->size) { + return ECBOR_ERR_INVALID_END_OF_BUFFER; + } + context->in_position += item->size; + context->bytes_left -= item->size; + } + break; + + /* + * Arrays and maps + */ + case ECBOR_MT_ARRAY: + case ECBOR_MT_MAP: + /* keep buffer pointer from current pointer */ + item->value.items = context->in_position; + + /* discriminate between definite and indefinite maps and arrays */ + if (additional == ECBOR_ADDITIONAL_INDEFINITE) { + /* mark accordingly */ + item->is_indefinite = true; + + if (context->mode != ECBOR_MODE_DECODE_STREAMED) { + /* we have an indefinite map or array and we're not in streamed mode; + we have to walk children to compute size and advance to next item */ + ecbor_item_t child; + ecbor_error_t rc; + + while (true) { + /* read next chunk */ + rc = ecbor_decode_next_internal (context, &child, false, + ECBOR_MT_UNDEFINED); + if (rc != ECBOR_OK) { + if (rc == ECBOR_END_OF_INDEFINITE) { + /* stop code found, break from loop */ + break; + } else if (rc == ECBOR_END_OF_BUFFER) { + /* treat a valid end of buffer as invalid since we did not yet + find the stop code */ + return ECBOR_ERR_INVALID_END_OF_BUFFER; + } else if (rc != ECBOR_OK) { + /* other error */ + return rc; + } + } + + /* add chunk size to item size */ + item->size += child.size; + item->n_chunks ++; + } + + if ((item->major_type == ECBOR_MT_MAP) && (item->n_chunks % 2 != 0)) { + /* incomplete key-value pair; we expect maps to have even number of + items */ + return ECBOR_ERR_INVALID_KEY_VALUE_PAIR; + } + } + } else { + uint64_t foo; + + /* read size of map or array */ + ecbor_error_t rc = ecbor_decode_uint (context, &item->n_chunks, &foo, + additional); + if (rc != ECBOR_OK) { + return rc; + } + + if (item->major_type == ECBOR_MT_MAP) { + /* we keep the total number of items in n_chunks, yet the map has the + number of key-value pairs encoded in the length */ + item->n_chunks *= 2; + } + + if (context->mode != ECBOR_MODE_DECODE_STREAMED) { + ecbor_item_t child; + ecbor_error_t rc; + uint64_t child_no; + + /* not in streamed mode; compute size so we can advance */ + for (child_no = 0; child_no < item->n_chunks; child_no ++) { + /* read next child */ + rc = ecbor_decode_next_internal (context, &child, false, + ECBOR_MT_UNDEFINED); + if (rc != ECBOR_OK) { + if (rc == ECBOR_END_OF_INDEFINITE) { + /* stop code found, but none is expected */ + return ECBOR_ERR_INVALID_STOP_CODE; + } else if (rc == ECBOR_END_OF_BUFFER) { + /* treat a valid end of buffer as invalid since we did not yet + reach the end of the map or array */ + return ECBOR_ERR_INVALID_END_OF_BUFFER; + } else if (rc != ECBOR_OK) { + /* other error */ + return rc; + } + } + + /* add child size to item size */ + item->size += child.size; + } + } + } + break; + + case ECBOR_MT_TAG: + return ECBOR_ERR_CURRENTLY_NOT_SUPPORTED; + break; + + case ECBOR_MT_SPECIAL: + if (additional == ECBOR_ADDITIONAL_INDEFINITE) { + /* stop code */ + return ECBOR_END_OF_INDEFINITE; + } else { + return ECBOR_ERR_CURRENTLY_NOT_SUPPORTED; + } + break; + + default: + return ECBOR_ERR_UNKNOWN; + } + + return ECBOR_OK; +} + +ecbor_error_t +ecbor_decode (ecbor_decode_context_t *context, ecbor_item_t *item) +{ + ecbor_error_t rc = ECBOR_OK; + ecbor_node_t *curr_prev = NULL, *curr_parent = NULL, *curr_node = NULL; + + if (!context) { + return ECBOR_ERR_NULL_CONTEXT; + } + if (!item) { + return ECBOR_ERR_NULL_ITEM; + } + + if (context->mode == ECBOR_MODE_DECODE + || context->mode == ECBOR_MODE_DECODE_STREAMED) { + /* we just get the next item */ + return ecbor_decode_next_internal (context, item, false, ECBOR_MT_UNDEFINED); + } + + /* + * decode whole tree + */ + context->n_nodes = 0; + + /* step into streamed mode; some of the semantic checks will be done here */ + context->mode = ECBOR_MODE_DECODE_STREAMED; + + /* consume until something happens */ + while (true) { + + /* allocate new node */ + if (context->n_nodes >= context->node_capacity) { + rc = ECBOR_ERR_END_OF_NODE_BUFFER; + goto end; + } + + curr_prev = curr_node; + curr_node = &context->nodes[context->n_nodes]; + context->n_nodes ++; + + if (curr_prev) { + curr_prev->next = curr_node; + curr_node->index = curr_prev->index + 1; + } else { + curr_node->index = 0; + } + + /* consume next item */ + rc = ecbor_decode_next_internal (context, &curr_node->item, false, + ECBOR_MT_UNDEFINED); + + /* handle end of indefinite */ + if (rc == ECBOR_END_OF_INDEFINITE) { + if ((!curr_parent) + || (curr_parent->item.major_type != ECBOR_MT_MAP + && curr_parent->item.major_type != ECBOR_MT_ARRAY) + || (!curr_parent->item.is_indefinite)) { + /* we are not in an indefinite map or array */ + rc = ECBOR_ERR_INVALID_STOP_CODE; + goto end; + } + + /* jump up one level */ + curr_node = curr_parent; + curr_parent = curr_parent->parent; + + /* ignore this item, reuse it later */ + context->n_nodes --; + continue; + } + + /* handle new children */ + if (curr_node->item.major_type == ECBOR_MT_MAP + || curr_node->item.major_type == ECBOR_MT_ARRAY) { + /* TODO */ + } + + /* handle end of definite maps and arrays */ + /* TODO */ + + /* check return code */ + if (rc == ECBOR_END_OF_BUFFER) { + if (!curr_parent) { + /* we are not allowed to reach the end unless the item is top-level */ + rc = ECBOR_ERR_INVALID_END_OF_BUFFER; + goto end; + } + } else if (rc != ECBOR_OK) { + goto end; + } + } + +end: + /* return to tree mode and return error code */ + context->mode = ECBOR_MODE_DECODE_TREE; + return rc; +} \ No newline at end of file diff --git a/src/ecbor_encoder.c b/src/ecbor_encoder.c new file mode 100644 index 0000000..cd9159a --- /dev/null +++ b/src/ecbor_encoder.c @@ -0,0 +1,8 @@ +/* + * Copyright (c) 2018 Vasile Vilvoiu + * + * libecbor is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include "ecbor.h" \ No newline at end of file diff --git a/src/ecbor_internal.h b/src/ecbor_internal.h new file mode 100644 index 0000000..f0ec24b --- /dev/null +++ b/src/ecbor_internal.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Vasile Vilvoiu + * + * libecbor is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef _ECBOR_INTERNAL_H_ +#define _ECBOR_INTERNAL_H_ + +/* Don't rely on for this */ +#define NULL 0 +#define false 0 +#define true 1 + +/* Additional value meanings */ +enum { + ECBOR_ADDITIONAL_1BYTE = 24, + ECBOR_ADDITIONAL_2BYTE = 25, + ECBOR_ADDITIONAL_4BYTE = 26, + ECBOR_ADDITIONAL_8BYTE = 27, + + ECBOR_ADDITIONAL_INDEFINITE = 31 +}; + +#endif \ No newline at end of file -- cgit v1.2.3