micropython/lib/uzlib/uzlib.h

/*
 * uzlib  -  tiny deflate/inflate library (deflate, gzip, zlib)
 *
 * Copyright (c) 2003 by Joergen Ibsen / Jibz
 * All Rights Reserved
 * http://www.ibsensoftware.com/
 *
 * Copyright (c) 2014-2018 by Paul Sokolovsky
 *
 * Optimised for MicroPython:
 * Copyright (c) 2023 by Jim Mussared
 *
 * This software is provided 'as-is', without any express
 * or implied warranty.  In no event will the authors be
 * held liable for any damages arising from the use of
 * this software.
 *
 * Permission is granted to anyone to use this software
 * for any purpose, including commercial applications,
 * and to alter it and redistribute it freely, subject to
 * the following restrictions:
 *
 * 1. The origin of this software must not be
 *    misrepresented; you must not claim that you
 *    wrote the original software. If you use this
 *    software in a product, an acknowledgment in
 *    the product documentation would be appreciated
 *    but is not required.
 *
 * 2. Altered source versions must be plainly marked
 *    as such, and must not be misrepresented as
 *    being the original software.
 *
 * 3. This notice may not be removed or altered from
 *    any source distribution.
 */

#ifndef UZLIB_H_INCLUDED
#define UZLIB_H_INCLUDED

#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>

#include "uzlib_conf.h"
#if UZLIB_CONF_DEBUG_LOG
#include <stdio.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Status codes. The two non-error statuses are non-negative; every
   *_ERROR code is negative. NOTE(review): presumably these are the values
   returned by the int-returning decompression functions declared in this
   header -- confirm against the implementation. */
/* ok status, more data produced */
#define UZLIB_OK             0
/* end of compressed stream reached */
#define UZLIB_DONE           1
/* NOTE(review): going by the names only -- malformed input data, checksum
   mismatch, and a dictionary-related failure respectively; the exact
   conditions are not visible in this header. */
#define UZLIB_DATA_ERROR    (-3)
#define UZLIB_CHKSUM_ERROR  (-4)
#define UZLIB_DICT_ERROR    (-5)

/* checksum types */
/* NOTE(review): presumably the values stored in uzlib_uncomp_t.checksum_type;
   ADLER/CRC presumably correspond to uzlib_adler32()/uzlib_crc32() -- confirm. */
#define UZLIB_CHKSUM_NONE  0
#define UZLIB_CHKSUM_ADLER 1
#define UZLIB_CHKSUM_CRC   2

/* helper macros */

/* Element count of the array `arr`: total size divided by the size of its
   first element. Only meaningful when `arr` is a real array, not a pointer. */
#define TINF_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/* data structures */

/* Code tree: a per-length count table plus a code-to-symbol lookup. */
typedef struct {
    /* table of code length counts */
    unsigned short table[16];
    /* code -> symbol translation table */
    unsigned short trans[288];
} TINF_TREE;

/* Decompressor state: where input comes from, where output goes, the
   running checksum, and the decoder's working variables. */
typedef struct _uzlib_uncomp_t {
    /* Pointer to the next byte in the input buffer */
    const unsigned char *source;
    /* Pointer to the next byte past the input buffer (source_limit = source + len) */
    const unsigned char *source_limit;
    /* If source_limit == NULL, or source >= source_limit, this function
       will be used to read next byte from source stream. The function may
       also return -1 in case of EOF (or irrecoverable error). Note that
       besides returning the next byte, it may also update source and
       source_limit fields, thus allowing for buffered operation. */
    /* User data pointer for source_read_cb (the callback takes a single
       void * argument). */
    void *source_read_data;
    int (*source_read_cb)(void *);

    /* NOTE(review): bit-reader state -- presumably the pending input bits
       and how many of them are valid; confirm against the implementation. */
    unsigned int tag;
    unsigned int bitcount;

    /* Destination (output) buffer start */
    unsigned char *dest_start;
    /* Current pointer in dest buffer */
    unsigned char *dest;
    /* Pointer past the end of the dest buffer, similar to source_limit */
    unsigned char *dest_limit;

    /* Accumulating checksum */
    unsigned int checksum;
    /* NOTE(review): presumably one of the UZLIB_CHKSUM_* values -- confirm. */
    char checksum_type;
    /* NOTE(review): presumably set once the input source reports EOF -- confirm. */
    bool eof;

    /* NOTE(review): per-block decoder state; going by the names, btype/bfinal
       look like the current block's type and final-block flag, curlen a
       remaining length and lzOff a back-reference offset -- not visible in
       this header, confirm before relying on it. */
    int btype;
    int bfinal;
    unsigned int curlen;
    int lzOff;
    /* Optional ring buffer. When dict_ring is non-NULL, TINF_PUT also stores
       every output byte at dict_ring[dict_idx] and advances dict_idx,
       wrapping it to 0 when it reaches dict_size. */
    unsigned char *dict_ring;
    unsigned int dict_size;
    unsigned int dict_idx;

    TINF_TREE ltree; /* dynamic length/symbol tree */
    TINF_TREE dtree; /* dynamic distance tree */
} uzlib_uncomp_t;

/* Emit one output byte for decompressor `d` (a uzlib_uncomp_t pointer
   expression): store `c` at d->dest and advance dest; if d->dict_ring is
   non-NULL, also store the byte at d->dict_ring[d->dict_idx] and advance
   dict_idx, wrapping it to 0 when it reaches d->dict_size.

   `c` is evaluated exactly once and converted to unsigned char, so an
   argument with side effects (e.g. src[i++]) is safe. `d` is still expanded
   several times and must be free of side effects. No check against
   d->dest_limit is made here; that is the caller's job.

   The expansion is deliberately a plain brace block rather than
   do { } while (0), so call sites compile with or without a trailing
   semicolon. */
#define TINF_PUT(d, c) \
    { \
        const unsigned char uzlib_put_byte_ = (unsigned char)(c); \
        *(d)->dest++ = uzlib_put_byte_; \
        if ((d)->dict_ring) { \
            (d)->dict_ring[(d)->dict_idx++] = uzlib_put_byte_; \
            if ((d)->dict_idx == (d)->dict_size) { \
                (d)->dict_idx = 0; \
            } \
        } \
    }

/* Fetch the next input byte for decompressor `d`.
   NOTE(review): presumably reads from d->source and falls back to
   d->source_read_cb once the buffer is exhausted, as the field comments in
   uzlib_uncomp_t describe; what is returned at EOF is not visible in this
   header -- confirm in the implementation. */
unsigned char uzlib_get_byte(uzlib_uncomp_t *d);

/* Decompression API */

/* Prepare `d` for decompression. `dict`/`dictLen` describe a caller-supplied
   buffer. NOTE(review): presumably this becomes d->dict_ring/d->dict_size,
   with NULL meaning "no ring buffer" -- confirm. */
void uzlib_uncompress_init(uzlib_uncomp_t *d, void *dict, unsigned int dictLen);
/* Run the decompressor on `d`. NOTE(review): presumably returns UZLIB_OK,
   UZLIB_DONE or one of the negative UZLIB_*_ERROR codes -- confirm. */
int  uzlib_uncompress(uzlib_uncomp_t *d);
/* Variant of uzlib_uncompress() that also deals with the stream checksum.
   NOTE(review): going by the name and d->checksum/d->checksum_type; exact
   semantics are not visible here -- confirm. */
int  uzlib_uncompress_chksum(uzlib_uncomp_t *d);

/* Container header kinds handled by uzlib_parse_zlib_gzip_header(). */
#define UZLIB_HEADER_ZLIB             0
#define UZLIB_HEADER_GZIP             1
/* Parse a zlib or gzip header from the input of `d`.
   NOTE(review): presumably returns UZLIB_HEADER_ZLIB / UZLIB_HEADER_GZIP on
   success or a negative UZLIB_*_ERROR code, and writes a window-size value
   through `wbits` -- none of this is visible in the header, confirm. */
int uzlib_parse_zlib_gzip_header(uzlib_uncomp_t *d, int *wbits);

/* Compression API */

/* Compressor state: an output sink driven one byte at a time, pending
   output bits, and a caller-provided history buffer. */
typedef struct {
    /* Output sink: dest_write_cb takes a void * and one byte per call.
       NOTE(review): presumably invoked with dest_write_data as its first
       argument -- confirm. */
    void *dest_write_data;
    void (*dest_write_cb)(void *data, uint8_t byte);
    /* NOTE(review): presumably the not-yet-emitted output bits and how many
       of them are valid -- confirm against the implementation. */
    unsigned long outbits;
    int noutbits;
    /* History buffer. NOTE(review): presumably hist_max is its capacity and
       hist_start/hist_len the start and length of the valid region -- confirm. */
    uint8_t *hist_buf;
    size_t hist_max;
    size_t hist_start;
    size_t hist_len;
} uzlib_lz77_state_t;

/* Initialise `state` with a caller-owned history buffer `hist` of `hist_max`
   bytes. NOTE(review): whether dest_write_cb/dest_write_data must be set
   before or after this call is not visible here -- confirm. */
void uzlib_lz77_init(uzlib_lz77_state_t *state, uint8_t *hist, size_t hist_max);
/* Compress `len` bytes starting at `src`. NOTE(review): presumably output is
   delivered through state->dest_write_cb -- confirm. */
void uzlib_lz77_compress(uzlib_lz77_state_t *state, const uint8_t *src, unsigned len);

/* Begin / end a compressed block on `state`.
   NOTE(review): presumably these bracket calls to uzlib_lz77_compress();
   the required call order is not visible in this header -- confirm. */
void uzlib_start_block(uzlib_lz77_state_t *state);
void uzlib_finish_block(uzlib_lz77_state_t *state);

/* Checksum API */

/* Adler-32 over `length` bytes at `data`, returning the updated sum. */
/* prev_sum is previous value for incremental computation, 1 initially */
uint32_t uzlib_adler32(const void *data, unsigned int length, uint32_t prev_sum);
/* CRC-32 over `length` bytes at `data`, returning the updated value.
   NOTE(review): whether the caller must invert the final value is not
   visible here -- confirm. */
/* crc is previous value for incremental computation, 0xffffffff initially */
uint32_t uzlib_crc32(const void *data, unsigned int length, uint32_t crc);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* UZLIB_H_INCLUDED */
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00			`/*`
			`* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)`
			`*`
			`* Copyright (c) 2003 by Joergen Ibsen / Jibz`
			`* All Rights Reserved`
			`* http://www.ibsensoftware.com/`
			`*`
			`* Copyright (c) 2014-2018 by Paul Sokolovsky`
			`*`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`* Optimised for MicroPython:`
			`* Copyright (c) 2023 by Jim Mussared`
			`*`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00			`* This software is provided 'as-is', without any express`
			`* or implied warranty. In no event will the authors be`
			`* held liable for any damages arising from the use of`
			`* this software.`
			`*`
			`* Permission is granted to anyone to use this software`
			`* for any purpose, including commercial applications,`
			`* and to alter it and redistribute it freely, subject to`
			`* the following restrictions:`
			`*`
			`* 1. The origin of this software must not be`
			`* misrepresented; you must not claim that you`
			`* wrote the original software. If you use this`
			`* software in a product, an acknowledgment in`
			`* the product documentation would be appreciated`
			`* but is not required.`
			`*`
			`* 2. Altered source versions must be plainly marked`
			`* as such, and must not be misrepresented as`
			`* being the original software.`
			`*`
			`* 3. This notice may not be removed or altered from`
			`* any source distribution.`
			`*/`

			`#ifndef UZLIB_H_INCLUDED`
			`#define UZLIB_H_INCLUDED`

			`#include <stdlib.h>`
			`#include <stdint.h>`
			`#include <stdbool.h>`

			`#include "uzlib_conf.h"`
			`#if UZLIB_CONF_DEBUG_LOG`
			`#include <stdio.h>`
			`#endif`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`/* ok status, more data produced */`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`#define UZLIB_OK 0`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00			`/* end of compressed stream reached */`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`#define UZLIB_DONE 1`
			`#define UZLIB_DATA_ERROR (-3)`
			`#define UZLIB_CHKSUM_ERROR (-4)`
			`#define UZLIB_DICT_ERROR (-5)`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`/* checksum types */`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`#define UZLIB_CHKSUM_NONE 0`
			`#define UZLIB_CHKSUM_ADLER 1`
			`#define UZLIB_CHKSUM_CRC 2`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`/* helper macros */`
			`#define TINF_ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))`

			`/* data structures */`

			`typedef struct {`
			`unsigned short table[16]; /* table of code length counts */`
			`unsigned short trans[288]; /* code -> symbol translation table */`
			`} TINF_TREE;`

lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`typedef struct _uzlib_uncomp_t {`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00			`/* Pointer to the next byte in the input buffer */`
			`const unsigned char *source;`
			`/* Pointer to the next byte past the input buffer (source_limit = source + len) */`
			`const unsigned char *source_limit;`
			`/* If source_limit == NULL, or source >= source_limit, this function`
			`will be used to read next byte from source stream. The function may`
			`also return -1 in case of EOF (or irrecoverable error). Note that`
			`besides returning the next byte, it may also update source and`
			`source_limit fields, thus allowing for buffered operation. */`
lib/uzlib: Add a source_read_data var to pass to source_read_cb. For better abstraction for users of this API. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-29 01:56:08 +00:00			`void *source_read_data;`
			`int (*source_read_cb)(void *);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`unsigned int tag;`
			`unsigned int bitcount;`

			`/* Destination (output) buffer start */`
			`unsigned char *dest_start;`
			`/* Current pointer in dest buffer */`
			`unsigned char *dest;`
			`/* Pointer past the end of the dest buffer, similar to source_limit */`
			`unsigned char *dest_limit;`

			`/* Accumulating checksum */`
			`unsigned int checksum;`
			`char checksum_type;`
			`bool eof;`

			`int btype;`
			`int bfinal;`
			`unsigned int curlen;`
			`int lzOff;`
			`unsigned char *dict_ring;`
			`unsigned int dict_size;`
			`unsigned int dict_idx;`

			`TINF_TREE ltree; /* dynamic length/symbol tree */`
			`TINF_TREE dtree; /* dynamic distance tree */`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`} uzlib_uncomp_t;`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`#define TINF_PUT(d, c) \`
			`{ \`
			`*d->dest++ = c; \`
			`if (d->dict_ring) { d->dict_ring[d->dict_idx++] = c; if (d->dict_idx == d->dict_size) d->dict_idx = 0; } \`
			`}`

lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`unsigned char uzlib_get_byte(uzlib_uncomp_t *d);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`/* Decompression API */`

lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`void uzlib_uncompress_init(uzlib_uncomp_t *d, void *dict, unsigned int dictLen);`
			`int uzlib_uncompress(uzlib_uncomp_t *d);`
			`int uzlib_uncompress_chksum(uzlib_uncomp_t *d);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
lib/uzlib: Combine zlib/gzip header parsing to allow auto-detect. This supports `wbits` values between +40 to +47. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 15:34:31 +00:00			`#define UZLIB_HEADER_ZLIB 0`
			`#define UZLIB_HEADER_GZIP 1`
			`int uzlib_parse_zlib_gzip_header(uzlib_uncomp_t *d, int *wbits);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`/* Compression API */`

lib/uzlib/defl_static: Optimize zlib_start/finish_block. Collapsing the two adjacent calls to outbits saves 32 bytes. Bringing defl_static.c into lz77.c allows better inlining, saves 24 bytes. Merge the Outbuf/uzlib_lz77_state_t structs, a minor simplification that doesn't change code size. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 16:56:58 +00:00			`typedef struct {`
			`void *dest_write_data;`
			`void (*dest_write_cb)(void *data, uint8_t byte);`
			`unsigned long outbits;`
			`int noutbits;`
lib/uzlib: Add memory-efficient, streaming LZ77 compression support. The compression algorithm implemented in this commit uses much less memory compared to the standard way of implementing it using a hash table and large look-back window. In particular the algorithm here doesn't allocate hash table to store indices into the history of the previously seen text. Instead it simply does a brute-force-search of the history text to find a match for the compressor. This is slower (linear search vs hash table lookup) but with a small enough history (eg 512 bytes) it's not that slow. And a small history does not impact the compression too much. To give some more concrete numbers comparing memory use between the approaches: - Standard approach: inplace compression, all text to compress must be in RAM (or at least memory addressable), and then an additional 16k bytes RAM of hash table pointers, pointing into the text - The approach in this commit: streaming compression, only a limited amount of previous text must be in RAM (user selectable, defaults to 512 bytes). To compress, say, 1k of data, the standard approach requires all that data to be in RAM, plus an additional 16k of RAM for the hash table pointers. With this commit, you only need the 1k of data in RAM. Or if it's streaming from a file (or elsewhere), you could get away with only 256 bytes of RAM for the sliding history and still get very decent compression. In summary: because compression takes such a large amount of RAM (in the standard algorithm) and it's not really suitable for microcontrollers, the approach taken in this commit is to minimise RAM usage as much as possible, and still have acceptable performance (speed and compression ratio). Signed-off-by: Damien George <damien@micropython.org> 2023-01-18 04:46:23 +00:00			`uint8_t *hist_buf;`
			`size_t hist_max;`
			`size_t hist_start;`
			`size_t hist_len;`
lib/uzlib/defl_static: Optimize zlib_start/finish_block. Collapsing the two adjacent calls to outbits saves 32 bytes. Bringing defl_static.c into lz77.c allows better inlining, saves 24 bytes. Merge the Outbuf/uzlib_lz77_state_t structs, a minor simplification that doesn't change code size. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 16:56:58 +00:00			`} uzlib_lz77_state_t;`

			`void uzlib_lz77_init(uzlib_lz77_state_t *state, uint8_t *hist, size_t hist_max);`
			`void uzlib_lz77_compress(uzlib_lz77_state_t *state, const uint8_t *src, unsigned len);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
lib/uzlib/defl_static: Optimize zlib_start/finish_block. Collapsing the two adjacent calls to outbits saves 32 bytes. Bringing defl_static.c into lz77.c allows better inlining, saves 24 bytes. Merge the Outbuf/uzlib_lz77_state_t structs, a minor simplification that doesn't change code size. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 16:56:58 +00:00			`void uzlib_start_block(uzlib_lz77_state_t *state);`
			`void uzlib_finish_block(uzlib_lz77_state_t *state);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`/* Checksum API */`

			`/* prev_sum is previous value for incremental computation, 1 initially */`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`uint32_t uzlib_adler32(const void *data, unsigned int length, uint32_t prev_sum);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00			`/* crc is previous value for incremental computation, 0xffffffff initially */`
lib/uzlib: Clean up tinf -> uzlib rename. This library used a mix of "tinf" and "uzlib" to refer to itself. Remove all use of "tinf" in the public API. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com> 2023-06-26 14:50:05 +00:00			`uint32_t uzlib_crc32(const void *data, unsigned int length, uint32_t crc);`
extmod/uzlib: Update uzlib to v2.9.2. Major changes include robust parsing of erroneous compressed streams and updated API. 2018-11-30 20:36:49 +00:00
			`#ifdef __cplusplus`
			`} /* extern "C" */`
			`#endif`

			`#endif /* UZLIB_H_INCLUDED */`