diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt
index 2d766d6700..a8dd665d4d 100644
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@@ -324,6 +324,9 @@ vp_set_source_file_compile_flag(src/tools/optimization/vpQuadProg.cpp -Wno-stric
 vp_set_source_file_compile_flag(src/tools/optimization/vpLinProg.cpp -Wno-strict-overflow)
 vp_set_source_file_compile_flag(src/tools/histogram/vpHistogram.cpp -Wno-strict-overflow)
 vp_set_source_file_compile_flag(src/image/vpFont.cpp -Wno-float-equal -Wno-strict-overflow)
+# To avoid warnings with basisu_miniz.h such as:
+# basisu_miniz.h:1184:9: warning: this ‘for’ clause does not guard... [-Wmisleading-indentation]
+vp_set_source_file_compile_flag(src/tools/vpIoTools.cpp -Wno-misleading-indentation)
 vp_set_source_file_compile_flag(src/munkres/vpMunkres.cpp /wd26812 /wd4244)
 vp_set_source_file_compile_flag(test/image/testImageBinarise.cpp -Wno-strict-overflow)
 vp_set_source_file_compile_flag(test/image/testImageOwnership.cpp -Wno-pessimizing-move)
diff --git a/modules/core/include/visp3/core/vpIoTools.h b/modules/core/include/visp3/core/vpIoTools.h
index b0b82eeba4..d473f60c03 100644
--- a/modules/core/include/visp3/core/vpIoTools.h
+++ b/modules/core/include/visp3/core/vpIoTools.h
@@ -47,8 +47,344 @@
 #include <stdlib.h>
 #include <string>
 #include <vector>
+#include <numeric>
 #include <visp3/core/vpColor.h>
 
+#include <memory>
+#include <map>
+#include <cassert>
+#include <complex>
+
+namespace visp
+{
+// https://github.com/BinomialLLC/basis_universal/blob/ad9386a4a1cf2a248f7bbd45f543a7448db15267/encoder/basisu_miniz.h#L665
+static unsigned long vp_mz_crc32(unsigned long crc, const unsigned char *ptr, size_t buf_len)
+{
+  static const unsigned int s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
+    0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
+  unsigned int crcu32 = (unsigned int)crc;
+  if (!ptr) return 0;
+  crcu32 = ~crcu32;
+  while (buf_len--) {
+    unsigned char b = *ptr++;
+    crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)];
+    crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)];
+  }
+  return ~crcu32;
+}
+
+namespace cnpy
+{
+// Copyright (C) 2011  Carl Rogers
+// Released under MIT License
+// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
+struct NpyArray
+{
+  NpyArray(const std::vector<size_t> &_shape, size_t _word_size, bool _fortran_order) :
+    shape(_shape), word_size(_word_size), fortran_order(_fortran_order)
+  {
+    num_vals = 1;
+    for (size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
+    data_holder = std::shared_ptr<std::vector<char>>(
+        new std::vector<char>(num_vals * word_size));
+  }
+
+  NpyArray() : shape(0), word_size(0), fortran_order(0), num_vals(0) { }
+
+  template<typename T>
+  T *data()
+  {
+    return reinterpret_cast<T *>(&(*data_holder)[0]);
+  }
+
+  template<typename T>
+  const T *data() const
+  {
+    return reinterpret_cast<T *>(&(*data_holder)[0]);
+  }
+
+  template<typename T>
+  std::vector<T> as_vec() const
+  {
+    const T *p = data<T>();
+    return std::vector<T>(p, p+num_vals);
+  }
+
+  size_t num_bytes() const
+  {
+    return data_holder->size();
+  }
+
+  std::shared_ptr<std::vector<char>> data_holder;
+  std::vector<size_t> shape;
+  size_t word_size;
+  bool fortran_order;
+  size_t num_vals;
+};
+
+using npz_t = std::map<std::string, NpyArray>;
+
+VISP_EXPORT char BigEndianTest();
+VISP_EXPORT char map_type(const std::type_info &t);
+template<typename T> std::vector<char> create_npy_header(const std::vector<size_t> &shape);
+VISP_EXPORT void parse_npy_header(FILE *fp, size_t &word_size, std::vector<size_t> &shape, bool &fortran_order);
+VISP_EXPORT void parse_npy_header(unsigned char *buffer, size_t &word_size, std::vector<size_t> &shape, bool &fortran_order);
+VISP_EXPORT void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset);
+VISP_EXPORT npz_t npz_load(std::string fname);
+VISP_EXPORT NpyArray npz_load(std::string fname, std::string varname);
+VISP_EXPORT NpyArray npy_load(std::string fname);
+
+template<typename T> std::vector<char> &operator+=(std::vector<char> &lhs, const T rhs)
+{
+  //write in little endian
+  for (size_t byte = 0; byte < sizeof(T); byte++) {
+    char val = *((char *)&rhs+byte);
+    lhs.push_back(val);
+  }
+  return lhs;
+}
+
+template<> inline std::vector<char> &operator+=(std::vector<char> &lhs, const std::string rhs)
+{
+  lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+  return lhs;
+}
+
+template<> inline std::vector<char> &operator+=(std::vector<char> &lhs, const char *rhs)
+{
+//write in little endian
+  size_t len = strlen(rhs);
+  lhs.reserve(len);
+  for (size_t byte = 0; byte < len; byte++) {
+    lhs.push_back(rhs[byte]);
+  }
+  return lhs;
+}
+
+/*!
+  Save an array of data (\p data) into the \p fname npy file. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.save.html">numpy.save</a> function.
+  \param[in] fname : Path to the npy file.
+  \param[in] data : Pointer to an array of basic datatype (int, float, double, std::complex<double>, ...).
+  \param[in] shape : Shape of the array, e.g. Nz x Ny x Nx.
+  \param[in] mode : Writing mode, i.e. overwrite (w) or append (a) to the file.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+template<typename T> void npy_save(std::string fname, const T *data, const std::vector<size_t> shape, std::string mode = "w")
+{
+  FILE *fp = NULL;
+  std::vector<size_t> true_data_shape; //if appending, the shape of existing + new data
+
+  if (mode == "a") fp = fopen(fname.c_str(), "r+b");
+
+  if (fp) {
+    //file exists. we need to append to it. read the header, modify the array size
+    size_t word_size;
+    bool fortran_order;
+    parse_npy_header(fp, word_size, true_data_shape, fortran_order);
+    assert(!fortran_order);
+
+    if (word_size != sizeof(T)) {
+      std::cout<<"libnpy error: "<<fname<<" has word size "<<word_size<<" but npy_save appending data sized "<<sizeof(T)<<"\n";
+      assert(word_size == sizeof(T));
+    }
+    if (true_data_shape.size() != shape.size()) {
+      std::cout<<"libnpy error: npy_save attempting to append misdimensioned data to "<<fname<<"\n";
+      assert(true_data_shape.size() != shape.size());
+    }
+
+    for (size_t i = 1; i < shape.size(); i++) {
+      if (shape[i] != true_data_shape[i]) {
+        std::cout<<"libnpy error: npy_save attempting to append misshaped data to "<<fname<<"\n";
+        assert(shape[i] == true_data_shape[i]);
+      }
+    }
+    true_data_shape[0] += shape[0];
+  }
+  else {
+    fp = fopen(fname.c_str(), "wb");
+    true_data_shape = shape;
+  }
+
+  std::vector<char> header = create_npy_header<T>(true_data_shape);
+  size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
+
+  fseek(fp, 0, SEEK_SET);
+  fwrite(&header[0], sizeof(char), header.size(), fp);
+  fseek(fp, 0, SEEK_END);
+  fwrite(data, sizeof(T), nels, fp);
+  fclose(fp);
+}
+
+/*!
+  Save the specified \p fname array of data (\p data) into the \p zipname npz file. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.savez.html">numpy.savez</a> function.
+  \param[in] zipname : Path to the npz file.
+  \param[in] fname : Identifier for the corresponding array of data.
+  \param[in] data : Pointer to an array of basic datatype (int, float, double, std::complex<double>, ...).
+  \param[in] shape : Shape of the array, e.g. Nz x Ny x Nx.
+  \param[in] mode : Writing mode, i.e. overwrite (w) or append (a) to the file.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+template<typename T> void npz_save(std::string zipname, std::string fname, const T *data, const std::vector<size_t> &shape, std::string mode = "w")
+{
+  //first, append a .npy to the fname
+  fname += ".npy";
+
+  //now, on with the show
+  FILE *fp = NULL;
+  uint16_t nrecs = 0;
+  size_t global_header_offset = 0;
+  std::vector<char> global_header;
+
+  if (mode == "a") fp = fopen(zipname.c_str(), "r+b");
+
+  if (fp) {
+    //zip file exists. we need to add a new npy file to it.
+    //first read the footer. this gives us the offset and size of the global header
+    //then read and store the global header.
+    //below, we will write the the new data at the start of the global header then append the global header and footer below it
+    size_t global_header_size;
+    parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
+    fseek(fp, global_header_offset, SEEK_SET);
+    global_header.resize(global_header_size);
+    size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
+    if (res != global_header_size) {
+      throw std::runtime_error("npz_save: header read error while adding to existing zip");
+    }
+    fseek(fp, global_header_offset, SEEK_SET);
+  }
+  else {
+    fp = fopen(zipname.c_str(), "wb");
+  }
+
+  std::vector<char> npy_header = create_npy_header<T>(shape);
+
+  size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
+  size_t nbytes = nels*sizeof(T) + npy_header.size();
+
+  //get the CRC of the data to be added
+  uint32_t crc = vp_mz_crc32(0L, (uint8_t *)&npy_header[0], npy_header.size());
+  crc = vp_mz_crc32(crc, (uint8_t *)data, nels*sizeof(T));
+
+  //build the local header
+  std::vector<char> local_header;
+  local_header += "PK"; //first part of sig
+  local_header += (uint16_t)0x0403; //second part of sig
+  local_header += (uint16_t)20; //min version to extract
+  local_header += (uint16_t)0; //general purpose bit flag
+  local_header += (uint16_t)0; //compression method
+  local_header += (uint16_t)0; //file last mod time
+  local_header += (uint16_t)0;     //file last mod date
+  local_header += (uint32_t)crc; //crc
+  local_header += (uint32_t)nbytes; //compressed size
+  local_header += (uint32_t)nbytes; //uncompressed size
+  local_header += (uint16_t)fname.size(); //fname length
+  local_header += (uint16_t)0; //extra field length
+  local_header += fname;
+
+  //build global header
+  global_header += "PK"; //first part of sig
+  global_header += (uint16_t)0x0201; //second part of sig
+  global_header += (uint16_t)20; //version made by
+  global_header.insert(global_header.end(), local_header.begin()+4, local_header.begin()+30);
+  global_header += (uint16_t)0; //file comment length
+  global_header += (uint16_t)0; //disk number where file starts
+  global_header += (uint16_t)0; //internal file attributes
+  global_header += (uint32_t)0; //external file attributes
+  global_header += (uint32_t)global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
+  global_header += fname;
+
+  //build footer
+  std::vector<char> footer;
+  footer += "PK"; //first part of sig
+  footer += (uint16_t)0x0605; //second part of sig
+  footer += (uint16_t)0; //number of this disk
+  footer += (uint16_t)0; //disk where footer starts
+  footer += (uint16_t)(nrecs+1); //number of records on this disk
+  footer += (uint16_t)(nrecs+1); //total number of records
+  footer += (uint32_t)global_header.size(); //nbytes of global headers
+  footer += (uint32_t)(global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
+  footer += (uint16_t)0; //zip file comment length
+
+  //write everything
+  fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
+  fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
+  fwrite(data, sizeof(T), nels, fp);
+  fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
+  fwrite(&footer[0], sizeof(char), footer.size(), fp);
+  fclose(fp);
+}
+
+/*!
+  Save the specified 1-D array of data (\p data) into the \p fname npz file. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.save.html">numpy.save</a> function.
+  \param[in] fname : Path to the npy file.
+  \param[in] data : Pointer to a 1-D array of basic datatype (int, float, double, std::complex<double>, ...).
+  \param[in] mode : Writing mode, i.e. overwrite (w) or append (a) to the file.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+template<typename T> void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w")
+{
+  std::vector<size_t> shape;
+  shape.push_back(data.size());
+  npy_save(fname, &data[0], shape, mode);
+}
+
+/*!
+  Save the specified \p fname 1-D array of data (\p data) into the \p zipname npz file. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.savez.html">numpy.savez</a> function.
+  \param[in] zipname : Path to the npz file.
+  \param[in] fname : Identifier for the corresponding array of data.
+  \param[in] data : Pointer to a 1-D array of basic datatype (int, float, double, std::complex<double>, ...).
+  \param[in] mode : Writing mode, i.e. overwrite (w) or append (a) to the file.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+template<typename T> void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w")
+{
+  std::vector<size_t> shape;
+  shape.push_back(data.size());
+  npz_save(zipname, fname, &data[0], shape, mode);
+}
+
+template<typename T> std::vector<char> create_npy_header(const std::vector<size_t> &shape)
+{
+  std::vector<char> dict;
+  dict += "{'descr': '";
+  dict += BigEndianTest();
+  dict += map_type(typeid(T));
+  dict += std::to_string(sizeof(T));
+  dict += "', 'fortran_order': False, 'shape': (";
+  dict += std::to_string(shape[0]);
+  for (size_t i = 1;i < shape.size();i++) {
+    dict += ", ";
+    dict += std::to_string(shape[i]);
+  }
+  if (shape.size() == 1) dict += ",";
+  dict += "), }";
+  //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
+  int remainder = 16 - (10 + dict.size()) % 16;
+  dict.insert(dict.end(), remainder, ' ');
+  dict.back() = '\n';
+
+  std::vector<char> header;
+  header += (char)0x93;
+  header += "NUMPY";
+  header += (char)0x01; //major version of numpy format
+  header += (char)0x00; //minor version of numpy format
+  header += (uint16_t)dict.size();
+  header.insert(header.end(), dict.begin(), dict.end());
+
+  return header;
+}
+
+} // namespace cnpy
+} // namespace visp
+
 /*!
  * \class vpIoTools
  * \ingroup group_core_files_io
diff --git a/modules/core/src/tools/file/basisu_miniz.h b/modules/core/src/tools/file/basisu_miniz.h
new file mode 100644
index 0000000000..e296bc6323
--- /dev/null
+++ b/modules/core/src/tools/file/basisu_miniz.h
@@ -0,0 +1,2549 @@
+/* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing
+   Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt
+  
+   Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ 
+   
+   Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#ifndef MINIZ_HEADER_INCLUDED
+#define MINIZ_HEADER_INCLUDED
+
+#include <stdlib.h>
+
+// Defines to completely disable specific portions of miniz.c:
+// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl.
+
+// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O.
+//#define MINIZ_NO_STDIO
+
+// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or
+// get/set file times, and the C run-time funcs that get/set times won't be called.
+// The current downside is the times written to your archives will be from 1979.
+//#define MINIZ_NO_TIME
+
+// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's.
+//#define MINIZ_NO_ARCHIVE_APIS
+
+// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's.
+//#define MINIZ_NO_ARCHIVE_WRITING_APIS
+
+// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's.
+//#define MINIZ_NO_ZLIB_APIS
+
+// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib.
+//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+
+// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc.
+// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc
+// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user
+// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work.
+//#define MINIZ_NO_MALLOC
+
+#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
+  // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux
+  #define MINIZ_NO_TIME
+#endif
+
+#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
+  #include <time.h>
+#endif
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
+// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
+#define MINIZ_X86_OR_X64_CPU 1
+#endif
+
+#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
+// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
+#define MINIZ_LITTLE_ENDIAN 1
+#endif
+
+#if MINIZ_X86_OR_X64_CPU
+// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
+#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
+#endif
+
+// Using unaligned loads and stores causes errors when using UBSan. Jam it off.
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+#undef MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0
+#endif
+#endif
+
+#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
+// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
+#define MINIZ_HAS_64BIT_REGISTERS 1
+#endif
+
+namespace buminiz {
+
+// ------------------- zlib-style API Definitions.
+
+// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
+typedef unsigned long mz_ulong;
+
+// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
+void mz_free(void *p);
+
+#define MZ_ADLER32_INIT (1)
+// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
+mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
+
+#define MZ_CRC32_INIT (0)
+// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
+mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
+
+// Compression strategies.
+enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
+
+// Method
+#define MZ_DEFLATED 8
+
+#ifndef MINIZ_NO_ZLIB_APIS
+
+// Heap allocation callbacks.
+// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long.
+typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
+typedef void (*mz_free_func)(void *opaque, void *address);
+typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
+
+#define MZ_VERSION          "9.1.15"
+#define MZ_VERNUM           0x91F0
+#define MZ_VER_MAJOR        9
+#define MZ_VER_MINOR        1
+#define MZ_VER_REVISION     15
+#define MZ_VER_SUBREVISION  0
+
+// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
+enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
+
+// Return status codes. MZ_PARAM_ERROR is non-standard.
+enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 };
+
+// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
+enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
+
+// Window bits
+#define MZ_DEFAULT_WINDOW_BITS 15
+
+struct mz_internal_state;
+
+// Compression/decompression stream struct.
+typedef struct mz_stream_s
+{
+  const unsigned char *next_in;     // pointer to next byte to read
+  unsigned int avail_in;            // number of bytes available at next_in
+  mz_ulong total_in;                // total number of bytes consumed so far
+
+  unsigned char *next_out;          // pointer to next byte to write
+  unsigned int avail_out;           // number of bytes that can be written to next_out
+  mz_ulong total_out;               // total number of bytes produced so far
+
+  char *msg;                        // error msg (unused)
+  struct mz_internal_state *state;  // internal state, allocated by zalloc/zfree
+
+  mz_alloc_func zalloc;             // optional heap allocation function (defaults to malloc)
+  mz_free_func zfree;               // optional heap free function (defaults to free)
+  void *opaque;                     // heap alloc function user pointer
+
+  int data_type;                    // data_type (unused)
+  mz_ulong adler;                   // adler32 of the source or uncompressed data
+  mz_ulong reserved;                // not used
+} mz_stream;
+
+typedef mz_stream *mz_streamp;
+
+// Returns the version string of miniz.c.
+const char *mz_version(void);
+
+// mz_deflateInit() initializes a compressor with default options:
+// Parameters:
+//  pStream must point to an initialized mz_stream struct.
+//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
+//  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
+//  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
+// Return values:
+//  MZ_OK on success.
+//  MZ_STREAM_ERROR if the stream is bogus.
+//  MZ_PARAM_ERROR if the input parameters are bogus.
+//  MZ_MEM_ERROR on out of memory.
+int mz_deflateInit(mz_streamp pStream, int level);
+
+// mz_deflateInit2() is like mz_deflate(), except with more control:
+// Additional parameters:
+//   method must be MZ_DEFLATED
+//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
+//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
+int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
+
+// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
+int mz_deflateReset(mz_streamp pStream);
+
+// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
+// Parameters:
+//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
+//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
+// Return values:
+//   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
+//   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
+//   MZ_STREAM_ERROR if the stream is bogus.
+//   MZ_PARAM_ERROR if one of the parameters is invalid.
+//   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
+int mz_deflate(mz_streamp pStream, int flush);
+
+// mz_deflateEnd() deinitializes a compressor:
+// Return values:
+//  MZ_OK on success.
+//  MZ_STREAM_ERROR if the stream is bogus.
+int mz_deflateEnd(mz_streamp pStream);
+
+// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
+mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
+
+// Single-call compression functions mz_compress() and mz_compress2():
+// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
+int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
+int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
+
+// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
+mz_ulong mz_compressBound(mz_ulong source_len);
+
+// Initializes a decompressor.
+int mz_inflateInit(mz_streamp pStream);
+
+// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
+// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
+int mz_inflateInit2(mz_streamp pStream, int window_bits);
+
+// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
+// Parameters:
+//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
+//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
+//   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
+//   MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
+// Return values:
+//   MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
+//   MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
+//   MZ_STREAM_ERROR if the stream is bogus.
+//   MZ_DATA_ERROR if the deflate stream is invalid.
+//   MZ_PARAM_ERROR if one of the parameters is invalid.
+//   MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
+//   with more input data, or with more room in the output buffer (except when using single call decompression, described above).
+int mz_inflate(mz_streamp pStream, int flush);
+int mz_inflate2(mz_streamp pStream, int flush, int adler32_checking);
+
+// Deinitializes a decompressor.
+int mz_inflateEnd(mz_streamp pStream);
+
+// Single-call decompression.
+// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
+int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
+
+// Returns a string description of the specified error code, or NULL if the error code is invalid.
+const char *mz_error(int err);
+
+// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
+// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+  typedef unsigned char Byte;
+  typedef unsigned int uInt;
+  typedef mz_ulong uLong;
+  typedef Byte Bytef;
+  typedef uInt uIntf;
+  typedef char charf;
+  typedef int intf;
+  typedef void *voidpf;
+  typedef uLong uLongf;
+  typedef void *voidp;
+  typedef void *const voidpc;
+  #define Z_NULL                0
+  #define Z_NO_FLUSH            MZ_NO_FLUSH
+  #define Z_PARTIAL_FLUSH       MZ_PARTIAL_FLUSH
+  #define Z_SYNC_FLUSH          MZ_SYNC_FLUSH
+  #define Z_FULL_FLUSH          MZ_FULL_FLUSH
+  #define Z_FINISH              MZ_FINISH
+  #define Z_BLOCK               MZ_BLOCK
+  #define Z_OK                  MZ_OK
+  #define Z_STREAM_END          MZ_STREAM_END
+  #define Z_NEED_DICT           MZ_NEED_DICT
+  #define Z_ERRNO               MZ_ERRNO
+  #define Z_STREAM_ERROR        MZ_STREAM_ERROR
+  #define Z_DATA_ERROR          MZ_DATA_ERROR
+  #define Z_MEM_ERROR           MZ_MEM_ERROR
+  #define Z_BUF_ERROR           MZ_BUF_ERROR
+  #define Z_VERSION_ERROR       MZ_VERSION_ERROR
+  #define Z_PARAM_ERROR         MZ_PARAM_ERROR
+  #define Z_NO_COMPRESSION      MZ_NO_COMPRESSION
+  #define Z_BEST_SPEED          MZ_BEST_SPEED
+  #define Z_BEST_COMPRESSION    MZ_BEST_COMPRESSION
+  #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
+  #define Z_DEFAULT_STRATEGY    MZ_DEFAULT_STRATEGY
+  #define Z_FILTERED            MZ_FILTERED
+  #define Z_HUFFMAN_ONLY        MZ_HUFFMAN_ONLY
+  #define Z_RLE                 MZ_RLE
+  #define Z_FIXED               MZ_FIXED
+  #define Z_DEFLATED            MZ_DEFLATED
+  #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
+  #define alloc_func            mz_alloc_func
+  #define free_func             mz_free_func
+  #define internal_state        mz_internal_state
+  #define z_stream              mz_stream
+  #define deflateInit           mz_deflateInit
+  #define deflateInit2          mz_deflateInit2
+  #define deflateReset          mz_deflateReset
+  #define deflate               mz_deflate
+  #define deflateEnd            mz_deflateEnd
+  #define deflateBound          mz_deflateBound
+  #define compress              mz_compress
+  #define compress2             mz_compress2
+  #define compressBound         mz_compressBound
+  #define inflateInit           mz_inflateInit
+  #define inflateInit2          mz_inflateInit2
+  #define inflate               mz_inflate
+  #define inflateEnd            mz_inflateEnd
+  #define uncompress            mz_uncompress
+  #define crc32                 mz_crc32
+  #define adler32               mz_adler32
+  #define MAX_WBITS             15
+  #define MAX_MEM_LEVEL         9
+  #define zError                mz_error
+  #define ZLIB_VERSION          MZ_VERSION
+  #define ZLIB_VERNUM           MZ_VERNUM
+  #define ZLIB_VER_MAJOR        MZ_VER_MAJOR
+  #define ZLIB_VER_MINOR        MZ_VER_MINOR
+  #define ZLIB_VER_REVISION     MZ_VER_REVISION
+  #define ZLIB_VER_SUBREVISION  MZ_VER_SUBREVISION
+  #define zlibVersion           mz_version
+  #define zlib_version          mz_version()
+#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+
+#endif // MINIZ_NO_ZLIB_APIS
+
+// ------------------- Types and macros
+
+typedef unsigned char mz_uint8;
+typedef signed short mz_int16;
+typedef unsigned short mz_uint16;
+typedef unsigned int mz_uint32;
+typedef unsigned int mz_uint;
+typedef long long mz_int64;
+typedef unsigned long long mz_uint64;
+typedef int mz_bool;
+
+#define MZ_FALSE (0)
+#define MZ_TRUE (1)
+
+// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
+#ifdef _MSC_VER
+   #define MZ_MACRO_END while (0, 0)
+#else
+   #define MZ_MACRO_END while (0)
+#endif
+
+// ------------------- Low-level Decompression API Definitions
+
+// Decompression flags used by tinfl_decompress().
+// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
+// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
+// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
+// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
+enum
+{
+  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
+  TINFL_FLAG_HAS_MORE_INPUT = 2,
+  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
+  TINFL_FLAG_COMPUTE_ADLER32 = 8
+};
+
+// High level decompression functions:
+// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
+// On entry:
+//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
+// On return:
+//  Function returns a pointer to the decompressed data, or NULL on failure.
+//  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
+//  The caller must call mz_free() on the returned block when it's no longer needed.
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
+
+// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
+// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
+#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
+size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
+
+// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
+// Returns 1 on success or 0 on failure.
+typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
+int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
+
+struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
+
+// Max size of LZ dictionary.
+#define TINFL_LZ_DICT_SIZE 32768
+
+// Return status.
+typedef enum
+{
+  TINFL_STATUS_BAD_PARAM = -3,
+  TINFL_STATUS_ADLER32_MISMATCH = -2,
+  TINFL_STATUS_FAILED = -1,
+  TINFL_STATUS_DONE = 0,
+  TINFL_STATUS_NEEDS_MORE_INPUT = 1,
+  TINFL_STATUS_HAS_MORE_OUTPUT = 2
+} tinfl_status;
+
+// Initializes the decompressor to its initial state.
+#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
+#define tinfl_get_adler32(r) (r)->m_check_adler32
+
+// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
+// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
+tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
+
+// Internal/private bits follow.
+enum
+{
+  TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19,
+  TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
+};
+
+typedef struct
+{
+  mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
+  mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
+} tinfl_huff_table;
+
+#if MINIZ_HAS_64BIT_REGISTERS
+  #define TINFL_USE_64BIT_BITBUF 1
+#endif
+
+#if TINFL_USE_64BIT_BITBUF
+  typedef mz_uint64 tinfl_bit_buf_t;
+  #define TINFL_BITBUF_SIZE (64)
+#else
+  typedef mz_uint32 tinfl_bit_buf_t;
+  #define TINFL_BITBUF_SIZE (32)
+#endif
+
+struct tinfl_decompressor_tag
+{
+  mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
+  tinfl_bit_buf_t m_bit_buf;
+  size_t m_dist_from_out_buf_start;
+  tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
+  mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
+};
+
+// ------------------- Low-level Compression API Definitions
+
+// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
+#define TDEFL_LESS_MEMORY 0
+
+// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
+// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression).
+enum
+{
+  TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF
+};
+
+// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
+// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
+// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
+// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
+// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
+// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
+// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
+// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
+// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
+enum
+{
+  TDEFL_WRITE_ZLIB_HEADER             = 0x01000,
+  TDEFL_COMPUTE_ADLER32               = 0x02000,
+  TDEFL_GREEDY_PARSING_FLAG           = 0x04000,
+  TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
+  TDEFL_RLE_MATCHES                   = 0x10000,
+  TDEFL_FILTER_MATCHES                = 0x20000,
+  TDEFL_FORCE_ALL_STATIC_BLOCKS       = 0x40000,
+  TDEFL_FORCE_ALL_RAW_BLOCKS          = 0x80000
+};
+
+// High level compression functions:
+// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
+// On entry:
+//  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
+//  flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
+// On return:
+//  Function returns a pointer to the compressed data, or NULL on failure.
+//  *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
+//  The caller must free() the returned block when it's no longer needed.
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
+
+// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
+// Returns 0 on failure.
+size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
+
+// Compresses an image to a compressed PNG file in memory.
+// On entry:
+//  pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. 
+//  The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
+//  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
+//  If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
+// On return:
+//  Function returns a pointer to the compressed data, or NULL on failure.
+//  *pLen_out will be set to the size of the PNG image file.
+//  The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
+void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
+void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
+
+// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
+typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
+
+// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
+
+enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };
+
+// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
+#if TDEFL_LESS_MEMORY
+enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
+#else
+enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
+#endif
+
+// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
+typedef enum
+{
+  TDEFL_STATUS_BAD_PARAM = -2,
+  TDEFL_STATUS_PUT_BUF_FAILED = -1,
+  TDEFL_STATUS_OKAY = 0,
+  TDEFL_STATUS_DONE = 1,
+} tdefl_status;
+
+// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
+typedef enum
+{
+  TDEFL_NO_FLUSH = 0,
+  TDEFL_SYNC_FLUSH = 2,
+  TDEFL_FULL_FLUSH = 3,
+  TDEFL_FINISH = 4
+} tdefl_flush;
+
+// tdefl's compression state structure.
+typedef struct
+{
+  tdefl_put_buf_func_ptr m_pPut_buf_func;
+  void *m_pPut_buf_user;
+  mz_uint m_flags, m_max_probes[2];
+  int m_greedy_parsing;
+  mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
+  mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
+  mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
+  mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
+  tdefl_status m_prev_return_status;
+  const void *m_pIn_buf;
+  void *m_pOut_buf;
+  size_t *m_pIn_buf_size, *m_pOut_buf_size;
+  tdefl_flush m_flush;
+  const mz_uint8 *m_pSrc;
+  size_t m_src_buf_left, m_out_buf_ofs;
+  mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
+  mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+  mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+  mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+  mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
+  mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
+  mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
+  mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
+} tdefl_compressor;
+
+// Initializes the compressor.
+// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
+// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
+// If pBut_buf_func is NULL the user should always call the tdefl_compress() API.
+// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
+
+// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
+tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
+
+// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
+// tdefl_compress_buffer() always consumes the entire input buffer.
+tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
+
+tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
+mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
+
+// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros.
+#ifndef MINIZ_NO_ZLIB_APIS
+// Create tdefl_compress() flags given zlib-style compression parameters.
+// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
+// window_bits may be -15 (raw deflate) or 15 (zlib)
+// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
+mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
+#endif // #ifndef MINIZ_NO_ZLIB_APIS
+
+} // namespace buminiz
+
+#endif // MINIZ_HEADER_INCLUDED
+
+// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.)
+
+#ifndef MINIZ_HEADER_FILE_ONLY
+
+#include <string.h>
+#include <assert.h>
+
+namespace buminiz {
+
+typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1];
+typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1];
+typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1];
+
+#define MZ_ASSERT(x) assert(x)
+
+#ifdef MINIZ_NO_MALLOC
+  #define MZ_MALLOC(x) NULL
+  #define MZ_FREE(x) (void)x, ((void)0)
+  #define MZ_REALLOC(p, x) NULL
+#else
+  #define MZ_MALLOC(x) malloc(x)
+  #define MZ_FREE(x) free(x)
+  #define MZ_REALLOC(p, x) realloc(p, x)
+#endif
+
+#define MZ_MAX(a,b) (((a)>(b))?(a):(b))
+#define MZ_MIN(a,b) (((a)<(b))?(a):(b))
+#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+  #define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
+  #define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
+#else
+  #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
+  #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
+#endif
+
+#ifdef _MSC_VER
+  #define MZ_FORCEINLINE __forceinline
+#elif defined(__GNUC__)
+  #define MZ_FORCEINLINE inline __attribute__((__always_inline__))
+#else
+  #define MZ_FORCEINLINE inline
+#endif
+
+// ------------------- zlib-style API's
+
+mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len)
+{
+  mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552;
+  if (!ptr) return MZ_ADLER32_INIT;
+  while (buf_len) {
+    for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
+      s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
+      s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
+    }
+    for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1;
+    s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;
+  }
+  return (s2 << 16) + s1;
+}
+
+// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/
+mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len)
+{
+  static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
+    0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
+  mz_uint32 crcu32 = (mz_uint32)crc;
+  if (!ptr) return MZ_CRC32_INIT;
+  crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; }
+  return ~crcu32;
+}
+
+void mz_free(void *p)
+{
+  MZ_FREE(p);
+}
+
+#ifndef MINIZ_NO_ZLIB_APIS
+
+static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); }
+static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); }
+//static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); }
+
+const char *mz_version(void)
+{
+  return MZ_VERSION;
+}
+
+int mz_deflateInit(mz_streamp pStream, int level)
+{
+  return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY);
+}
+
+int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy)
+{
+  tdefl_compressor *pComp;
+  mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy);
+
+  if (!pStream) return MZ_STREAM_ERROR;
+  if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR;
+
+  pStream->data_type = 0;
+  pStream->adler = MZ_ADLER32_INIT;
+  pStream->msg = NULL;
+  pStream->reserved = 0;
+  pStream->total_in = 0;
+  pStream->total_out = 0;
+  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
+  if (!pStream->zfree) pStream->zfree = def_free_func;
+
+  pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor));
+  if (!pComp)
+    return MZ_MEM_ERROR;
+
+  pStream->state = (struct mz_internal_state *)pComp;
+
+  if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY)
+  {
+    mz_deflateEnd(pStream);
+    return MZ_PARAM_ERROR;
+  }
+
+  return MZ_OK;
+}
+
+int mz_deflateReset(mz_streamp pStream)
+{
+  if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR;
+  pStream->total_in = pStream->total_out = 0;
+  tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags);
+  return MZ_OK;
+}
+
+int mz_deflate(mz_streamp pStream, int flush)
+{
+  size_t in_bytes, out_bytes;
+  mz_ulong orig_total_in, orig_total_out;
+  int mz_status = MZ_OK;
+
+  if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR;
+  if (!pStream->avail_out) return MZ_BUF_ERROR;
+
+  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
+
+  if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE)
+    return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR;
+
+  orig_total_in = pStream->total_in; orig_total_out = pStream->total_out;
+  for ( ; ; )
+  {
+    tdefl_status defl_status;
+    in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
+
+    defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush);
+    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
+    pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state);
+
+    pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes;
+    pStream->total_out += (mz_uint)out_bytes;
+
+    if (defl_status < 0)
+    {
+      mz_status = MZ_STREAM_ERROR;
+      break;
+    }
+    else if (defl_status == TDEFL_STATUS_DONE)
+    {
+      mz_status = MZ_STREAM_END;
+      break;
+    }
+    else if (!pStream->avail_out)
+      break;
+    else if ((!pStream->avail_in) && (flush != MZ_FINISH))
+    {
+      if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out))
+        break;
+      return MZ_BUF_ERROR; // Can't make forward progress without some input.
+    }
+  }
+  return mz_status;
+}
+
+int mz_deflateEnd(mz_streamp pStream)
+{
+  if (!pStream) return MZ_STREAM_ERROR;
+  if (pStream->state)
+  {
+    pStream->zfree(pStream->opaque, pStream->state);
+    pStream->state = NULL;
+  }
+  return MZ_OK;
+}
+
+mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len)
+{
+  (void)pStream;
+  // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.)
+  mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL;
+  mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL;
+  
+  mz_uint64 t = MZ_MAX(a, b);
+  if (((mz_ulong)t) != t)
+     t = (mz_ulong)(-1);
+
+  return (mz_ulong)t;
+}
+
+int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level)
+{
+  int status;
+  mz_stream stream;
+  memset(&stream, 0, sizeof(stream));
+
+  // In case mz_ulong is 64-bits (argh I hate longs).
+  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
+
+  stream.next_in = pSource;
+  stream.avail_in = (mz_uint32)source_len;
+  stream.next_out = pDest;
+  stream.avail_out = (mz_uint32)*pDest_len;
+
+  status = mz_deflateInit(&stream, level);
+  if (status != MZ_OK) return status;
+
+  status = mz_deflate(&stream, MZ_FINISH);
+  if (status != MZ_STREAM_END)
+  {
+    mz_deflateEnd(&stream);
+    return (status == MZ_OK) ? MZ_BUF_ERROR : status;
+  }
+
+  *pDest_len = stream.total_out;
+  return mz_deflateEnd(&stream);
+}
+
+int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
+{
+  return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION);
+}
+
+mz_ulong mz_compressBound(mz_ulong source_len)
+{
+  return mz_deflateBound(NULL, source_len);
+}
+
+typedef struct
+{
+  tinfl_decompressor m_decomp;
+  mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits;
+  mz_uint8 m_dict[TINFL_LZ_DICT_SIZE];
+  tinfl_status m_last_status;
+} inflate_state;
+
+int mz_inflateInit2(mz_streamp pStream, int window_bits)
+{
+  inflate_state *pDecomp;
+  if (!pStream) return MZ_STREAM_ERROR;
+  if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR;
+
+  pStream->data_type = 0;
+  pStream->adler = 0;
+  pStream->msg = NULL;
+  pStream->total_in = 0;
+  pStream->total_out = 0;
+  pStream->reserved = 0;
+  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
+  if (!pStream->zfree) pStream->zfree = def_free_func;
+
+  pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state));
+  if (!pDecomp) return MZ_MEM_ERROR;
+
+  pStream->state = (struct mz_internal_state *)pDecomp;
+
+  tinfl_init(&pDecomp->m_decomp);
+  pDecomp->m_dict_ofs = 0;
+  pDecomp->m_dict_avail = 0;
+  pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
+  pDecomp->m_first_call = 1;
+  pDecomp->m_has_flushed = 0;
+  pDecomp->m_window_bits = window_bits;
+
+  return MZ_OK;
+}
+
+int mz_inflateInit(mz_streamp pStream)
+{
+   return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS);
+}
+
+int mz_inflate2(mz_streamp pStream, int flush, int adler32_checking)
+{
+  inflate_state* pState;
+  mz_uint n, first_call, decomp_flags = adler32_checking ? TINFL_FLAG_COMPUTE_ADLER32 : 0;
+  size_t in_bytes, out_bytes, orig_avail_in;
+  tinfl_status status;
+
+  if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR;
+  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
+  if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
+
+  pState = (inflate_state*)pStream->state;
+  if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
+  orig_avail_in = pStream->avail_in;
+
+  first_call = pState->m_first_call; pState->m_first_call = 0;
+  if (pState->m_last_status < 0) return MZ_DATA_ERROR;
+
+  if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
+  pState->m_has_flushed |= (flush == MZ_FINISH);
+
+  if ((flush == MZ_FINISH) && (first_call))
+  {
+    // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file.
+    decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
+    in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
+    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags);
+    pState->m_last_status = status;
+    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes;
+    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
+    pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes;
+
+    if (status < 0)
+      return MZ_DATA_ERROR;
+    else if (status != TINFL_STATUS_DONE)
+    {
+      pState->m_last_status = TINFL_STATUS_FAILED;
+      return MZ_BUF_ERROR;
+    }
+    return MZ_STREAM_END;
+  }
+  // flush != MZ_FINISH then we must assume there's more input.
+  if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;
+
+  if (pState->m_dict_avail)
+  {
+    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
+    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
+    pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
+    pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
+    return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
+  }
+
+  for ( ; ; )
+  {
+    in_bytes = pStream->avail_in;
+    out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;
+
+    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
+    pState->m_last_status = status;
+
+    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
+    pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp);
+
+    pState->m_dict_avail = (mz_uint)out_bytes;
+
+    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
+    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
+    pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
+    pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
+
+    if (status < 0)
+       return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well).
+    else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
+      return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH.
+    else if (flush == MZ_FINISH)
+    {
+       // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH.
+       if (status == TINFL_STATUS_DONE)
+          return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
+       // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong.
+       else if (!pStream->avail_out)
+          return MZ_BUF_ERROR;
+    }
+    else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail))
+      break;
+  }
+
+  return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
+}
+
+int mz_inflate(mz_streamp pStream, int flush)
+{
+    return mz_inflate2(pStream, flush, MZ_TRUE);
+}
+
+int mz_inflateEnd(mz_streamp pStream)
+{
+  if (!pStream)
+    return MZ_STREAM_ERROR;
+  if (pStream->state)
+  {
+    pStream->zfree(pStream->opaque, pStream->state);
+    pStream->state = NULL;
+  }
+  return MZ_OK;
+}
+
+int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
+{
+  mz_stream stream;
+  int status;
+  memset(&stream, 0, sizeof(stream));
+
+  // In case mz_ulong is 64-bits (argh I hate longs).
+  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
+
+  stream.next_in = pSource;
+  stream.avail_in = (mz_uint32)source_len;
+  stream.next_out = pDest;
+  stream.avail_out = (mz_uint32)*pDest_len;
+
+  status = mz_inflateInit(&stream);
+  if (status != MZ_OK)
+    return status;
+
+  status = mz_inflate(&stream, MZ_FINISH);
+  if (status != MZ_STREAM_END)
+  {
+    mz_inflateEnd(&stream);
+    return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status;
+  }
+  *pDest_len = stream.total_out;
+
+  return mz_inflateEnd(&stream);
+}
+
+const char *mz_error(int err)
+{
+  static struct { int m_err; const char *m_pDesc; } s_error_descs[] =
+  {
+    { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" },
+    { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" }
+  };
+  mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc;
+  return NULL;
+}
+
+#endif //MINIZ_NO_ZLIB_APIS
+
+// ------------------- Low-level Decompression (completely independent from all compression API's)
+
+#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
+#define TINFL_MEMSET(p, c, l) memset(p, c, l)
+
+#define TINFL_CR_BEGIN switch(r->m_state) { case 0:
+#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END
+#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END
+#define TINFL_CR_FINISH }
+
+// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never
+// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario.
+#define TINFL_GET_BYTE(state_index, c) do { \
+  if (pIn_buf_cur >= pIn_buf_end) { \
+    for ( ; ; ) { \
+      if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \
+        TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
+        if (pIn_buf_cur < pIn_buf_end) { \
+          c = *pIn_buf_cur++; \
+          break; \
+        } \
+      } else { \
+        c = 0; \
+        break; \
+      } \
+    } \
+  } else c = *pIn_buf_cur++; } MZ_MACRO_END
+
+#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n))
+#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END
+#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END
+
+// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2.
+// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a
+// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the
+// bit buffer contains >=15 bits (deflate's max. Huffman code size).
+#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \
+  do { \
+    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
+    if (temp >= 0) { \
+      code_len = temp >> 9; \
+      if ((code_len) && (num_bits >= code_len)) \
+      break; \
+    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \
+       code_len = TINFL_FAST_LOOKUP_BITS; \
+       do { \
+          temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
+       } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \
+    } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \
+  } while (num_bits < 15);
+
+// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
+// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
+// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
+// The slow path is only executed at the very end of the input buffer.
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \
+  int temp; mz_uint code_len, c; \
+  if (num_bits < 15) { \
+    if ((pIn_buf_end - pIn_buf_cur) < 2) { \
+       TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \
+    } else { \
+       bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \
+    } \
+  } \
+  if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \
+    code_len = temp >> 9, temp &= 511; \
+  else { \
+    code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \
+  } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END
+
+tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags)
+{
+  static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 };
+  static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+  static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
+  static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+  static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+  static const int s_min_table_sizes[3] = { 257, 1, 4 };
+
+  tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf;
+  const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
+  mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
+  size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start;
+
+  // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter).
+  if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; }
+
+  num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start;
+  TINFL_CR_BEGIN
+
+  bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1;
+  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
+  {
+    TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1);
+    counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
+    if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4)))));
+    if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); }
+  }
+
+  do
+  {
+    TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1;
+    if (r->m_type == 0)
+    {
+      TINFL_SKIP_BITS(5, num_bits & 7);
+      for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); }
+      if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); }
+      while ((counter) && (num_bits))
+      {
+        TINFL_GET_BITS(51, dist, 8);
+        while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); }
+        *pOut_buf_cur++ = (mz_uint8)dist;
+        counter--;
+      }
+      while (counter)
+      {
+        size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); }
+        while (pIn_buf_cur >= pIn_buf_end)
+        {
+          if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT)
+          {
+            TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT);
+          }
+          else
+          {
+            TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED);
+          }
+        }
+        n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
+        TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n;
+      }
+    }
+    else if (r->m_type == 3)
+    {
+      TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
+    }
+    else
+    {
+      if (r->m_type == 1)
+      {
+        mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i;
+        r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
+        for ( i = 0; i <= 143; ++i) {
+          *p++ = 8;
+        }
+        for ( ; i <= 255; ++i) {
+          *p++ = 9;
+        }
+        for ( ; i <= 279; ++i) {
+          *p++ = 7;
+        }
+        for ( ; i <= 287; ++i) {
+          *p++ = 8;
+        }
+      }
+      else
+      {
+        for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; }
+        MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; }
+        r->m_table_sizes[2] = 19;
+      }
+      for ( ; (int)r->m_type >= 0; r->m_type--)
+      {
+        int tree_next, tree_cur; tinfl_huff_table *pTable;
+        mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree);
+        for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++;
+        used_syms = 0, total = 0; next_code[0] = next_code[1] = 0;
+        for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); }
+        if ((65536 != total) && (used_syms > 1))
+        {
+          TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
+        }
+        for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index)
+        {
+          mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue;
+          cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1);
+          if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; }
+          if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; }
+          rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
+          for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--)
+          {
+            tree_cur -= ((rev_code >>= 1) & 1);
+            if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1];
+          }
+          tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
+        }
+        if (r->m_type == 2)
+        {
+          for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); )
+          {
+            mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; }
+            if ((dist == 16) && (!counter))
+            {
+              TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
+            }
+            num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16];
+            TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); counter += s;
+          }
+          if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter)
+          {
+            TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
+          }
+          TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
+        }
+      }
+      for ( ; ; )
+      {
+        mz_uint8 *pSrc;
+        for ( ; ; )
+        {
+          if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2))
+          {
+            TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]);
+            if (counter >= 256)
+              break;
+            while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); }
+            *pOut_buf_cur++ = (mz_uint8)counter;
+          }
+          else
+          {
+            int sym2; mz_uint code_len;
+#if TINFL_USE_64BIT_BITBUF
+            if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; }
+#else
+            if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; }
+#endif
+            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
+              code_len = sym2 >> 9;
+            else
+            {
+              code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0);
+            }
+            counter = sym2; bit_buf >>= code_len; num_bits -= code_len;
+            if (counter & 256)
+              break;
+
+#if !TINFL_USE_64BIT_BITBUF
+            if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; }
+#endif
+            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
+              code_len = sym2 >> 9;
+            else
+            {
+              code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0);
+            }
+            bit_buf >>= code_len; num_bits -= code_len;
+
+            pOut_buf_cur[0] = (mz_uint8)counter;
+            if (sym2 & 256)
+            {
+              pOut_buf_cur++;
+              counter = sym2;
+              break;
+            }
+            pOut_buf_cur[1] = (mz_uint8)sym2;
+            pOut_buf_cur += 2;
+          }
+        }
+        if ((counter &= 511) == 256) break;
+
+        num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257];
+        if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; }
+
+        TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]);
+        num_extra = s_dist_extra[dist]; dist = s_dist_base[dist];
+        if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; }
+
+        dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
+        if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
+        {
+          TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
+        }
+
+        pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask);
+
+        if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end)
+        {
+          while (counter--)
+          {
+            while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); }
+            *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask];
+          }
+          continue;
+        }
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+        else if ((counter >= 9) && (counter <= dist))
+        {
+          const mz_uint8 *pSrc_end = pSrc + (counter & ~7);
+          do
+          {
+            ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0];
+            ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1];
+            pOut_buf_cur += 8;
+          } while ((pSrc += 8) < pSrc_end);
+          if ((counter &= 7) < 3)
+          {
+            if (counter)
+            {
+              pOut_buf_cur[0] = pSrc[0];
+              if (counter > 1)
+                pOut_buf_cur[1] = pSrc[1];
+              pOut_buf_cur += counter;
+            }
+            continue;
+          }
+        }
+#endif
+        do
+        {
+          pOut_buf_cur[0] = pSrc[0];
+          pOut_buf_cur[1] = pSrc[1];
+          pOut_buf_cur[2] = pSrc[2];
+          pOut_buf_cur += 3; pSrc += 3;
+        } while ((int)(counter -= 3) > 2);
+        if ((int)counter > 0)
+        {
+          pOut_buf_cur[0] = pSrc[0];
+          if ((int)counter > 1)
+            pOut_buf_cur[1] = pSrc[1];
+          pOut_buf_cur += counter;
+        }
+      }
+    }
+  } while (!(r->m_final & 1));
+  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
+  {
+    TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; }
+  }
+  TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);
+  TINFL_CR_FINISH
+
+common_exit:
+  r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start;
+  *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
+  //if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0))
+  if ((decomp_flags & TINFL_FLAG_COMPUTE_ADLER32) && (status >= 0))
+  {
+    const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size;
+    mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552;
+    while (buf_len)
+    {
+      for (i = 0; i + 7 < block_len; i += 8, ptr += 8)
+      {
+        s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
+        s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
+      }
+      for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1;
+      s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;
+    }
+    r->m_check_adler32 = (s2 << 16) + s1; 
+    if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) 
+        status = TINFL_STATUS_ADLER32_MISMATCH;
+  }
+  return status;
+}
+
+// Higher level helper functions.
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
+{
+  tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0;
+  *pOut_len = 0;
+  tinfl_init(&decomp);
+  for ( ; ; )
+  {
+    size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
+    tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size,
+      (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
+    if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT))
+    {
+      MZ_FREE(pBuf); *pOut_len = 0; return NULL;
+    }
+    src_buf_ofs += src_buf_size;
+    *pOut_len += dst_buf_size;
+    if (status == TINFL_STATUS_DONE) break;
+    new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128;
+    pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
+    if (!pNew_buf)
+    {
+      MZ_FREE(pBuf); *pOut_len = 0; return NULL;
+    }
+    pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity;
+  }
+  return pBuf;
+}
+
+size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
+{
+  tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp);
+  status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
+  return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len;
+}
+
+int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
+{
+  int result = 0;
+  tinfl_decompressor decomp;
+  mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0;
+  if (!pDict)
+    return TINFL_STATUS_FAILED;
+  tinfl_init(&decomp);
+  for ( ; ; )
+  {
+    size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
+    tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
+      (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
+    in_buf_ofs += in_buf_size;
+    if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
+      break;
+    if (status != TINFL_STATUS_HAS_MORE_OUTPUT)
+    {
+      result = (status == TINFL_STATUS_DONE);
+      break;
+    }
+    dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
+  }
+  MZ_FREE(pDict);
+  *pIn_buf_size = in_buf_ofs;
+  return result;
+}
+
+// ------------------- Low-level Compression (independent from all decompression API's)
+
+// Purposely making these tables static for faster init and thread safety.
+static const mz_uint16 s_tdefl_len_sym[256] = {
+  257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
+  273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
+  277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
+  279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
+  281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
+  282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
+  283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
+  284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };
+
+static const mz_uint8 s_tdefl_len_extra[256] = {
+  0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };
+
+static const mz_uint8 s_tdefl_small_dist_sym[512] = {
+  0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
+  11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
+  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
+  14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
+  14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+  15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };
+
+static const mz_uint8 s_tdefl_small_dist_extra[512] = {
+  0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7 };
+
+static const mz_uint8 s_tdefl_large_dist_sym[128] = {
+  0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
+  26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
+  28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };
+
+static const mz_uint8 s_tdefl_large_dist_extra[128] = {
+  0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
+  12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };
+
+// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq;
+static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1)
+{
+  mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist);
+  for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; }
+  while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
+  for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
+  {
+    const mz_uint32* pHist = &hist[pass << 8];
+    mz_uint offsets[256], cur_ofs = 0;
+    for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
+    for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
+    { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; }
+  }
+  return pCur_syms;
+}
+
+// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
+static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
+{
+  int root, leaf, next, avbl, used, dpth;
+  if (n==0) return; else if (n==1) { A[0].m_key = 1; return; }
+  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
+  for (next=1; next < n-1; next++)
+  {
+    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = (mz_uint16)next; } else A[next].m_key = A[leaf++].m_key;
+    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); A[root++].m_key = (mz_uint16)next; } else A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
+  }
+  A[n-2].m_key = 0; for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
+  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
+  while (avbl>0)
+  {
+    while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; }
+    while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; }
+    avbl = 2*used; dpth++; used = 0;
+  }
+}
+
+// Limits canonical Huffman code table's max code size.
+enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };
+static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
+{
+  int i; mz_uint32 total = 0; if (code_list_len <= 1) return;
+  for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
+  for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i));
+  while (total != (1UL << max_code_size))
+  {
+    pNum_codes[max_code_size]--;
+    for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
+    total--;
+  }
+}
+
+static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table)
+{
+  int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes);
+  if (static_table)
+  {
+    for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++;
+  }
+  else
+  {
+    tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms;
+    int num_used_syms = 0;
+    const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
+    for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; }
+
+    pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms);
+
+    for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
+
+    tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit);
+
+    MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]);
+    for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
+      for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i);
+  }
+
+  next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1);
+
+  for (i = 0; i < table_len; i++)
+  {
+    mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue;
+    code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1);
+    d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
+  }
+}
+
+#define TDEFL_PUT_BITS(b, l) do { \
+  mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \
+  d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \
+  while (d->m_bits_in >= 8) { \
+    if (d->m_pOutput_buf < d->m_pOutput_buf_end) \
+      *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
+      d->m_bit_buffer >>= 8; \
+      d->m_bits_in -= 8; \
+  } \
+} MZ_MACRO_END
+
+#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \
+  if (rle_repeat_count < 3) { \
+    d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
+    while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \
+  } else { \
+    d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \
+} rle_repeat_count = 0; } }
+
+#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \
+  if (rle_z_count < 3) { \
+    d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
+  } else if (rle_z_count <= 10) { \
+    d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \
+  } else { \
+    d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \
+} rle_z_count = 0; } }
+
+static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+
+static void tdefl_start_dynamic_block(tdefl_compressor *d)
+{
+  int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index;
+  mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF;
+
+  d->m_huff_count[0][256] = 1;
+
+  tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
+  tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);
+
+  for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break;
+  for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break;
+
+  memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
+  memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes);
+  total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0;
+
+  memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
+  for (i = 0; i < total_code_sizes_to_pack; i++)
+  {
+    mz_uint8 code_size = code_sizes_to_pack[i];
+    if (!code_size)
+    {
+      TDEFL_RLE_PREV_CODE_SIZE();
+      if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); }
+    }
+    else
+    {
+      TDEFL_RLE_ZERO_CODE_SIZE();
+      if (code_size != prev_code_size)
+      {
+        TDEFL_RLE_PREV_CODE_SIZE();
+        d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size;
+      }
+      else if (++rle_repeat_count == 6)
+      {
+        TDEFL_RLE_PREV_CODE_SIZE();
+      }
+    }
+    prev_code_size = code_size;
+  }
+  if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); }
+
+  tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);
+
+  TDEFL_PUT_BITS(2, 2);
+
+  TDEFL_PUT_BITS(num_lit_codes - 257, 5);
+  TDEFL_PUT_BITS(num_dist_codes - 1, 5);
+
+  for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break;
+  num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
+  for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);
+
+  for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; )
+  {
+    mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
+    TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
+    if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]);
+  }
+}
+
+static void tdefl_start_static_block(tdefl_compressor *d)
+{
+  mz_uint i;
+  mz_uint8 *p = &d->m_huff_code_sizes[0][0];
+
+  for (i = 0; i <= 143; ++i) *p++ = 8;
+  for ( ; i <= 255; ++i) *p++ = 9;
+  for ( ; i <= 279; ++i) *p++ = 7;
+  for ( ; i <= 287; ++i) *p++ = 8;
+
+  memset(d->m_huff_code_sizes[1], 5, 32);
+
+  tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
+  tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);
+
+  TDEFL_PUT_BITS(1, 2);
+}
+
+static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
+static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
+{
+  mz_uint flags;
+  mz_uint8 *pLZ_codes;
+  mz_uint8 *pOutput_buf = d->m_pOutput_buf;
+  mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
+  mz_uint64 bit_buffer = d->m_bit_buffer;
+  mz_uint bits_in = d->m_bits_in;
+
+#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); }
+
+  flags = 1;
+  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1)
+  {
+    if (flags == 1)
+      flags = *pLZ_codes++ | 0x100;
+
+    if (flags & 1)
+    {
+      mz_uint s0, s1, n0, n1, sym, num_extra_bits;
+      mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3;
+
+      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);
+
+      // This sequence coaxes MSVC into using cmov's vs. jmp's.
+      s0 = s_tdefl_small_dist_sym[match_dist & 511];
+      n0 = s_tdefl_small_dist_extra[match_dist & 511];
+      s1 = s_tdefl_large_dist_sym[match_dist >> 8];
+      n1 = s_tdefl_large_dist_extra[match_dist >> 8];
+      sym = (match_dist < 512) ? s0 : s1;
+      num_extra_bits = (match_dist < 512) ? n0 : n1;
+
+      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
+    }
+    else
+    {
+      mz_uint lit = *pLZ_codes++;
+      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+
+      if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
+      {
+        flags >>= 1;
+        lit = *pLZ_codes++;
+        MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+        TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+
+        if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
+        {
+          flags >>= 1;
+          lit = *pLZ_codes++;
+          MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+          TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+        }
+      }
+    }
+
+    if (pOutput_buf >= d->m_pOutput_buf_end)
+      return MZ_FALSE;
+
+    *(mz_uint64*)pOutput_buf = bit_buffer;
+    pOutput_buf += (bits_in >> 3);
+    bit_buffer >>= (bits_in & ~7);
+    bits_in &= 7;
+  }
+
+#undef TDEFL_PUT_BITS_FAST
+
+  d->m_pOutput_buf = pOutput_buf;
+  d->m_bits_in = 0;
+  d->m_bit_buffer = 0;
+
+  while (bits_in)
+  {
+    mz_uint32 n = MZ_MIN(bits_in, 16);
+    TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
+    bit_buffer >>= n;
+    bits_in -= n;
+  }
+
+  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
+
+  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
+}
+#else
+static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
+{
+  mz_uint flags;
+  mz_uint8 *pLZ_codes;
+
+  flags = 1;
+  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1)
+  {
+    if (flags == 1)
+      flags = *pLZ_codes++ | 0x100;
+    if (flags & 1)
+    {
+      mz_uint sym, num_extra_bits;
+      mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3;
+
+      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);
+
+      if (match_dist < 512)
+      {
+        sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist];
+      }
+      else
+      {
+        sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
+      }
+      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
+    }
+    else
+    {
+      mz_uint lit = *pLZ_codes++;
+      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+      TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+    }
+  }
+
+  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
+
+  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
+}
+#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
+
+static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block)
+{
+  if (static_block)
+    tdefl_start_static_block(d);
+  else
+    tdefl_start_dynamic_block(d);
+  return tdefl_compress_lz_codes(d);
+}
+
+static int tdefl_flush_block(tdefl_compressor *d, int flush)
+{
+  mz_uint saved_bit_buf, saved_bits_in;
+  mz_uint8 *pSaved_output_buf;
+  mz_bool comp_block_succeeded = MZ_FALSE;
+  int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
+  mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf;
+
+  d->m_pOutput_buf = pOutput_buf_start;
+  d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;
+
+  MZ_ASSERT(!d->m_output_flush_remaining);
+  d->m_output_flush_ofs = 0;
+  d->m_output_flush_remaining = 0;
+
+  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left);
+  d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);
+
+  if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index))
+  {
+    TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8);
+  }
+
+  TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1);
+
+  pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in;
+
+  if (!use_raw_block)
+    comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48));
+
+  // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead.
+  if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) &&
+       ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) )
+  {
+    mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
+    TDEFL_PUT_BITS(0, 2);
+    if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); }
+    for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF)
+    {
+      TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
+    }
+    for (i = 0; i < d->m_total_lz_bytes; ++i)
+    {
+      TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8);
+    }
+  }
+  // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes.
+  else if (!comp_block_succeeded)
+  {
+    d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
+    tdefl_compress_block(d, MZ_TRUE);
+  }
+
+  if (flush)
+  {
+    if (flush == TDEFL_FINISH)
+    {
+      if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); }
+      if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } }
+    }
+    else
+    {
+      mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); }
+    }
+  }
+
+  MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);
+
+  memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
+  memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
+
+  d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++;
+
+  if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0)
+  {
+    if (d->m_pPut_buf_func)
+    {
+      *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
+      if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
+        return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
+    }
+    else if (pOutput_buf_start == d->m_output_buf)
+    {
+      int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
+      memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy);
+      d->m_out_buf_ofs += bytes_to_copy;
+      if ((n -= bytes_to_copy) != 0)
+      {
+        d->m_output_flush_ofs = bytes_to_copy;
+        d->m_output_flush_remaining = n;
+      }
+    }
+    else
+    {
+      d->m_out_buf_ofs += n;
+    }
+  }
+
+  return d->m_output_flush_remaining;
+}
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p)
+static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
+{
+  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
+  mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
+  const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q;
+  mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s);
+  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return;
+  for ( ; ; )
+  {
+    for ( ; ; )
+    {
+      if (--num_probes_left == 0) return;
+      #define TDEFL_PROBE \
+        next_probe_pos = d->m_next[probe_pos]; \
+        if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
+        probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
+        if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break;
+      TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE;
+    }
+    if (!dist) {
+      break;
+    }
+    q = (const mz_uint16*)(d->m_dict + probe_pos);
+    if (TDEFL_READ_UNALIGNED_WORD(q) != s01) {
+      continue;
+    }
+    p = s; probe_len = 32;
+    do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
+                   (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
+    if (!probe_len)
+    {
+      *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint) TDEFL_MAX_MATCH_LEN); break;
+    }
+    else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len)
+    {
+      *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break;
+      c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]);
+    }
+  }
+}
+#else
+static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
+{
+  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
+  mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
+  const mz_uint8 *s = d->m_dict + pos, *p, *q;
+  mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
+  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return;
+  for ( ; ; )
+  {
+    for ( ; ; )
+    {
+      if (--num_probes_left == 0) return;
+      #define TDEFL_PROBE \
+        next_probe_pos = d->m_next[probe_pos]; \
+        if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
+        probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
+        if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break;
+      TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE;
+    }
+    if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break;
+    if (probe_len > match_len)
+    {
+      *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return;
+      c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1];
+    }
+  }
+}
+#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+static mz_bool tdefl_compress_fast(tdefl_compressor *d)
+{
+  // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio.
+  mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left;
+  mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
+  mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
+
+  while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size)))
+  {
+    const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
+    mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
+    mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size);
+    d->m_src_buf_left -= num_bytes_to_process;
+    lookahead_size += num_bytes_to_process;
+
+    while (num_bytes_to_process)
+    {
+      mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process);
+      memcpy(d->m_dict + dst_pos, d->m_pSrc, n);
+      if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
+        memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos));
+      d->m_pSrc += n;
+      dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK;
+      num_bytes_to_process -= n;
+    }
+
+    dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size);
+    if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break;
+
+    while (lookahead_size >= 4)
+    {
+      mz_uint cur_match_dist, cur_match_len = 1;
+      mz_uint8 *pCur_dict = d->m_dict + cur_pos;
+      mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF;
+      mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK;
+      mz_uint probe_pos = d->m_hash[hash];
+      d->m_hash[hash] = (mz_uint16)lookahead_pos;
+
+      if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram))
+      {
+        const mz_uint16 *p = (const mz_uint16 *)pCur_dict;
+        const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos);
+        mz_uint32 probe_len = 32;
+        do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
+          (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
+        cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q);
+        if (!probe_len)
+          cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0;
+
+        if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)))
+        {
+          cur_match_len = 1;
+          *pLZ_code_buf++ = (mz_uint8)first_trigram;
+          *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
+          d->m_huff_count[0][(mz_uint8)first_trigram]++;
+        }
+        else
+        {
+          mz_uint32 s0, s1;
+          cur_match_len = MZ_MIN(cur_match_len, lookahead_size);
+
+          MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE));
+
+          cur_match_dist--;
+
+          pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN);
+          *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist;
+          pLZ_code_buf += 3;
+          *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80);
+
+          s0 = s_tdefl_small_dist_sym[cur_match_dist & 511];
+          s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8];
+          d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++;
+
+          d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++;
+        }
+      }
+      else
+      {
+        *pLZ_code_buf++ = (mz_uint8)first_trigram;
+        *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
+        d->m_huff_count[0][(mz_uint8)first_trigram]++;
+      }
+
+      if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; }
+
+      total_lz_bytes += cur_match_len;
+      lookahead_pos += cur_match_len;
+      dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint) TDEFL_LZ_DICT_SIZE);
+      cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
+      MZ_ASSERT(lookahead_size >= cur_match_len);
+      lookahead_size -= cur_match_len;
+
+      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8])
+      {
+        int n;
+        d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+        d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+        if ((n = tdefl_flush_block(d, 0)) != 0)
+          return (n < 0) ? MZ_FALSE : MZ_TRUE;
+        total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left;
+      }
+    }
+
+    while (lookahead_size)
+    {
+      mz_uint8 lit = d->m_dict[cur_pos];
+
+      total_lz_bytes++;
+      *pLZ_code_buf++ = lit;
+      *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
+      if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; }
+
+      d->m_huff_count[0][lit]++;
+
+      lookahead_pos++;
+      dict_size = MZ_MIN(dict_size + 1, (mz_uint) TDEFL_LZ_DICT_SIZE);
+      cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
+      lookahead_size--;
+
+      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8])
+      {
+        int n;
+        d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+        d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+        if ((n = tdefl_flush_block(d, 0)) != 0)
+          return (n < 0) ? MZ_FALSE : MZ_TRUE;
+        total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left;
+      }
+    }
+  }
+
+  d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+  d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+  return MZ_TRUE;
+}
+#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+
+static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit)
+{
+  d->m_total_lz_bytes++;
+  *d->m_pLZ_code_buf++ = lit;
+  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; }
+  d->m_huff_count[0][lit]++;
+}
+
+static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist)
+{
+  mz_uint32 s0, s1;
+
+  MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE));
+
+  d->m_total_lz_bytes += match_len;
+
+  d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);
+
+  match_dist -= 1;
+  d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF);
+  d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3;
+
+  *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; }
+
+  s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
+  d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;
+
+  if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
+}
+
+static mz_bool tdefl_compress_normal(tdefl_compressor *d)
+{
+  const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left;
+  tdefl_flush flush = d->m_flush;
+
+  while ((src_buf_left) || ((flush) && (d->m_lookahead_size)))
+  {
+    mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
+    // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN.
+    if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1))
+    {
+      mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
+      mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
+      mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
+      const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
+      src_buf_left -= num_bytes_to_process;
+      d->m_lookahead_size += num_bytes_to_process;
+      while (pSrc != pSrc_end)
+      {
+        mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
+        hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
+        d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos);
+        dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++;
+      }
+    }
+    else
+    {
+      while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
+      {
+        mz_uint8 c = *pSrc++;
+        mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
+        src_buf_left--;
+        d->m_dict[dst_pos] = c;
+        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
+          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
+        if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN)
+        {
+          mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
+          mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
+          d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos);
+        }
+      }
+    }
+    d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
+    if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
+      break;
+
+    // Simple lazy/greedy parsing state machine.
+    len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
+    if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS))
+    {
+      if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))
+      {
+        mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
+        cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; }
+        if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1;
+      }
+    }
+    else
+    {
+      tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len);
+    }
+    if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5)))
+    {
+      cur_match_dist = cur_match_len = 0;
+    }
+    if (d->m_saved_match_len)
+    {
+      if (cur_match_len > d->m_saved_match_len)
+      {
+        tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
+        if (cur_match_len >= 128)
+        {
+          tdefl_record_match(d, cur_match_len, cur_match_dist);
+          d->m_saved_match_len = 0; len_to_move = cur_match_len;
+        }
+        else
+        {
+          d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
+        }
+      }
+      else
+      {
+        tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
+        len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0;
+      }
+    }
+    else if (!cur_match_dist)
+      tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
+    else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128))
+    {
+      tdefl_record_match(d, cur_match_len, cur_match_dist);
+      len_to_move = cur_match_len;
+    }
+    else
+    {
+      d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
+    }
+    // Move the lookahead forward by len_to_move bytes.
+    d->m_lookahead_pos += len_to_move;
+    MZ_ASSERT(d->m_lookahead_size >= len_to_move);
+    d->m_lookahead_size -= len_to_move;
+    d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint) TDEFL_LZ_DICT_SIZE);
+    // Check if it's time to flush the current LZ codes to the internal output buffer.
+    if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
+         ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) )
+    {
+      int n;
+      d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
+      if ((n = tdefl_flush_block(d, 0)) != 0)
+        return (n < 0) ? MZ_FALSE : MZ_TRUE;
+    }
+  }
+
+  d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
+  return MZ_TRUE;
+}
+
+static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d)
+{
+  if (d->m_pIn_buf_size)
+  {
+    *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
+  }
+
+  if (d->m_pOut_buf_size)
+  {
+    size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining);
+    memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n);
+    d->m_output_flush_ofs += (mz_uint)n;
+    d->m_output_flush_remaining -= (mz_uint)n;
+    d->m_out_buf_ofs += n;
+
+    *d->m_pOut_buf_size = d->m_out_buf_ofs;
+  }
+
+  return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY;
+}
+
+tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush)
+{
+  if (!d)
+  {
+    if (pIn_buf_size) *pIn_buf_size = 0;
+    if (pOut_buf_size) *pOut_buf_size = 0;
+    return TDEFL_STATUS_BAD_PARAM;
+  }
+
+  d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size;
+  d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size;
+  d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
+  d->m_out_buf_ofs = 0;
+  d->m_flush = flush;
+
+  if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) ||
+        (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) )
+  {
+    if (pIn_buf_size) *pIn_buf_size = 0;
+    if (pOut_buf_size) *pOut_buf_size = 0;
+    return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM);
+  }
+  d->m_wants_to_finish |= (flush == TDEFL_FINISH);
+
+  if ((d->m_output_flush_remaining) || (d->m_finished))
+    return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+  if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) &&
+      ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) &&
+      ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0))
+  {
+    if (!tdefl_compress_fast(d))
+      return d->m_prev_return_status;
+  }
+  else
+#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+  {
+    if (!tdefl_compress_normal(d))
+      return d->m_prev_return_status;
+  }
+
+  if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf))
+    d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf);
+
+  if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining))
+  {
+    if (tdefl_flush_block(d, flush) < 0)
+      return d->m_prev_return_status;
+    d->m_finished = (flush == TDEFL_FINISH);
+    if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; }
+  }
+
+  return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
+}
+
+tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush)
+{
+  MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush);
+}
+
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
+{
+  d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user;
+  d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0;
+  d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3;
+  if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash);
+  d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
+  d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
+  d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8;
+  d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY;
+  d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1;
+  d->m_pIn_buf = NULL; d->m_pOut_buf = NULL;
+  d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL;
+  d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0;
+  memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
+  memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
+  return TDEFL_STATUS_OKAY;
+}
+
+tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d)
+{
+  return d->m_prev_return_status;
+}
+
+mz_uint32 tdefl_get_adler32(tdefl_compressor *d)
+{
+  return d->m_adler32;
+}
+
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
+{
+  tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE;
+  pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE;
+  succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY);
+  succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE);
+  MZ_FREE(pComp); return succeeded;
+}
+
+typedef struct
+{
+  size_t m_size, m_capacity;
+  mz_uint8 *m_pBuf;
+  mz_bool m_expandable;
+} tdefl_output_buffer;
+
+static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser)
+{
+  tdefl_output_buffer *p = (tdefl_output_buffer *)pUser;
+  size_t new_size = p->m_size + len;
+  if (new_size > p->m_capacity)
+  {
+    size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE;
+    do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity);
+    pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE;
+    p->m_pBuf = pNew_buf; p->m_capacity = new_capacity;
+  }
+  memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size;
+  return MZ_TRUE;
+}
+
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
+{
+  tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf);
+  if (!pOut_len) return MZ_FALSE; else *pOut_len = 0;
+  out_buf.m_expandable = MZ_TRUE;
+  if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL;
+  *pOut_len = out_buf.m_size; return out_buf.m_pBuf;
+}
+
+size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
+{
+  tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf);
+  if (!pOut_buf) return 0;
+  out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len;
+  if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0;
+  return out_buf.m_size;
+}
+
+#ifndef MINIZ_NO_ZLIB_APIS
+static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32,  16, 32, 128, 256,  512, 768, 1500 };
+
+// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files).
+mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy)
+{
+  mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0);
+  if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER;
+
+  if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS;
+  else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES;
+  else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK;
+  else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
+  else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES;
+
+  return comp_flags;
+}
+#endif //MINIZ_NO_ZLIB_APIS
+
+#ifdef _MSC_VER
+#pragma warning (push)
+#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal)
+#endif
+
+// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at
+// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
+// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck.
+void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip)
+{
+  // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined.
+  static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32,  16, 32, 128, 256,  512, 768, 1500 };
+  tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0;
+  if (!pComp) return NULL;
+  MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; }
+  // write dummy header
+  for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf);
+  // compress image data
+  tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER | (level <= 3 ? TDEFL_GREEDY_PARSING_FLAG : 0));
+  for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); }
+  if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; }
+  // write real header
+  *pLen_out = out_buf.m_size-41;
+  {
+    static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06};
+    mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52,
+      0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0,
+      (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54};
+    c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24);
+    memcpy(out_buf.m_pBuf, pnghdr, 41);
+  }
+  // write footer (IDAT CRC-32, followed by IEND chunk)
+  if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; }
+  c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24);
+  // compute final size of file, grab compressed data buffer and return
+  *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf;
+}
+void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out)
+{
+  // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out)
+  return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE);
+}
+
+#ifdef _MSC_VER
+#pragma warning (pop)
+#endif
+
+} // namespace buminiz
+
+#endif // MINIZ_HEADER_FILE_ONLY
+
diff --git a/modules/core/src/tools/file/vpIoTools.cpp b/modules/core/src/tools/file/vpIoTools.cpp
index bda66bdf19..e228872ce8 100644
--- a/modules/core/src/tools/file/vpIoTools.cpp
+++ b/modules/core/src/tools/file/vpIoTools.cpp
@@ -104,6 +104,368 @@
 #define VP_STAT stat
 #endif
 
+#define USE_ZLIB_API 0
+
+#if !USE_ZLIB_API
+// See: https://github.com/BinomialLLC/basis_universal/blob/master/encoder/basisu_miniz.h
+// Apache License, Version 2.0
+#include "basisu_miniz.h"
+
+using namespace buminiz;
+#else
+#include <zlib.h>
+#endif
+
+// To avoid warnings such as: warning: unused variable ‘littleEndian’ [-Wunused-variable]
+#define _unused(x) ((void)(x)) // see: https://stackoverflow.com/a/777359
+
+// Copyright (C) 2011  Carl Rogers
+// Released under MIT License
+// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
+
+#include <regex>
+
+char visp::cnpy::BigEndianTest()
+{
+  int x = 1;
+  return (((char *)&x)[0]) ? '<' : '>';
+}
+
+char visp::cnpy::map_type(const std::type_info &t)
+{
+  if (t == typeid(float)) return 'f';
+  if (t == typeid(double)) return 'f';
+  if (t == typeid(long double)) return 'f';
+
+  if (t == typeid(int)) return 'i';
+  if (t == typeid(char)) return 'i';
+  if (t == typeid(short)) return 'i';
+  if (t == typeid(long)) return 'i';
+  if (t == typeid(long long)) return 'i';
+
+  if (t == typeid(unsigned char)) return 'u';
+  if (t == typeid(unsigned short)) return 'u';
+  if (t == typeid(unsigned long)) return 'u';
+  if (t == typeid(unsigned long long)) return 'u';
+  if (t == typeid(unsigned int)) return 'u';
+
+  if (t == typeid(bool)) return 'b';
+
+  if (t == typeid(std::complex<float>)) return 'c';
+  if (t == typeid(std::complex<double>)) return 'c';
+  if (t == typeid(std::complex<long double>)) return 'c';
+
+  else return '?';
+}
+
+void visp::cnpy::parse_npy_header(unsigned char *buffer, size_t &word_size, std::vector<size_t> &shape, bool &fortran_order)
+{
+  //std::string magic_string(buffer,6);
+  // uint8_t major_version = *reinterpret_cast<uint8_t*>(buffer+6);
+  // uint8_t minor_version = *reinterpret_cast<uint8_t*>(buffer+7);
+  uint16_t header_len = *reinterpret_cast<uint16_t *>(buffer+8);
+  std::string header(reinterpret_cast<char *>(buffer+9), header_len);
+
+  //fortran order
+  size_t loc1 = header.find("fortran_order")+16;
+  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
+
+  //shape
+  loc1 = header.find("(");
+  size_t loc2 = header.find(")");
+
+  std::regex num_regex("[0-9][0-9]*");
+  std::smatch sm;
+  shape.clear();
+
+  std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
+  while (std::regex_search(str_shape, sm, num_regex)) {
+    shape.push_back(std::stoi(sm[0].str()));
+    str_shape = sm.suffix().str();
+  }
+
+  //endian, word size, data type
+  //byte order code | stands for not applicable.
+  //not sure when this applies except for byte array
+  loc1 = header.find("descr")+9;
+  bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
+  _unused(littleEndian); assert(littleEndian);
+
+  //char type = header[loc1+1];
+  //assert(type == map_type(T));
+
+  std::string str_ws = header.substr(loc1+2);
+  loc2 = str_ws.find("'");
+  word_size = atoi(str_ws.substr(0, loc2).c_str());
+}
+
+void visp::cnpy::parse_npy_header(FILE *fp, size_t &word_size, std::vector<size_t> &shape, bool &fortran_order)
+{
+  char buffer[256];
+  size_t res = fread(buffer, sizeof(char), 11, fp);
+  if (res != 11)
+    throw std::runtime_error("parse_npy_header: failed fread");
+  std::string header = fgets(buffer, 256, fp);
+  assert(header[header.size()-1] == '\n');
+
+  size_t loc1, loc2;
+
+  //fortran order
+  loc1 = header.find("fortran_order");
+  if (loc1 == std::string::npos)
+    throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
+  loc1 += 16;
+  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
+
+  //shape
+  loc1 = header.find("(");
+  loc2 = header.find(")");
+  if (loc1 == std::string::npos || loc2 == std::string::npos)
+    throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
+
+  std::regex num_regex("[0-9][0-9]*");
+  std::smatch sm;
+  shape.clear();
+
+  std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
+  while (std::regex_search(str_shape, sm, num_regex)) {
+    shape.push_back(std::stoi(sm[0].str()));
+    str_shape = sm.suffix().str();
+  }
+
+  //endian, word size, data type
+  //byte order code | stands for not applicable.
+  //not sure when this applies except for byte array
+  loc1 = header.find("descr");
+  if (loc1 == std::string::npos)
+    throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
+  loc1 += 9;
+  bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
+  _unused(littleEndian); assert(littleEndian);
+
+  //char type = header[loc1+1];
+  //assert(type == map_type(T));
+
+  std::string str_ws = header.substr(loc1+2);
+  loc2 = str_ws.find("'");
+  word_size = atoi(str_ws.substr(0, loc2).c_str());
+}
+
+void visp::cnpy::parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
+{
+  std::vector<char> footer(22);
+  fseek(fp, -22, SEEK_END);
+  size_t res = fread(&footer[0], sizeof(char), 22, fp);
+  if (res != 22)
+    throw std::runtime_error("parse_zip_footer: failed fread");
+
+  uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
+  disk_no = *(uint16_t *)&footer[4];
+  disk_start = *(uint16_t *)&footer[6];
+  nrecs_on_disk = *(uint16_t *)&footer[8];
+  nrecs = *(uint16_t *)&footer[10];
+  global_header_size = *(uint32_t *)&footer[12];
+  global_header_offset = *(uint32_t *)&footer[16];
+  comment_len = *(uint16_t *)&footer[20];
+
+  _unused(disk_no); assert(disk_no == 0);
+  _unused(disk_start); assert(disk_start == 0);
+  _unused(nrecs_on_disk); assert(nrecs_on_disk == nrecs);
+  _unused(comment_len); assert(comment_len == 0);
+}
+
+visp::cnpy::NpyArray load_the_npy_file(FILE *fp)
+{
+  std::vector<size_t> shape;
+  size_t word_size;
+  bool fortran_order;
+  visp::cnpy::parse_npy_header(fp, word_size, shape, fortran_order);
+
+  visp::cnpy::NpyArray arr(shape, word_size, fortran_order);
+  size_t nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
+  if (nread != arr.num_bytes())
+    throw std::runtime_error("load_the_npy_file: failed fread");
+  return arr;
+}
+
+visp::cnpy::NpyArray load_the_npz_array(FILE *fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
+{
+  std::vector<unsigned char> buffer_compr(compr_bytes);
+  std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
+  size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
+  if (nread != compr_bytes)
+    throw std::runtime_error("load_the_npy_file: failed fread");
+
+  z_stream d_stream;
+
+  d_stream.zalloc = Z_NULL;
+  d_stream.zfree = Z_NULL;
+  d_stream.opaque = Z_NULL;
+  d_stream.avail_in = 0;
+  d_stream.next_in = Z_NULL;
+  int err = inflateInit2(&d_stream, -MAX_WBITS);
+  _unused(err); assert(err == 0);
+
+  d_stream.avail_in = compr_bytes;
+  d_stream.next_in = &buffer_compr[0];
+  d_stream.avail_out = uncompr_bytes;
+  d_stream.next_out = &buffer_uncompr[0];
+
+  err = inflate(&d_stream, Z_FINISH);
+  _unused(err); assert(err == 0);
+  err = inflateEnd(&d_stream);
+  _unused(err); assert(err == 0);
+
+  std::vector<size_t> shape;
+  size_t word_size;
+  bool fortran_order;
+  visp::cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order);
+
+  visp::cnpy::NpyArray array(shape, word_size, fortran_order);
+
+  size_t offset = uncompr_bytes - array.num_bytes();
+  memcpy(array.data<unsigned char>(), &buffer_uncompr[0]+offset, array.num_bytes());
+
+  return array;
+}
+
+/*!
+  Load the specified \p fname filepath as arrays of data. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.load.html">numpy.load</a> function.
+  \param[in] fname : Path to the npz file.
+  \return A map of arrays data. The key represents the variable name, the value is an array of basic data type.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+visp::cnpy::npz_t visp::cnpy::npz_load(std::string fname)
+{
+  FILE *fp = fopen(fname.c_str(), "rb");
+
+  if (!fp) {
+    throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!");
+  }
+
+  visp::cnpy::npz_t arrays;
+
+  while (1) {
+    std::vector<char> local_header(30);
+    size_t headerres = fread(&local_header[0], sizeof(char), 30, fp);
+    if (headerres != 30)
+      throw std::runtime_error("npz_load: failed fread");
+
+    //if we've reached the global header, stop reading
+    if (local_header[2] != 0x03 || local_header[3] != 0x04) break;
+
+    //read in the variable name
+    uint16_t name_len = *(uint16_t *)&local_header[26];
+    std::string varname(name_len, ' ');
+    size_t vname_res = fread(&varname[0], sizeof(char), name_len, fp);
+    if (vname_res != name_len)
+      throw std::runtime_error("npz_load: failed fread");
+
+    //erase the lagging .npy
+    varname.erase(varname.end()-4, varname.end());
+
+    //read in the extra field
+    uint16_t extra_field_len = *(uint16_t *)&local_header[28];
+    if (extra_field_len > 0) {
+      std::vector<char> buff(extra_field_len);
+      size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp);
+      if (efield_res != extra_field_len)
+        throw std::runtime_error("npz_load: failed fread");
+    }
+
+    uint16_t compr_method = *reinterpret_cast<uint16_t *>(&local_header[0]+8);
+    uint32_t compr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0]+18);
+    uint32_t uncompr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0]+22);
+
+    if (compr_method == 0) { arrays[varname] = load_the_npy_file(fp); }
+    else { arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes); }
+  }
+
+  fclose(fp);
+  return arrays;
+}
+
+/*!
+  Load the specified \p varname array of data from the \p fname npz file. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.load.html">numpy.load</a> function.
+  \param[in] fname : Path to the npz file.
+  \param[in] varname : Identifier for the requested array of data.
+  \return An array of basic data type.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+visp::cnpy::NpyArray visp::cnpy::npz_load(std::string fname, std::string varname)
+{
+  FILE *fp = fopen(fname.c_str(), "rb");
+
+  if (!fp) throw std::runtime_error("npz_load: Unable to open file "+fname);
+
+  while (1) {
+    std::vector<char> local_header(30);
+    size_t header_res = fread(&local_header[0], sizeof(char), 30, fp);
+    if (header_res != 30)
+      throw std::runtime_error("npz_load: failed fread");
+
+  //if we've reached the global header, stop reading
+    if (local_header[2] != 0x03 || local_header[3] != 0x04) break;
+
+    //read in the variable name
+    uint16_t name_len = *(uint16_t *)&local_header[26];
+    std::string vname(name_len, ' ');
+    size_t vname_res = fread(&vname[0], sizeof(char), name_len, fp);
+    if (vname_res != name_len)
+      throw std::runtime_error("npz_load: failed fread");
+    vname.erase(vname.end()-4, vname.end()); //erase the lagging .npy
+
+    //read in the extra field
+    uint16_t extra_field_len = *(uint16_t *)&local_header[28];
+    fseek(fp, extra_field_len, SEEK_CUR); //skip past the extra field
+
+    uint16_t compr_method = *reinterpret_cast<uint16_t *>(&local_header[0]+8);
+    uint32_t compr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0]+18);
+    uint32_t uncompr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0]+22);
+
+    if (vname == varname) {
+      NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes, uncompr_bytes);
+      fclose(fp);
+      return array;
+    }
+    else {
+        //skip past the data
+      uint32_t size = *(uint32_t *)&local_header[22];
+      fseek(fp, size, SEEK_CUR);
+    }
+  }
+
+  fclose(fp);
+
+  //if we get here, we haven't found the variable in the file
+  throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname);
+}
+
+/*!
+  Load the specified npy \p fname filepath as one array of data. This function is similar to the
+  <a href="https://numpy.org/doc/stable/reference/generated/numpy.load.html">numpy.load</a> function.
+  \param[in] fname : Path to the npy file.
+  \return An array of basic data type.
+  \warning This function has only been tested on little endian platform.
+  \note Original library: <a href="https://github.com/rogersce/cnpy">cnpy</a> with MIT license.
+ */
+visp::cnpy::NpyArray visp::cnpy::npy_load(std::string fname)
+{
+
+  FILE *fp = fopen(fname.c_str(), "rb");
+
+  if (!fp) throw std::runtime_error("npy_load: Unable to open file "+fname);
+
+  NpyArray arr = load_the_npy_file(fp);
+
+  fclose(fp);
+  return arr;
+}
+
 std::string vpIoTools::baseName = "";
 std::string vpIoTools::baseDir = "";
 std::string vpIoTools::configFile = "";
diff --git a/modules/io/include/visp3/io/vpImageIo.h b/modules/io/include/visp3/io/vpImageIo.h
index 49be1de4a8..b2c400aa7f 100644
--- a/modules/io/include/visp3/io/vpImageIo.h
+++ b/modules/io/include/visp3/io/vpImageIo.h
@@ -174,5 +174,15 @@ class VISP_EXPORT vpImageIo
 
   static void writeEXR(const vpImage<float> &I, const std::string &filename, int backend = IO_DEFAULT_BACKEND);
   static void writeEXR(const vpImage<vpRGBf> &I, const std::string &filename, int backend = IO_DEFAULT_BACKEND);
+
+  static void readPNGfromMem(const std::vector<unsigned char> &buffer, vpImage<unsigned char> &I,
+      int backend = IO_DEFAULT_BACKEND);
+  static void readPNGfromMem(const std::vector<unsigned char> &buffer, vpImage<vpRGBa> &I,
+      int backend = IO_DEFAULT_BACKEND);
+
+  static void writePNGtoMem(const vpImage<unsigned char> &I, std::vector<unsigned char> &buffer,
+      int backend = IO_DEFAULT_BACKEND);
+  static void writePNGtoMem(const vpImage<vpRGBa> &I, std::vector<unsigned char> &buffer,
+      int backend = IO_DEFAULT_BACKEND, bool saveAlpha = false);
 };
 #endif
diff --git a/modules/io/src/image/private/vpImageIoBackend.h b/modules/io/src/image/private/vpImageIoBackend.h
index bff9f1dbf5..56fcc2e3ba 100644
--- a/modules/io/src/image/private/vpImageIoBackend.h
+++ b/modules/io/src/image/private/vpImageIoBackend.h
@@ -74,6 +74,8 @@ void readPNGLibpng(vpImage<vpRGBa> &I, const std::string &filename);
 void writePNGLibpng(const vpImage<unsigned char> &I, const std::string &filename);
 void writePNGLibpng(const vpImage<vpRGBa> &I, const std::string &filename);
 
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
 // OpenCV
 void readOpenCV(vpImage<unsigned char> &I, const std::string &filename);
 void readOpenCV(vpImage<vpRGBa> &I, const std::string &filename);
@@ -85,6 +87,13 @@ void writeOpenCV(const vpImage<vpRGBa> &I, const std::string &filename, int qual
 void writeOpenCV(const vpImage<float> &I, const std::string &filename);
 void writeOpenCV(const vpImage<vpRGBf> &I, const std::string &filename);
 
+void readPNGfromMemOpenCV(const std::vector<unsigned char> &buffer, vpImage<unsigned char> &I);
+void readPNGfromMemOpenCV(const std::vector<unsigned char> &buffer, vpImage<vpRGBa> &I);
+
+void writePNGtoMemOpenCV(const vpImage<unsigned char> &I, std::vector<unsigned char> &buffer);
+void writePNGtoMemOpenCV(const vpImage<vpRGBa> &I, std::vector<unsigned char> &buffer, bool saveAlpha);
+#endif
+
 #if defined(VISP_HAVE_SIMDLIB)
 // Simd lib
 void readSimdlib(vpImage<unsigned char> &I, const std::string &filename);
@@ -116,6 +125,12 @@ void writeJPEGStb(const vpImage<vpRGBa> &I, const std::string &filename, int qua
 
 void writePNGStb(const vpImage<unsigned char> &I, const std::string &filename);
 void writePNGStb(const vpImage<vpRGBa> &I, const std::string &filename);
+
+void readPNGfromMemStb(const std::vector<unsigned char> &buffer, vpImage<unsigned char> &I);
+void readPNGfromMemStb(const std::vector<unsigned char> &buffer, vpImage<vpRGBa> &I);
+
+void writePNGtoMemStb(const vpImage<unsigned char> &I, std::vector<unsigned char> &buffer);
+void writePNGtoMemStb(const vpImage<vpRGBa> &I, std::vector<unsigned char> &buffer, bool saveAlpha);
 #endif
 
 #endif
diff --git a/modules/io/src/image/private/vpImageIoOpenCV.cpp b/modules/io/src/image/private/vpImageIoOpenCV.cpp
index 6e830eccb8..80a5466150 100644
--- a/modules/io/src/image/private/vpImageIoOpenCV.cpp
+++ b/modules/io/src/image/private/vpImageIoOpenCV.cpp
@@ -56,7 +56,8 @@
 
 #include <visp3/core/vpImageConvert.h>
 
-#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
 
 /*!
   Read the contents of the image file, allocate memory
@@ -166,6 +167,33 @@ void readOpenCV(vpImage<vpRGBf> &I, const std::string &filename)
 #endif
 }
 
+/*!
+  Read the content of the grayscale image bitmap stored in memory and encoded using the PNG format.
+
+  \param[in] buffer : Grayscale image buffer encoded in PNG as 1-D unsigned char vector.
+  \param[out] I : Output decoded grayscale image.
+*/
+void readPNGfromMemOpenCV(const std::vector<unsigned char> &buffer, vpImage<unsigned char> &I)
+{
+  cv::Mat1b buf(buffer.size(), 1, const_cast<unsigned char *>(buffer.data()));
+  cv::Mat1b img = cv::imdecode(buf, cv::IMREAD_GRAYSCALE);
+  I.resize(img.rows, img.cols);
+  std::copy(img.begin(), img.end(), I.bitmap);
+}
+
+/*!
+  Read the content of the color image bitmap stored in memory and encoded using the PNG format.
+
+  \param[in] buffer : Color image buffer encoded in PNG as 1-D unsigned char vector.
+  \param[out] I_color : Output decoded color image.
+*/
+void readPNGfromMemOpenCV(const std::vector<unsigned char> &buffer, vpImage<vpRGBa> &I_color)
+{
+  cv::Mat1b buf(buffer.size(), 1, const_cast<unsigned char *>(buffer.data()));
+  cv::Mat3b img = cv::imdecode(buf, cv::IMREAD_COLOR);
+  vpImageConvert::convert(img, I_color);
+}
+
 /*!
   Write the content of the image bitmap in the file which name is given by \e
   filename. This function writes a JPEG file.
@@ -218,4 +246,60 @@ void writeOpenCV(const vpImage<vpRGBf> &I, const std::string &filename)
   cv::imwrite(filename.c_str(), Ip);
 }
 
+/*!
+  In-memory PNG encoding of the grayscale image.
+
+  \param[in] I : Input grayscale image.
+  \param[out] buffer : Encoded image as 1-D unsigned char vector using the PNG format.
+*/
+void writePNGtoMemOpenCV(const vpImage<unsigned char> &I, std::vector<unsigned char> &buffer)
+{
+  cv::Mat1b img(I.getRows(), I.getCols(), I.bitmap);
+  bool result = cv::imencode(".png", img, buffer);
+
+  if (!result) {
+    std::string message = "Cannot write png to memory";
+    throw(vpImageException(vpImageException::ioError, message));
+  }
+}
+
+/*!
+  In-memory PNG encoding of the color image.
+
+  \param[in] I_color : Input color image.
+  \param[out] buffer : Encoded image as 1-D unsigned char vector using the PNG format.
+  \param[in] saveAlpha : If true, alpha channel is also used for encoding.
+*/
+void writePNGtoMemOpenCV(const vpImage<vpRGBa> &I_color, std::vector<unsigned char> &buffer, bool saveAlpha)
+{
+  const int height = I_color.getRows();
+  const int width = I_color.getCols();
+  const int channels = saveAlpha ? 4 : 3;
+
+  if (saveAlpha) {
+    cv::Mat4b img(height, width, reinterpret_cast<cv::Vec4b *>(I_color.bitmap));
+    // No need to perform RGB to BGR conversion
+    bool result = cv::imencode(".png", img, buffer);
+
+    if (!result) {
+      std::string message = "Cannot write png to memory";
+      throw(vpImageException(vpImageException::ioError, message));
+    }
+  }
+  else {
+    unsigned char *bitmap = new unsigned char[height * width * channels];
+    vpImageConvert::RGBaToRGB(reinterpret_cast<unsigned char *>(I_color.bitmap), bitmap, height*width);
+
+    cv::Mat3b img(height, width, reinterpret_cast<cv::Vec3b *>(bitmap));
+    // No need to perform RGB to BGR conversion
+    bool result = cv::imencode(".png", img, buffer);
+    delete[] bitmap;
+
+    if (!result) {
+      std::string message = "Cannot write png to memory";
+      throw(vpImageException(vpImageException::ioError, message));
+    }
+  }
+}
+
 #endif
diff --git a/modules/io/src/image/private/vpImageIoStb.cpp b/modules/io/src/image/private/vpImageIoStb.cpp
index 253b3a986a..77ab6492b6 100644
--- a/modules/io/src/image/private/vpImageIoStb.cpp
+++ b/modules/io/src/image/private/vpImageIoStb.cpp
@@ -41,6 +41,7 @@
 #if defined(VISP_HAVE_STBIMAGE)
 
 #include "vpImageIoBackend.h"
+#include <visp3/core/vpImageConvert.h>
 
 #if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
 #define VISP_HAVE_SSE2 1
@@ -116,4 +117,140 @@ void writePNGStb(const vpImage<vpRGBa> &I, const std::string &filename)
   }
 }
 
+namespace
+{
+typedef struct
+{
+  int last_pos;
+  void *context;
+} custom_stbi_mem_context;
+
+// custom write function
+static void custom_stbi_write_mem(void *context, void *data, int size)
+{
+  custom_stbi_mem_context *c = (custom_stbi_mem_context *)context;
+  char *dst = (char *)c->context;
+  char *src = (char *)data;
+  int cur_pos = c->last_pos;
+  for (int i = 0; i < size; i++) {
+    dst[cur_pos++] = src[i];
+  }
+  c->last_pos = cur_pos;
+}
+}
+
+/*!
+  Read the content of the grayscale image bitmap stored in memory and encoded using the PNG format.
+
+  \param[in] buffer : Grayscale image buffer encoded in PNG as 1-D unsigned char vector.
+  \param[out] I : Output decoded grayscale image.
+*/
+void readPNGfromMemStb(const std::vector<unsigned char> &buffer, vpImage<unsigned char> &I)
+{
+  int x = 0, y = 0, comp = 0;
+  const int req_channels = 1;
+  unsigned char *buffer_read = stbi_load_from_memory(buffer.data(), buffer.size(), &x, &y, &comp, req_channels);
+  assert(comp == req_channels);
+
+  I.init(buffer_read, y, x, true);
+  STBI_FREE(buffer_read);
+}
+
+/*!
+  Read the content of the color image bitmap stored in memory and encoded using the PNG format.
+
+  \param[in] buffer : Color image buffer encoded in PNG as 1-D unsigned char vector.
+  \param[out] I_color : Output decoded color image.
+*/
+void readPNGfromMemStb(const std::vector<unsigned char> &buffer, vpImage<vpRGBa> &I_color)
+{
+  int x = 0, y = 0, comp = 0;
+  unsigned char *buffer_read = stbi_load_from_memory(buffer.data(), buffer.size(), &x, &y, &comp, 0);
+
+  if (comp == 4) {
+    const bool copyData = true;
+    I_color.init(reinterpret_cast<vpRGBa *>(buffer_read), y, x, copyData);
+  }
+  else if (comp == 3) {
+    I_color.init(y, x);
+    const bool flip = false;
+    vpImageConvert::RGBToRGBa(buffer_read, reinterpret_cast<unsigned char *>(I_color.bitmap), x, y, flip);
+  }
+  else {
+    STBI_FREE(buffer_read);
+    std::string message = "Wrong number of channels for the input buffer: " + std::to_string(comp);
+    throw(vpImageException(vpImageException::ioError, message));
+  }
+
+  STBI_FREE(buffer_read);
+}
+
+/*!
+  In-memory PNG encoding of the grayscale image.
+
+  \param[in] I : Input grayscale image.
+  \param[out] buffer : Encoded image as 1-D unsigned char vector using the PNG format.
+*/
+void writePNGtoMemStb(const vpImage<unsigned char> &I, std::vector<unsigned char> &buffer)
+{
+  const int height = I.getRows();
+  const int width = I.getCols();
+  const int channels = 1;
+
+  custom_stbi_mem_context context;
+  context.last_pos = 0;
+  buffer.resize(I.getHeight() * I.getWidth());
+  context.context = (void *)buffer.data();
+
+  const int stride_bytes = 0;
+  int result = stbi_write_png_to_func(custom_stbi_write_mem, &context, width, height, channels, I.bitmap, stride_bytes);
+
+  if (result) {
+    buffer.resize(context.last_pos);
+  }
+  else {
+    std::string message = "Cannot write png to memory, result: " + std::to_string(result);
+    throw(vpImageException(vpImageException::ioError, message));
+  }
+}
+
+/*!
+  In-memory PNG encoding of the color image.
+
+  \param[in] I_color : Input color image.
+  \param[out] buffer : Encoded image as 1-D unsigned char vector using the PNG format.
+  \param[in] saveAlpha : If true, alpha channel is also used for encoding.
+*/
+void writePNGtoMemStb(const vpImage<vpRGBa> &I_color, std::vector<unsigned char> &buffer, bool saveAlpha)
+{
+  const int height = I_color.getRows();
+  const int width = I_color.getCols();
+  const int channels = saveAlpha ? 4 : 3;
+
+  custom_stbi_mem_context context;
+  context.last_pos = 0;
+  buffer.resize(height * width * channels);
+  context.context = (void *)buffer.data();
+
+  const int stride_bytes = 0;
+  int result = 0;
+  if (saveAlpha) {
+    result = stbi_write_png_to_func(custom_stbi_write_mem, &context, width, height, channels,
+      reinterpret_cast<unsigned char *>(I_color.bitmap), stride_bytes);
+  }
+  else {
+    unsigned char *bitmap = new unsigned char[height * width * channels];
+    vpImageConvert::RGBaToRGB(reinterpret_cast<unsigned char *>(I_color.bitmap), bitmap, height*width);
+    result = stbi_write_png_to_func(custom_stbi_write_mem, &context, width, height, channels, bitmap, stride_bytes);
+    delete[] bitmap;
+  }
+
+  if (result) {
+    buffer.resize(context.last_pos);
+  }
+  else {
+    std::string message = "Cannot write png to memory, result: " + std::to_string(result);
+    throw(vpImageException(vpImageException::ioError, message));
+  }
+}
 #endif
diff --git a/modules/io/src/image/vpImageIo.cpp b/modules/io/src/image/vpImageIo.cpp
index ea0d7b6168..1c7ef72892 100644
--- a/modules/io/src/image/vpImageIo.cpp
+++ b/modules/io/src/image/vpImageIo.cpp
@@ -144,7 +144,7 @@ void vpImageIo::read(vpImage<unsigned char> &I, const std::string &filename, int
 {
   bool exist = vpIoTools::checkFilename(filename);
   if (!exist) {
-    std::string message = "Cannot read file: \"" + std::string(filename) + "\" doesn't exist";
+    const std::string message = "Cannot read file: \"" + std::string(filename) + "\" doesn't exist";
     throw(vpImageException(vpImageException::ioError, message));
   }
 
@@ -181,7 +181,7 @@ void vpImageIo::read(vpImage<unsigned char> &I, const std::string &filename, int
 #if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
     readOpenCV(I, filename);
 #else
-    std::string message = "Cannot read file \"" + filename + "\": No backend able to support this image format";
+    const std::string message = "Cannot read file \"" + filename + "\": No backend able to support this image format";
     throw(vpImageException(vpImageException::ioError, message));
 #endif
   }
@@ -220,7 +220,7 @@ void vpImageIo::read(vpImage<vpRGBa> &I, const std::string &filename, int backen
 {
   bool exist = vpIoTools::checkFilename(filename);
   if (!exist) {
-    std::string message = "Cannot read file: \"" + std::string(filename) + "\" doesn't exist";
+    const std::string message = "Cannot read file: \"" + std::string(filename) + "\" doesn't exist";
     throw(vpImageException(vpImageException::ioError, message));
   }
   // Allows to use ~ symbol or env variables in path
@@ -256,7 +256,7 @@ void vpImageIo::read(vpImage<vpRGBa> &I, const std::string &filename, int backen
 #if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
     readOpenCV(I, filename);
 #else
-    std::string message = "Cannot read file \"" + filename + "\": No backend able to support this image format";
+    const std::string message = "Cannot read file \"" + filename + "\": No backend able to support this image format";
     throw(vpImageException(vpImageException::ioError, message));
 #endif
   }
@@ -316,7 +316,7 @@ void vpImageIo::write(const vpImage<unsigned char> &I, const std::string &filena
 #if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
     writeOpenCV(I, filename, 90);
 #else
-    std::string message = "Cannot write file \"" + filename + "\": No backend able to support this image format";
+    const std::string message = "Cannot write file \"" + filename + "\": No backend able to support this image format";
     throw(vpImageException(vpImageException::ioError, message));
 #endif
   }
@@ -376,7 +376,7 @@ void vpImageIo::write(const vpImage<vpRGBa> &I, const std::string &filename, int
 #if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
     writeOpenCV(I, filename, 90);
 #else
-    std::string message = "Cannot write file \"" + filename + "\": No backend able to support this image format";
+    const std::string message = "Cannot write file \"" + filename + "\": No backend able to support this image format";
     throw(vpImageException(vpImageException::ioError, message));
 #endif
   }
@@ -479,15 +479,17 @@ void vpImageIo::readJPEG(vpImage<vpRGBa> &I, const std::string &filename, int ba
 {
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_JPEG)
-    std::string message =
+    const std::string message =
       "Libjpeg backend is not available to read file \"" + filename + "\": switch to stb_image backend";
+    std::cerr << message << std::endl;
     backend = IO_STB_IMAGE_BACKEND;
 #endif
   }
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
-    std::string message =
+    const std::string message =
       "OpenCV backend is not available to read file \"" + filename + "\": switch to stb_image backend";
+    std::cerr << message << std::endl;
     backend = IO_STB_IMAGE_BACKEND;
 #endif
   }
@@ -564,15 +566,17 @@ void vpImageIo::readPNG(vpImage<unsigned char> &I, const std::string &filename,
 {
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_PNG)
-    std::string message =
+    const std::string message =
       "Libpng backend is not available to read file \"" + filename + "\": switch to stb_image backend";
+    std::cerr << message << std::endl;
     backend = IO_STB_IMAGE_BACKEND;
 #endif
   }
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
-    std::string message =
+    const std::string message =
       "OpenCV backend is not available to read file \"" + filename + "\": switch to stb_image backend";
+    std::cerr << message << std::endl;
     backend = IO_STB_IMAGE_BACKEND;
 #endif
   }
@@ -649,15 +653,17 @@ void vpImageIo::readPNG(vpImage<vpRGBa> &I, const std::string &filename, int bac
 {
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_PNG)
-    std::string message =
+    const std::string message =
       "Libpng backend is not available to read file \"" + filename + "\": switch to stb_image backend";
+    std::cerr << message << std::endl;
     backend = IO_STB_IMAGE_BACKEND;
 #endif
   }
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
-    std::string message =
+    const std::string message =
       "OpenCV backend is not available to read file \"" + filename + "\": switch to stb_image backend";
+    std::cerr << message << std::endl;
     backend = IO_STB_IMAGE_BACKEND;
 #endif
   }
@@ -832,10 +838,10 @@ void vpImageIo::writeJPEG(const vpImage<unsigned char> &I, const std::string &fi
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_JPEG)
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "Libjpeg backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // Libjpeg backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "Libjpeg backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // Libjpeg backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -843,10 +849,10 @@ void vpImageIo::writeJPEG(const vpImage<unsigned char> &I, const std::string &fi
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -863,7 +869,7 @@ void vpImageIo::writeJPEG(const vpImage<unsigned char> &I, const std::string &fi
 #else
     (void)I;
     (void)filename;
-    std::string message = "Cannot save file \"" + filename + "\": no backend available";
+    std::string message = "Cannot save file \"" + filename + "\": no available backend";
     throw(vpImageException(vpImageException::ioError, message));
 #endif
   }
@@ -880,7 +886,8 @@ void vpImageIo::writeJPEG(const vpImage<unsigned char> &I, const std::string &fi
 #endif
   }
   else if (backend == IO_OPENCV_BACKEND) {
-#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
     writeOpenCV(I, filename, quality);
 #else
     (void)I;
@@ -930,10 +937,10 @@ void vpImageIo::writeJPEG(const vpImage<vpRGBa> &I, const std::string &filename,
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_JPEG)
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "Libjpeg backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // Libjpeg backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "Libjpeg backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // Libjpeg backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -941,10 +948,10 @@ void vpImageIo::writeJPEG(const vpImage<vpRGBa> &I, const std::string &filename,
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -1027,10 +1034,10 @@ void vpImageIo::writePNG(const vpImage<unsigned char> &I, const std::string &fil
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_PNG)
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "Libpng backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // Libpng backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "Libpng backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // Libpng backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -1038,10 +1045,10 @@ void vpImageIo::writePNG(const vpImage<unsigned char> &I, const std::string &fil
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -1118,10 +1125,10 @@ void vpImageIo::writePNG(const vpImage<vpRGBa> &I, const std::string &filename,
   if (backend == IO_SYSTEM_LIB_BACKEND) {
 #if !defined(VISP_HAVE_PNG)
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "Libpng backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // Libpng backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "Libpng backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // Libpng backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -1129,10 +1136,10 @@ void vpImageIo::writePNG(const vpImage<vpRGBa> &I, const std::string &filename,
   else if (backend == IO_OPENCV_BACKEND) {
 #if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
 #if defined(VISP_HAVE_SIMDLIB)
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to simd backend
     backend = IO_SIMDLIB_BACKEND;
 #else
-    std::string message = "OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend";
+    // OpenCV backend is not available to save file \"" + filename + "\": switch to stb_image backend
     backend = IO_STB_IMAGE_BACKEND;
 #endif
 #endif
@@ -1399,3 +1406,234 @@ void vpImageIo::writePPM(const vpImage<unsigned char> &I, const std::string &fil
   \param[in] filename : Image location.
  */
 void vpImageIo::writePPM(const vpImage<vpRGBa> &I, const std::string &filename) { vp_writePPM(I, filename); }
+
+/*!
+  Read the content of the grayscale image bitmap stored in memory and encoded using the PNG format.
+  \param[in] buffer : Grayscale image buffer encoded in PNG as 1-D unsigned char vector.
+  \param[out] I : Output decoded grayscale image.
+  \param[in] backend : Supported backends are IO_OPENCV_BACKEND and IO_STB_IMAGE_BACKEND, default IO_DEFAULT_BACKEND
+  will choose IO_OPENCV_BACKEND if available or IO_STB_IMAGE_BACKEND otherwise.
+ */
+void vpImageIo::readPNGfromMem(const std::vector<unsigned char> &buffer, vpImage<unsigned char> &I, int backend)
+{
+  if (backend == IO_SYSTEM_LIB_BACKEND || backend == IO_SIMDLIB_BACKEND) {
+    backend = IO_STB_IMAGE_BACKEND;
+  }
+  else if (backend == IO_OPENCV_BACKEND) {
+#if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
+    backend = IO_STB_IMAGE_BACKEND;
+#endif
+  }
+  else if (backend == IO_DEFAULT_BACKEND) {
+#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS)
+    backend = IO_OPENCV_BACKEND;
+#elif defined(VISP_HAVE_STBIMAGE)
+    backend = IO_STB_IMAGE_BACKEND;
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png read: OpenCV or std_image backend are not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+
+  if (backend == IO_OPENCV_BACKEND) {
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
+    readPNGfromMemOpenCV(buffer, I);
+    (void)backend;
+#else
+    (void)buffer;
+    (void)I;
+    (void)backend;
+    std::string message = "Cannot in-memory png read: OpenCV backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+  else {
+#if defined(VISP_HAVE_STBIMAGE)
+    readPNGfromMemStb(buffer, I);
+    (void)backend;
+#else
+    (void)buffer;
+    (void)I;
+    (void)backend;
+    std::string message = "Cannot in-memory png read: std_image backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+}
+
+/*!
+  Read the content of the grayscale image bitmap stored in memory and encoded using the PNG format.
+  \param[in] buffer : Color image buffer encoded in PNG as 1-D unsigned char vector.
+  \param[out] I : Output decoded color image.
+  \param[in] backend : Supported backends are IO_OPENCV_BACKEND and IO_STB_IMAGE_BACKEND, default IO_DEFAULT_BACKEND
+  will choose IO_OPENCV_BACKEND if available or IO_STB_IMAGE_BACKEND otherwise.
+ */
+void vpImageIo::readPNGfromMem(const std::vector<unsigned char> &buffer, vpImage<vpRGBa> &I, int backend)
+{
+  if (backend == IO_SYSTEM_LIB_BACKEND || backend == IO_SIMDLIB_BACKEND) {
+    backend = IO_STB_IMAGE_BACKEND;
+  }
+  else if (backend == IO_OPENCV_BACKEND) {
+#if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
+    backend = IO_STB_IMAGE_BACKEND;
+#endif
+  }
+  else if (backend == IO_DEFAULT_BACKEND) {
+#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS)
+    backend = IO_OPENCV_BACKEND;
+#elif defined(VISP_HAVE_STBIMAGE)
+    backend = IO_STB_IMAGE_BACKEND;
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png read: OpenCV or std_image backend are not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+
+  if (backend == IO_OPENCV_BACKEND) {
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
+    readPNGfromMemOpenCV(buffer, I);
+    (void)backend;
+#else
+    (void)buffer;
+    (void)I;
+    (void)backend;
+    std::string message = "Cannot in-memory png read: OpenCV backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+  else {
+#if defined(VISP_HAVE_STBIMAGE)
+    readPNGfromMemStb(buffer, I);
+    (void)backend;
+#else
+    (void)buffer;
+    (void)I;
+    (void)backend;
+    std::string message = "Cannot in-memory png read: std_image backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+}
+
+/*!
+  In-memory PNG encoding of the grayscale image.
+
+  \param[in] I : Input grayscale image.
+  \param[out] buffer : Encoded image as 1-D unsigned char vector using the PNG format.
+  \param[in] backend : Supported backends are IO_OPENCV_BACKEND and IO_STB_IMAGE_BACKEND, default IO_DEFAULT_BACKEND
+  will choose IO_OPENCV_BACKEND if available or IO_STB_IMAGE_BACKEND otherwise.
+*/
+void vpImageIo::writePNGtoMem(const vpImage<unsigned char> &I, std::vector<unsigned char> &buffer, int backend)
+{
+  if (backend == IO_SYSTEM_LIB_BACKEND || backend == IO_SIMDLIB_BACKEND) {
+    backend = IO_STB_IMAGE_BACKEND;
+  }
+  else if (backend == IO_OPENCV_BACKEND) {
+#if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
+    backend = IO_STB_IMAGE_BACKEND;
+#endif
+  }
+  else if (backend == IO_DEFAULT_BACKEND) {
+#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS)
+    backend = IO_OPENCV_BACKEND;
+#elif defined(VISP_HAVE_STBIMAGE)
+    backend = IO_STB_IMAGE_BACKEND;
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png write: OpenCV or std_image backend are not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+
+  if (backend == IO_OPENCV_BACKEND) {
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
+    writePNGtoMemOpenCV(I, buffer);
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png write: OpenCV backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+  else if (backend == IO_STB_IMAGE_BACKEND) {
+#if defined(VISP_HAVE_STBIMAGE)
+    writePNGtoMemStb(I, buffer);
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png write: std_image backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+  else {
+    std::string message = "The " + std::to_string(backend) + " backend is not available.";
+    throw(vpImageException(vpImageException::ioError, message));
+  }
+}
+
+/*!
+  In-memory PNG encoding of the color image.
+
+  \param[in] I : Input color image.
+  \param[out] buffer : Encoded image as 1-D unsigned char vector using the PNG format.
+  \param[in] backend : Supported backends are IO_OPENCV_BACKEND and IO_STB_IMAGE_BACKEND, default IO_DEFAULT_BACKEND
+  will choose IO_OPENCV_BACKEND if available or IO_STB_IMAGE_BACKEND otherwise.
+  \param[in] saveAlpha : If true, alpha channel is also used for encoding.
+*/
+void vpImageIo::writePNGtoMem(const vpImage<vpRGBa> &I, std::vector<unsigned char> &buffer, int backend, bool saveAlpha)
+{
+  if (backend == IO_SYSTEM_LIB_BACKEND || backend == IO_SIMDLIB_BACKEND) {
+    backend = IO_STB_IMAGE_BACKEND;
+  }
+  else if (backend == IO_OPENCV_BACKEND) {
+#if !(defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS))
+    backend = IO_STB_IMAGE_BACKEND;
+#endif
+  }
+  else if (backend == IO_DEFAULT_BACKEND) {
+#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS)
+    backend = IO_OPENCV_BACKEND;
+#elif defined(VISP_HAVE_STBIMAGE)
+    backend = IO_STB_IMAGE_BACKEND;
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png write: OpenCV or std_image backend are not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+
+  if (backend == IO_OPENCV_BACKEND) {
+#if ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_IMGCODECS)) || ((VISP_HAVE_OPENCV_VERSION < 0x030000) \
+    && defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC))
+    writePNGtoMemOpenCV(I, buffer, saveAlpha);
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png write: OpenCV backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+  else if (backend == IO_STB_IMAGE_BACKEND) {
+#if defined(VISP_HAVE_STBIMAGE)
+    writePNGtoMemStb(I, buffer, saveAlpha);
+#else
+    (void)I;
+    (void)buffer;
+    std::string message = "Cannot in-memory png write: std_image backend is not available";
+    throw(vpImageException(vpImageException::ioError, message));
+#endif
+  }
+  else {
+    std::string message = "The " + std::to_string(backend) + " backend is not available.";
+    throw(vpImageException(vpImageException::ioError, message));
+  }
+}
diff --git a/modules/io/test/image-with-dataset/testImageLoadSave.cpp b/modules/io/test/image-with-dataset/testImageLoadSave.cpp
index 50e4def6ee..a58ccb560a 100644
--- a/modules/io/test/image-with-dataset/testImageLoadSave.cpp
+++ b/modules/io/test/image-with-dataset/testImageLoadSave.cpp
@@ -86,6 +86,25 @@ static std::vector<std::string> backendNamesPng
       "simd", "stb"
 };
 
+static const std::vector<vpImageIo::vpImageIoBackendType> backendsInMemory
+{
+#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS)
+  vpImageIo::IO_OPENCV_BACKEND,
+#endif
+#if defined VISP_HAVE_STBIMAGE
+  vpImageIo::IO_STB_IMAGE_BACKEND
+#endif
+};
+static std::vector<std::string> backendNamesPngInMemory
+{
+#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGCODECS)
+  "OpenCV",
+#endif
+#if defined VISP_HAVE_STBIMAGE
+  "stb"
+#endif
+};
+
 static const unsigned int imgWidth = 640;
 static const unsigned int imgHeight = 440;
 
@@ -314,6 +333,74 @@ TEST_CASE("Test RGBA PNG image saving", "[image_I/O]")
   REQUIRE(vpIoTools::remove(directory_filename_tmp));
 }
 
+TEST_CASE("Test grayscale in-memory PNG image encoding/decoding", "[image_I/O]")
+{
+  vpImage<unsigned char> I_ref;
+  vpImageIo::read(I_ref, path + ".png");
+  REQUIRE(I_ref.getSize() > 0);
+
+  for (size_t i = 0; i < backendsInMemory.size(); i++) {
+    SECTION(backendNamesPngInMemory[i] + " backend")
+    {
+      std::vector<unsigned char> buffer;
+      vpImageIo::writePNGtoMem(I_ref, buffer, backendsInMemory[i]);
+      REQUIRE(!buffer.empty());
+
+      for (size_t j = 0; j < backendsInMemory.size(); j++) {
+        SECTION(backendNamesPngInMemory[i] + " backend")
+        {
+          vpImage<unsigned char> I;
+          vpImageIo::readPNGfromMem(buffer, I, backendsInMemory[j]);
+          CHECK(I_ref == I);
+        };
+      }
+    };
+  }
+}
+
+TEST_CASE("Test color in-memory PNG image encoding/decoding", "[image_I/O]")
+{
+  vpImage<vpRGBa> I_ref;
+  vpImageIo::read(I_ref, path + ".png");
+  REQUIRE(I_ref.getSize() > 0);
+
+  for (size_t i = 0; i < backendsInMemory.size(); i++) {
+    SECTION(backendNamesPngInMemory[i] + " backend")
+    {
+      {
+        std::vector<unsigned char> buffer;
+        const bool saveAlpha = false;
+        vpImageIo::writePNGtoMem(I_ref, buffer, backendsInMemory[i], saveAlpha);
+        REQUIRE(!buffer.empty());
+
+        for (size_t j = 0; j < backendsInMemory.size(); j++) {
+          SECTION(backendNamesPngInMemory[i] + " backend")
+          {
+            vpImage<vpRGBa> I;
+            vpImageIo::readPNGfromMem(buffer, I, backendsInMemory[j]);
+            CHECK(I_ref == I);
+          };
+        }
+      }
+      {
+        std::vector<unsigned char> buffer;
+        const bool saveAlpha = true;
+        vpImageIo::writePNGtoMem(I_ref, buffer, backendsInMemory[i], saveAlpha);
+        REQUIRE(!buffer.empty());
+
+        for (size_t j = 0; j < backendsInMemory.size(); j++) {
+          SECTION(backendNamesPngInMemory[i] + " backend")
+          {
+            vpImage<vpRGBa> I;
+            vpImageIo::readPNGfromMem(buffer, I, backendsInMemory[j]);
+            CHECK(I_ref == I);
+          };
+        }
+      }
+    };
+  }
+}
+
 int main(int argc, char *argv[])
 {
   Catch::Session session; // There must be exactly one instance
diff --git a/script/PlotCameraTrajectory.py b/script/PlotCameraTrajectory.py
index 8b8f5a7126..09ff0d2894 100644
--- a/script/PlotCameraTrajectory.py
+++ b/script/PlotCameraTrajectory.py
@@ -101,17 +101,27 @@ def visp_rotation_to_thetau(R):
             thetau[0,2] = -thetau[0,2]
     return thetau
 
-def load_camera_poses(filename, use_thetau=False):
-    if use_thetau:
-        camera_poses_raw = np.loadtxt(filename)
+def load_camera_poses(filename, use_thetau=False, use_npz_file_format=False):
+    if use_npz_file_format:
+        datafile = np.load(filename)
+        camera_poses_raw = datafile['vec_poses']
         camera_poses = np.zeros((4*camera_poses_raw.shape[0], 4))
         for i in range(camera_poses_raw.shape[0]):
-            camera_poses[i*4:i*4+3, 0:3] = visp_thetau_to_rotation(camera_poses_raw[i, 3:])
-            camera_poses[i*4:i*4+3, 3] = camera_poses_raw[i,0:3].T
+            camera_poses[i*4:i*4+3, 0:3] = visp_thetau_to_rotation(camera_poses_raw[i,3:])
+            camera_poses[i*4:i*4+3, 3] = camera_poses_raw[i,:3].T
             camera_poses[i*4+3, 3] = 1
         return camera_poses
     else:
-        return np.loadtxt(filename)
+      if use_thetau:
+          camera_poses_raw = np.loadtxt(filename)
+          camera_poses = np.zeros((4*camera_poses_raw.shape[0], 4))
+          for i in range(camera_poses_raw.shape[0]):
+              camera_poses[i*4:i*4+3, 0:3] = visp_thetau_to_rotation(camera_poses_raw[i,3:])
+              camera_poses[i*4:i*4+3, 3] = camera_poses_raw[i,:3].T
+              camera_poses[i*4+3, 3] = 1
+          return camera_poses
+      else:
+          return np.loadtxt(filename)
 
 def inverse_homogeneoux_matrix(M):
     R = M[0:3, 0:3]
@@ -713,6 +723,8 @@ def main():
     parser.add_argument('-p', type=str, nargs=1, required=True, help='Path to poses file.')
     parser.add_argument('--theta-u', action='store_true', default=False,
                         help='If true, camera poses are expressed using [tx ty tz tux tuy tuz] formalism, otherwise in homogeneous form.')
+    parser.add_argument('--npz', action='store_true', default=False,
+                        help='If true, poses are loaded from a npz file with \"vec_poses\" field and [tx ty tz tux tuy tuz] data.')
     parser.add_argument('-m', type=str, nargs=1, required=True, help='Path to CAO model file.')
     parser.add_argument('--colormap', default='gist_rainbow', type=str, help='Colormap to use for the camera path.')
     parser.add_argument('--save', action='store_true', help='If true, save the figures on disk.')
@@ -774,8 +786,9 @@ def main():
     # Load camera poses
     camera_pose_filename = args.p[0]
     use_thetau = args.theta_u
-    print(f"Load camera poses from: {camera_pose_filename} ; Use theta-u? {use_thetau}")
-    camera_poses = load_camera_poses(camera_pose_filename, use_thetau)
+    use_npz_file_format = args.npz
+    print(f"Load camera poses from: {camera_pose_filename} ; Use theta-u? {use_thetau} ; Load from npz file? {use_npz_file_format}")
+    camera_poses = load_camera_poses(camera_pose_filename, use_thetau, use_npz_file_format)
     print("poses: ", camera_poses.shape)
 
     colormap = args.colormap
@@ -854,12 +867,12 @@ def main():
 
         for i in range(0, inverse_camera_poses.shape[0], 4*pose_step):
             camera_pose = inverse_camera_poses[i:i+4,:]
-            draw_camera(ax, camera_pose, cam_width, cam_height, cam_focal, cam_scale)
+            draw_camera(ax, camera_pose, cam_width, cam_height, cam_focal, cam_scale, camera_colors[i//4])
 
         # Draw the last camera pose
         for i in range(inverse_camera_poses.shape[0]-4, inverse_camera_poses.shape[0], 4):
             camera_pose = inverse_camera_poses[i:i+4,:]
-            draw_camera(ax, camera_pose, cam_width, cam_height, cam_focal, cam_scale)
+            draw_camera(ax, camera_pose, cam_width, cam_height, cam_focal, cam_scale, camera_colors[-1])
 
         draw_camera_path(ax, inverse_camera_poses, camera_colors)
         draw_model(ax, model)
diff --git a/tutorial/tracking/model-based/generic/CMakeLists.txt b/tutorial/tracking/model-based/generic/CMakeLists.txt
index 8e0a68fc85..85c9a8aee8 100644
--- a/tutorial/tracking/model-based/generic/CMakeLists.txt
+++ b/tutorial/tracking/model-based/generic/CMakeLists.txt
@@ -9,6 +9,8 @@ set(tutorial_cpp
   tutorial-mb-generic-tracker.cpp
   tutorial-mb-generic-tracker-live.cpp
   tutorial-mb-generic-tracker-full.cpp
+  tutorial-mb-generic-tracker-save.cpp
+  tutorial-mb-generic-tracker-read.cpp
 )
 
 foreach(cpp ${tutorial_cpp})
diff --git a/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-full.cpp b/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-full.cpp
index 1640de31e9..a8f8e06bd4 100644
--- a/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-full.cpp
+++ b/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-full.cpp
@@ -10,6 +10,18 @@
 #include <visp3/io/vpVideoReader.h>
 #include <visp3/io/vpVideoWriter.h>
 
+namespace
+{
+std::vector<double> poseToVec(const vpHomogeneousMatrix &cMo)
+{
+  vpThetaUVector tu = cMo.getThetaUVector();
+  vpTranslationVector t = cMo.getTranslationVector();
+  std::vector<double> vec { t[0], t[1], t[2], tu[0], tu[1], tu[2] };
+
+  return vec;
+}
+}
+
 int main(int argc, char **argv)
 {
 #if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_VIDEOIO) && defined(HAVE_OPENCV_HIGHGUI)
@@ -23,12 +35,13 @@ int main(int argc, char **argv)
   bool opt_display_scale_auto = false;
   vpColVector opt_dof_to_estimate(6, 1.); // Here we consider 6 dof estimation
   std::string opt_save;
+  std::string opt_save_results;
   unsigned int thickness = 2;
 
   vpImage<unsigned char> I;
-  vpDisplay *display = nullptr;
-  vpPlot *plot = nullptr;
-  vpVideoWriter *writer = nullptr;
+  std::shared_ptr<vpDisplay> display;
+  std::shared_ptr<vpPlot> plot;
+  std::shared_ptr<vpVideoWriter> writer;
 
   try {
     for (int i = 0; i < argc; i++) {
@@ -50,6 +63,9 @@ int main(int argc, char **argv)
       else if (std::string(argv[i]) == "--save") {
         opt_save = std::string(argv[++i]);
       }
+      else if (std::string(argv[i]) == "--save-results") {
+        opt_save_results = std::string(argv[++i]);
+      }
       else if (std::string(argv[i]) == "--plot") {
         opt_plot = true;
       }
@@ -80,7 +96,8 @@ int main(int argc, char **argv)
           << " [--tracker <0=egde|1=keypoint|2=hybrid>]"
           << " [--downscale-img <scale factor>]"
           << " [--dof <0/1 0/1 0/1 0/1 0/1 0/1>]"
-          << " [--save <video>]"
+          << " [--save <e.g. results-%04d.png>]"
+          << " [--save-results <e.g. tracking_poses.npz>]"
           << " [--display-scale-auto]"
           << " [--plot]"
           << " [--verbose,-v]"
@@ -121,12 +138,18 @@ int main(int argc, char **argv)
           << "      is not estimated. It's value is the one from the initialisation." << std::endl
           << "      Default: 1 1 1 1 1 1 (to estimate all 6 dof)" << std::endl
           << std::endl
-          << "  --save <video>" << std::endl
+          << "  --save <e.g. results-%04d.png>" << std::endl
           << "      Name of the saved image sequence that contains tracking results in overlay." << std::endl
           << "      When the name contains a folder like in the next example, the folder" << std::endl
           << "      is created if it doesn't exist."
           << "      Example: \"result/image-%04d.png\"." << std::endl
           << std::endl
+          << "  --save-results <e.g. tracking_results.npz>" << std::endl
+          << "      Name of the npz file containing cMo data estimated from MBT." << std::endl
+          << "      When the name contains a folder like in the next example, the folder" << std::endl
+          << "      is created if it doesn't exist."
+          << "      Example: \"result/tracking_results.npz\"." << std::endl
+          << std::endl
           << "  --display-scale-auto" << std::endl
           << "      Enable display window auto scaling to ensure that the image is fully" << std::endl
           << "      visible on the screen. Useful for large images." << std::endl
@@ -168,6 +191,13 @@ int main(int argc, char **argv)
         vpIoTools::makeDirectory(parent);
       }
     }
+    if (!opt_save_results.empty()) {
+      std::string parent = vpIoTools::getParent(opt_save_results);
+      if (!parent.empty()) {
+        std::cout << "Create output directory for the npz file: " << parent << std::endl;
+        vpIoTools::makeDirectory(parent);
+      }
+    }
 
     //! [Image]
     vpImage<unsigned char> Ivideo;
@@ -191,17 +221,17 @@ int main(int argc, char **argv)
 
     vpImage<vpRGBa> O;
     if (!opt_save.empty()) {
-      writer = new vpVideoWriter();
+      writer = std::make_shared<vpVideoWriter>();
       writer->setFileName(opt_save);
       writer->open(O);
     }
 
 #if defined(VISP_HAVE_X11)
-    display = new vpDisplayX;
+    display = std::make_shared<vpDisplayX>();
 #elif defined(VISP_HAVE_GDI)
-    display = new vpDisplayGDI;
+    display = std::make_shared<vpDisplayGDI>();
 #elif defined(HAVE_OPENCV_HIGHGUI)
-    display = new vpDisplayOpenCV;
+    display = std::make_shared<vpDisplayOpenCV>();
 #endif
     if (opt_display_scale_auto) {
       display->setDownScalingFactor(vpDisplay::SCALE_AUTO);
@@ -209,8 +239,8 @@ int main(int argc, char **argv)
     display->init(I, 100, 100, "Model-based tracker");
 
     if (opt_plot) {
-      plot = new vpPlot(2, 700, 700, display->getWindowXPosition() + I.getWidth() / display->getDownScalingFactor() + 30,
-                        display->getWindowYPosition(), "Estimated pose");
+      plot = std::make_shared<vpPlot>(2, 700, 700, display->getWindowXPosition() + I.getWidth() / display->getDownScalingFactor() + 30,
+                                      display->getWindowYPosition(), "Estimated pose");
       plot->initGraph(0, 3); // Translation
       plot->setTitle(0, "Translation [m]");
       plot->setColor(0, 0, vpColor::red);
@@ -343,6 +373,19 @@ int main(int argc, char **argv)
 
     std::cout << "Initialize tracker on image size: " << I.getWidth() << " x " << I.getHeight() << std::endl;
 
+    std::vector<double> vec_poses;
+    if (!opt_save_results.empty()) {
+      const unsigned int height = I.getHeight(), width = I.getWidth();
+      visp::cnpy::npz_save(opt_save_results, "height", &height, { 1 }, "w");
+      visp::cnpy::npz_save(opt_save_results, "width", &width, { 1 }, "a");
+
+      const double cam_px = cam.get_px(), cam_py = cam.get_py(), cam_u0 = cam.get_u0(), cam_v0 = cam.get_v0();
+      visp::cnpy::npz_save(opt_save_results, "cam_px", &cam_px, { 1 }, "a");
+      visp::cnpy::npz_save(opt_save_results, "cam_py", &cam_py, { 1 }, "a");
+      visp::cnpy::npz_save(opt_save_results, "cam_u0", &cam_u0, { 1 }, "a");
+      visp::cnpy::npz_save(opt_save_results, "cam_v0", &cam_v0, { 1 }, "a");
+    }
+
     //! [Init]
     tracker.initClick(I, objectname + ".init", true);
     //! [Init]
@@ -414,10 +457,19 @@ int main(int argc, char **argv)
         writer->saveFrame(O);
       }
 
+      if (!opt_save_results.empty()) {
+        std::vector<double> vec_pose = poseToVec(cMo);
+        vec_poses.insert(vec_poses.end(), vec_pose.begin(), vec_pose.end());
+      }
+
       if (vpDisplay::getClick(I, false))
         break;
     }
     vpDisplay::getClick(I);
+
+    if (!opt_save_results.empty()) {
+      visp::cnpy::npz_save(opt_save_results, "vec_poses", vec_poses.data(), { static_cast<size_t>(vec_poses.size()/6), 6 }, "a");
+    }
   }
   catch (const vpException &e) {
     std::cout << "Catch a ViSP exception: " << e << std::endl;
@@ -429,15 +481,6 @@ int main(int argc, char **argv)
     vpDisplay::getClick(I);
   }
 #endif
-  //! [Cleanup]
-  delete display;
-  if (opt_plot) {
-    delete plot;
-  }
-  if (writer) {
-    delete writer;
-  }
-  //! [Cleanup]
 #else
   (void)argc;
   (void)argv;
diff --git a/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-read.cpp b/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-read.cpp
new file mode 100755
index 0000000000..913cbd86f2
--- /dev/null
+++ b/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-read.cpp
@@ -0,0 +1,197 @@
+#include <memory>
+#include <visp3/core/vpIoTools.h>
+#include <visp3/gui/vpDisplayX.h>
+#include <visp3/gui/vpDisplayGDI.h>
+#include <visp3/gui/vpDisplayOpenCV.h>
+#include <visp3/io/vpImageIo.h>
+#include <visp3/core/vpImageDraw.h>
+
+namespace
+{
+// https://en.cppreference.com/w/cpp/io/c/fprintf
+std::string toString(const std::string &name, int val)
+{
+  auto fmt = name.c_str();
+  int sz = std::snprintf(nullptr, 0, fmt, val);
+  std::vector<char> buf(sz + 1);      // note +1 for null terminator
+  std::sprintf(buf.data(), fmt, val);
+  std::string str(buf.begin(), buf.end());
+
+  return str;
+}
+
+template<typename T, typename... Args>
+std::unique_ptr<T> make_unique_compat(Args&&... args)
+{
+#if ((__cplusplus >= 201402L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201402L)))
+  return std::make_unique<T>(args...);
+#else
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+#endif
+}
+}
+
+int main(int argc, char *argv[])
+{
+#if defined(VISP_HAVE_X11) || defined(VISP_HAVE_GDI) || defined(HAVE_OPENCV_HIGHGUI)
+  bool opencv_backend = false;
+  std::string npz_filename = "npz_tracking_teabox.npz";
+  bool print_cMo = false;
+  bool dump_infos = false;
+
+  for (int i = 1; i < argc; i++) {
+    if (std::string(argv[i]) == "--cv-backend") {
+      opencv_backend = true;
+    }
+    else if ((std::string(argv[i]) == "--read" || std::string(argv[i]) == "-i") && i+1 < argc) {
+      npz_filename = argv[i+1];
+    }
+    else if (std::string(argv[i]) == "--print-cMo" && i+1 < argc) {
+      print_cMo = true;
+    }
+    else if (std::string(argv[i]) == "--dump" && i+1 < argc) {
+      dump_infos = true;
+    }
+    else {
+      std::cout << "Options:" << std::endl;
+      std::cout << "  --cv-backend   use OpenCV if available for in-memory PNG decoding" << std::endl;
+      std::cout << "  --read / -i    input filename" << std::endl;
+      std::cout << "  --print-cMo    print cMo" << std::endl;
+      std::cout << "  --dump         print all the data name in the file" << std::endl;
+      return EXIT_SUCCESS;
+    }
+  }
+
+  std::cout << "Read file: " << npz_filename << std::endl;
+  std::cout << "OpenCV backend? " << opencv_backend << std::endl;
+
+  const vpImageIo::vpImageIoBackendType backend =
+    opencv_backend ? vpImageIo::IO_OPENCV_BACKEND : vpImageIo::IO_STB_IMAGE_BACKEND;
+
+  visp::cnpy::npz_t npz_data = visp::cnpy::npz_load(npz_filename);
+  if (dump_infos) {
+    std::cout << npz_filename << " file contains the following data:" << std::endl;
+    for (visp::cnpy::npz_t::const_iterator it = npz_data.begin(); it != npz_data.end(); ++it) {
+      std::cout << "  " << it->first << std::endl;
+    }
+  }
+
+  visp::cnpy::NpyArray arr_height = npz_data["height"];
+  visp::cnpy::NpyArray arr_width = npz_data["width"];
+  visp::cnpy::NpyArray arr_channel = npz_data["channel"];
+  int height = *arr_height.data<int>();
+  int width = *arr_width.data<int>();
+  int channel = *arr_channel.data<int>();
+  std::cout << "height: " << height << std::endl;
+  std::cout << "width: " << width << std::endl;
+  std::cout << "channel: " << channel << std::endl;
+  std::cout << "Color mode? " << (channel > 1) << std::endl;
+
+  visp::cnpy::NpyArray arr_camera_name = npz_data["camera_name"];
+  const std::string camera_name = std::string(arr_camera_name.data<char>());
+  std::cout << "Camera name: " << camera_name << std::endl;
+
+  visp::cnpy::NpyArray arr_px = npz_data["cam_px"];
+  visp::cnpy::NpyArray arr_py = npz_data["cam_py"];
+  visp::cnpy::NpyArray arr_u0 = npz_data["cam_u0"];
+  visp::cnpy::NpyArray arr_v0 = npz_data["cam_v0"];
+  vpCameraParameters cam(*arr_px.data<double>(), *arr_py.data<double>(), *arr_u0.data<double>(), *arr_v0.data<double>());
+  std::cout << "Cam: " << cam << std::endl;
+
+  vpImage<unsigned char> I(height, width);
+  vpImage<vpRGBa> I_display(height, width);
+
+  std::unique_ptr<vpDisplay> display;
+#if defined(VISP_HAVE_X11)
+  display = make_unique_compat<vpDisplayX>(I_display, 100, 100, "Model-based tracker");
+#elif defined(VISP_HAVE_GDI)
+  display = make_unique_compat<vpDisplayGDI>(I_display, 100, 100, "Model-based tracker");
+#elif defined(HAVE_OPENCV_HIGHGUI)
+  display = make_unique_compat<vpDisplayOpenCV>(I_display, 100, 100, "Model-based tracker");
+#endif
+
+  visp::cnpy::NpyArray arr_nb_data = npz_data["nb_data"];
+  int nb_data = *arr_nb_data.data<int>();
+  std::cout << "Number of images: " << nb_data << std::endl;
+
+  // Load all the images data
+  visp::cnpy::NpyArray arr_vec_img_data_size = npz_data["vec_img_data_size"];
+  int *vec_img_data_size_ptr = arr_vec_img_data_size.data<int>();
+  visp::cnpy::NpyArray arr_vec_img = npz_data["vec_img"];
+  unsigned char *vec_img_ptr = arr_vec_img.data<unsigned char>();
+  std::vector<unsigned char> vec_img;
+  size_t img_data_offset = 0;
+
+  // Load all the poses
+  visp::cnpy::NpyArray arr_vec_poses = npz_data["vec_poses"];
+  double *vec_poses_ptr = arr_vec_poses.data<double>();
+  assert(arr_vec_poses.shape.size() == 2);
+  assert(arr_vec_poses.shape[1] == 6);
+  size_t pose_size = arr_vec_poses.shape[1];
+
+  std::vector<double> times;
+
+  for (int iter = 0; iter < nb_data; iter++) {
+    // std::copy(vec_img_ptr + img_data_offset, vec_img_ptr + img_data_offset + vec_img_data_size_ptr[iter],
+    //     std::back_inserter(vec_img));
+    vec_img = std::vector<unsigned char>(vec_img_ptr + img_data_offset, vec_img_ptr + img_data_offset + vec_img_data_size_ptr[iter]);
+    double start = vpTime::measureTimeMs(), end = -1;
+    if (channel > 1) {
+      vpImageIo::readPNGfromMem(vec_img, I_display, backend);
+      end = vpTime::measureTimeMs();
+    }
+    else {
+      vpImageIo::readPNGfromMem(vec_img, I, backend);
+      end = vpTime::measureTimeMs();
+      vpImageConvert::convert(I, I_display);
+    }
+    times.push_back(end-start);
+    img_data_offset += vec_img_data_size_ptr[iter];
+
+    const std::string str_model_iter_sz = toString("model_%06d", iter) + "_sz";
+    visp::cnpy::NpyArray arr_model_iter_sz = npz_data[str_model_iter_sz];
+    size_t model_sz = *arr_model_iter_sz.data<size_t>();
+
+    for (size_t i = 0; i < model_sz; i++) {
+      char buffer[100];
+      int res = snprintf(buffer, 100, "model_%06d_%06zu", iter, i);
+      if (res > 0 && res < 100) {
+        std::string str_model_iter_data = buffer;
+        visp::cnpy::NpyArray arr_model_iter_data = npz_data[str_model_iter_data];
+
+        if (arr_model_iter_data.shape[0] >= 5) {
+          if (std::fabs(arr_model_iter_data.data<double>()[0]) <= std::numeric_limits<double>::epsilon()) {  // line feature
+            vpImageDraw::drawLine(I_display,
+              vpImagePoint(arr_model_iter_data.data<double>()[1], arr_model_iter_data.data<double>()[2]),
+              vpImagePoint(arr_model_iter_data.data<double>()[3], arr_model_iter_data.data<double>()[4]), vpColor::red, 3);
+          }
+        }
+      }
+    }
+
+    vpHomogeneousMatrix cMo(vpTranslationVector(vec_poses_ptr[pose_size*iter], vec_poses_ptr[pose_size*iter + 1], vec_poses_ptr[pose_size*iter + 2]),
+      vpThetaUVector(vec_poses_ptr[pose_size*iter + 3], vec_poses_ptr[pose_size*iter + 4], vec_poses_ptr[pose_size*iter + 5])
+    );
+
+    if (print_cMo) {
+      std::cout << "\ncMo:\n" << cMo << std::endl;
+    }
+
+    vpDisplay::display(I_display);
+    vpDisplay::displayFrame(I_display, cMo, cam, 0.025, vpColor::none, 3);
+    vpDisplay::flush(I_display);
+
+    vpTime::wait(30);
+  }
+
+  std::cout << "Mean time for image decoding: " << vpMath::getMean(times) << " ms ; Median time: "
+    << vpMath::getMedian(times) << " ms ; Std: " << vpMath::getStdev(times) << " ms" << std::endl;
+
+  vpDisplay::getClick(I_display, true);
+
+#else
+  std::cerr << "Error, a missing display library is needed (X11, GDI or OpenCV built with HighGUI module)."
+#endif
+
+    return EXIT_SUCCESS;
+}
diff --git a/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-save.cpp b/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-save.cpp
new file mode 100755
index 0000000000..446fc693af
--- /dev/null
+++ b/tutorial/tracking/model-based/generic/tutorial-mb-generic-tracker-save.cpp
@@ -0,0 +1,205 @@
+#include <visp3/core/vpIoTools.h>
+#include <visp3/io/vpImageIo.h>
+#include <visp3/mbt/vpMbGenericTracker.h>
+#include <visp3/io/vpVideoReader.h>
+
+namespace
+{
+std::vector<double> poseToVec(const vpHomogeneousMatrix &cMo)
+{
+  vpThetaUVector tu = cMo.getThetaUVector();
+  vpTranslationVector t = cMo.getTranslationVector();
+  std::vector<double> vec { t[0], t[1], t[2], tu[0], tu[1], tu[2] };
+
+  return vec;
+}
+
+// https://en.cppreference.com/w/cpp/io/c/fprintf
+std::string toString(const std::string &name, int val)
+{
+  auto fmt = name.c_str();
+  int sz = std::snprintf(nullptr, 0, fmt, val);
+  std::vector<char> buf(sz + 1);      // note +1 for null terminator
+  std::sprintf(buf.data(), fmt, val);
+  std::string str(buf.begin(), buf.end());
+
+  return str;
+}
+}
+
+int main(int argc, char **argv)
+{
+  bool opencv_backend = false;
+  std::string npz_filename = "npz_tracking_teabox.npz";
+  bool color_mode = false;
+  bool save_alpha = false;
+  bool print_cMo = false;
+
+  for (int i = 1; i < argc; i++) {
+    if (std::string(argv[i]) == "--cv-backend") {
+      opencv_backend = true;
+    }
+    else if ((std::string(argv[i]) == "--save" || std::string(argv[i]) == "-o") && i+1 < argc) {
+      npz_filename = argv[i+1];
+    }
+    else if (std::string(argv[i]) == "--color" || std::string(argv[i]) == "-c") {
+      color_mode = true;
+    }
+    else if (std::string(argv[i]) == "--alpha" || std::string(argv[i]) == "-a") {
+      save_alpha = true;
+    }
+    else if (std::string(argv[i]) == "--print-cMo" && i+1 < argc) {
+      print_cMo = true;
+    }
+    else {
+      std::cout << "Options:" << std::endl;
+      std::cout << "  --cv-backend   use OpenCV if available for in-memory PNG encoding" << std::endl;
+      std::cout << "  --save / -o    output filename" << std::endl;
+      std::cout << "  --color        save RGB data" << std::endl;
+      std::cout << "  --alpha        if --color opton, save RGBa data" << std::endl;
+      std::cout << "  --print-cMo    print cMo" << std::endl;
+      return EXIT_SUCCESS;
+    }
+  }
+
+  std::cout << "Save file: " << npz_filename << std::endl;
+  std::cout << "OpenCV backend? " << opencv_backend << std::endl;
+  std::cout << "Color image? " << color_mode << std::endl;
+  std::cout << "Save alpha channel? " << save_alpha << std::endl;
+
+  std::string opt_videoname = "model/teabox/teabox.mp4";
+  std::string opt_modelname = "model/teabox/teabox.cao";
+
+  std::string parentname = vpIoTools::getParent(opt_modelname);
+  std::string objectname = vpIoTools::getNameWE(opt_modelname);
+
+  if (!parentname.empty())
+    objectname = parentname + "/" + objectname;
+
+  std::cout << "Video name: " << opt_videoname << std::endl;
+
+  vpImage<unsigned char> I;
+  vpImage<vpRGBa> I_color;
+
+  vpVideoReader g;
+  g.setFileName(opt_videoname);
+  g.open(I);
+
+  vpMbGenericTracker tracker;
+  tracker.setTrackerType(vpMbGenericTracker::EDGE_TRACKER);
+
+  vpCameraParameters cam;
+  cam.initPersProjWithoutDistortion(839, 839, 325, 243);
+  tracker.setCameraParameters(cam);
+  tracker.loadModel(objectname + ".cao");
+
+  vpHomogeneousMatrix cMo;
+  cMo[0][0] = 0.4889237963; cMo[0][1] = 0.8664706489; cMo[0][2] = 0.1009065709; cMo[0][3] = -0.07010159786;
+  cMo[1][0] = 0.4218451176; cMo[1][1] = -0.1335995053; cMo[1][2] = -0.8967708007; cMo[1][3] = -0.08363026223;
+  cMo[2][0] = -0.7635445096; cMo[2][1] = 0.4810195286; cMo[2][2] = -0.4308363901; cMo[2][3] = 0.4510066725;
+  tracker.initFromPose(I, cMo);
+
+  const int height = I.getRows();
+  const int width = I.getCols();
+  int channel = 1;
+  if (color_mode) {
+    channel = save_alpha ? 4 : 3;
+  }
+
+  const vpImageIo::vpImageIoBackendType backend =
+    opencv_backend ? vpImageIo::IO_OPENCV_BACKEND : vpImageIo::IO_STB_IMAGE_BACKEND;
+
+  // vector of byte data to store encoded image data using PNG format
+  std::vector<unsigned char> img_buffer;
+
+  const std::string camera_name = "Camera";
+  std::vector<char> vec_camera_name(camera_name.begin(), camera_name.end());
+
+  visp::cnpy::npz_save(npz_filename, "camera_name", &vec_camera_name[0], { vec_camera_name.size() }, "w"); // overwrite
+  visp::cnpy::npz_save(npz_filename, "height", &height, { 1 }, "a"); // append
+  visp::cnpy::npz_save(npz_filename, "width", &width, { 1 }, "a");
+  visp::cnpy::npz_save(npz_filename, "channel", &channel, { 1 }, "a");
+
+  const double cam_px = cam.get_px(), cam_py = cam.get_py(), cam_u0 = cam.get_u0(), cam_v0 = cam.get_v0();
+  visp::cnpy::npz_save(npz_filename, "cam_px", &cam_px, { 1 }, "a");
+  visp::cnpy::npz_save(npz_filename, "cam_py", &cam_py, { 1 }, "a");
+  visp::cnpy::npz_save(npz_filename, "cam_u0", &cam_u0, { 1 }, "a");
+  visp::cnpy::npz_save(npz_filename, "cam_v0", &cam_v0, { 1 }, "a");
+
+  std::vector<double> vec_poses;
+  vec_poses.reserve(g.getLastFrameIndex() * 6);
+
+  std::vector<int> vec_img_data_size;
+  vec_img_data_size.reserve(g.getLastFrameIndex());
+  std::vector<unsigned char> vec_img_data;
+  vec_img_data.reserve(g.getLastFrameIndex() * height * width);
+
+  std::vector<double> times;
+  int iter = 0;
+  while (!g.end()) {
+    g.acquire(I);
+    tracker.track(I);
+    tracker.getPose(cMo);
+    if (print_cMo) {
+      std::cout << "\ncMo:\n" << cMo << std::endl;
+    }
+
+    vpImageConvert::convert(I, I_color);
+    double start = vpTime::measureTimeMs();
+    if (color_mode) {
+      vpImageIo::writePNGtoMem(I_color, img_buffer, backend, save_alpha);
+    }
+    else {
+      vpImageIo::writePNGtoMem(I, img_buffer, backend);
+    }
+    double end = vpTime::measureTimeMs();
+    times.push_back(end-start);
+    vec_img_data_size.push_back(img_buffer.size());
+    vec_img_data.insert(vec_img_data.end(), img_buffer.begin(), img_buffer.end());
+
+    std::vector<double> vec_pose = poseToVec(cMo);
+    vec_poses.insert(vec_poses.end(), vec_pose.begin(), vec_pose.end());
+
+    std::map<std::string, std::vector<std::vector<double> > > mapOfModels;
+    std::map<std::string, unsigned int> mapOfW;
+    mapOfW[camera_name] = I.getWidth();
+    std::map<std::string, unsigned int> mapOfH;
+    mapOfH[camera_name] = I.getHeight();
+    std::map<std::string, vpHomogeneousMatrix> mapOfcMos;
+    mapOfcMos[camera_name] = cMo;
+    std::map<std::string, vpCameraParameters> mapOfCams;
+    mapOfCams[camera_name] = cam;
+    tracker.getModelForDisplay(mapOfModels, mapOfW, mapOfH, mapOfcMos, mapOfCams);
+
+    std::vector<std::vector<double>> model = mapOfModels[camera_name];
+    const std::string model_iter = toString("model_%06d", iter);
+    const std::string model_iter_sz = model_iter + "_sz";
+    const size_t model_size = model.size();
+    visp::cnpy::npz_save(npz_filename, model_iter_sz, &model_size, { 1 }, "a");
+
+    for (size_t i = 0; i < model.size(); i++) {
+      char buffer[100];
+      int res = snprintf(buffer, 100, "model_%06d_%06zu", iter, i);
+      if (res > 0 && res < 100) {
+        const std::string model_iter_data = buffer;
+        std::vector<double> &vec_line = model[i];
+        visp::cnpy::npz_save(npz_filename, model_iter_data, &vec_line[0], { vec_line.size() }, "a");
+      }
+    }
+
+    iter++;
+  }
+
+  std::cout << "Mean time for image encoding: " << vpMath::getMean(times) << " ms ; Median time: "
+    << vpMath::getMedian(times) << " ms ; Std: " << vpMath::getStdev(times) << " ms" << std::endl;
+
+  visp::cnpy::npz_save(npz_filename, "vec_img_data_size", vec_img_data_size.data(), { vec_img_data_size.size() }, "a");
+  visp::cnpy::npz_save(npz_filename, "vec_img", vec_img_data.data(), { vec_img_data.size() }, "a");
+  // Show how to save a "multidimensional" array
+  assert(iter == vec_poses.size()/6);
+  visp::cnpy::npz_save(npz_filename, "vec_poses", vec_poses.data(), { static_cast<size_t>(iter), 6 }, "a");
+
+  visp::cnpy::npz_save(npz_filename, "nb_data", &iter, { 1 }, "a");
+
+  return EXIT_SUCCESS;
+}