Implement decoding of .xz files

LZMA decoding code taken from the public domain LZMA SDK by Igor Pavlov.
2025-06-23 15:30:45 -04:00 · 2015-08-06 13:14:37 +05:30 · 2015-08-06 13:14:37 +05:30 · 50e0da0804
commit 50e0da0804
parent df3f850407
13 changed files with 2638 additions and 2 deletions
--- a/12
+++ b/12
@ -8,8 +8,8 @@ Files: src/duktape/*
 Copyright: Various
 License: MIT

-Files: resources/rapydscript/compiler.js
-Copyright: Alexander Tsepkov
+Files: resources/rapydscript/*
+Copyright: Various
 License: BSD

 Files: src/unrar/*
@ -21,6 +21,14 @@ Files: src/html5lib/*
 Copyright: Copyright (c) 2006-2013 James Graham and other contributors
 License: Expat

+Files: src/lzma/*
+Copyright: Igor Pavlov
+License: Public Domain
+
+Files: src/lzma/*.py src/lzma/lzma_binding.c
+Copyright: Kovid Goyal
+License: GPLv3
+
 Files: src/templite/*
 Copyright: Copyright (c) 2009 joonis new media, Thimo Kraemer
 License: GPL-2+
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -66,6 +66,11 @@ if iswindows:

 extensions = [

+    Extension('lzma_binding',
+              glob.glob(os.path.join(SRC, 'lzma', '*.c')),
+              headers=glob.glob(os.path.join(SRC, 'lzma', '*.h')),
+              ),
+
    Extension('dukpy',
              ['duktape/%s.c' % x for x in 'errors context conversions proxy module duktape/duktape'.split()],
              headers=['duktape/dukpy.h', 'duktape/duktape/duktape.h'],
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -147,6 +147,7 @@ class Plugins(collections.Mapping):
                'tokenizer',
                'certgen',
                'dukpy',
+                'lzma_binding',
            ]
        if iswindows:
            plugins.extend(['winutil', 'wpd', 'winfonts'])
--- a/src/lzma/7zTypes.h
+++ b/src/lzma/7zTypes.h
@ -0,0 +1,256 @@
+/* 7zTypes.h -- Basic types
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+#ifdef _WIN32
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#else
+typedef int WRes;
+#endif
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+   NOTES: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+typedef int Bool;
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+#define MY_NO_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct
+{
+  Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */
+} IByteIn;
+
+typedef struct
+{
+  void (*Write)(void *p, Byte b);
+} IByteOut;
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) < input(*size)) is allowed */
+} ISeqInStream;
+
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
+
+typedef struct
+{
+  size_t (*Write)(void *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+       (result < size) means error */
+} ISeqOutStream;
+
+typedef enum
+{
+  SZ_SEEK_SET = 0,
+  SZ_SEEK_CUR = 1,
+  SZ_SEEK_END = 2
+} ESzSeek;
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
+} ISeekInStream;
+
+typedef struct
+{
+  SRes (*Look)(void *p, const void **buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) > input(*size)) is not allowed
+       (output(*size) < input(*size)) is allowed */
+  SRes (*Skip)(void *p, size_t offset);
+    /* offset must be <= output(*size) of Look */
+
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* reads directly (without buffer). It's same as ISeqInStream::Read */
+  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
+} ILookInStream;
+
+SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
+
+#define LookToRead_BUF_SIZE (1 << 14)
+
+typedef struct
+{
+  ILookInStream s;
+  ISeekInStream *realStream;
+  size_t pos;
+  size_t size;
+  Byte buf[LookToRead_BUF_SIZE];
+} CLookToRead;
+
+void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
+void LookToRead_Init(CLookToRead *p);
+
+typedef struct
+{
+  ISeqInStream s;
+  ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+typedef struct
+{
+  ISeqInStream s;
+  ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+typedef struct
+{
+  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Value (UInt64)(Int64)-1 for size means unknown value. */
+} ICompressProgress;
+
+typedef struct
+{
+  void *(*Alloc)(void *p, size_t size);
+  void (*Free)(void *p, void *address); /* address can be 0 */
+} ISzAlloc;
+
+#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
+#define IAlloc_Free(p, a) (p)->Free((p), a)
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
--- a/src/lzma/Compiler.h
+++ b/src/lzma/Compiler.h
@ -0,0 +1,31 @@
+/* Compiler.h
+2015-03-25 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+#ifdef _MSC_VER
+
+  #ifdef UNDER_CE
+    #define RPC_NO_WINDOWS_H
+    /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+    #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+    #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+  #endif
+
+  #if _MSC_VER >= 1300
+    #pragma warning(disable : 4996) // This function or variable may be unsafe
+  #else
+    #pragma warning(disable : 4511) // copy constructor could not be generated
+    #pragma warning(disable : 4512) // assignment operator could not be generated
+    #pragma warning(disable : 4702) // unreachable code
+    #pragma warning(disable : 4710) // not inlined
+    #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+  #endif
+
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
--- a/src/lzma/Lzma2Dec.c
+++ b/src/lzma/Lzma2Dec.c
@ -0,0 +1,378 @@
+/* Lzma2Dec.c -- LZMA2 Decoder
+2014-10-29 : Igor Pavlov : Public domain */
+
+/* #define SHOW_DEBUG_INFO */
+
+#include "Precomp.h"
+
+#ifdef SHOW_DEBUG_INFO
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "Lzma2Dec.h"
+
+/*
+00000000  -  EOS
+00000001 U U  -  Uncompressed Reset Dic
+00000010 U U  -  Uncompressed No Reset
+100uuuuu U U P P  -  LZMA no reset
+101uuuuu U U P P  -  LZMA reset state
+110uuuuu U U P P S  -  LZMA reset state + new prop
+111uuuuu U U P P S  -  LZMA reset state + new prop + reset dic
+
+  u, U - Unpack Size
+  P - Pack Size
+  S - Props
+*/
+
+#define LZMA2_CONTROL_LZMA (1 << 7)
+#define LZMA2_CONTROL_COPY_NO_RESET 2
+#define LZMA2_CONTROL_COPY_RESET_DIC 1
+#define LZMA2_CONTROL_EOF 0
+
+#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0)
+
+#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3)
+#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2)
+
+#define LZMA2_LCLP_MAX 4
+#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
+
+#ifdef SHOW_DEBUG_INFO
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+typedef enum
+{
+  LZMA2_STATE_CONTROL,
+  LZMA2_STATE_UNPACK0,
+  LZMA2_STATE_UNPACK1,
+  LZMA2_STATE_PACK0,
+  LZMA2_STATE_PACK1,
+  LZMA2_STATE_PROP,
+  LZMA2_STATE_DATA,
+  LZMA2_STATE_DATA_CONT,
+  LZMA2_STATE_FINISHED,
+  LZMA2_STATE_ERROR
+} ELzma2State;
+
+/* Expands the one-byte LZMA2 dictionary-size property into the five-byte
+   LZMA property layout expected by the LzmaDec_Allocate* functions:
+   props[0] is set to LZMA2_LCLP_MAX and props[1..4] receive the dictionary
+   size, least significant byte first.
+   Returns SZ_ERROR_UNSUPPORTED when prop is greater than 40, SZ_OK otherwise. */
+static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
+{
+  UInt32 dictBytes;
+  unsigned i;
+
+  if (prop > 40)
+    return SZ_ERROR_UNSUPPORTED;
+
+  /* 40 is the special value for the largest representable dictionary. */
+  if (prop == 40)
+    dictBytes = 0xFFFFFFFF;
+  else
+    dictBytes = LZMA2_DIC_SIZE_FROM_PROP(prop);
+
+  props[0] = (Byte)LZMA2_LCLP_MAX;
+  for (i = 0; i < 4; i++)
+    props[1 + i] = (Byte)(dictBytes >> (8 * i));
+  return SZ_OK;
+}
+
+/* Allocates only the probability tables of the embedded LZMA decoder, using
+   the LZMA2 dictionary-size property byte `prop`.
+   Returns the error from Lzma2Dec_GetOldProps if prop is rejected, otherwise
+   whatever LzmaDec_AllocateProbs returns. */
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
+{
+  Byte lzmaProps[LZMA_PROPS_SIZE];
+  SRes res = Lzma2Dec_GetOldProps(prop, lzmaProps);
+  if (res != 0)
+    return res;
+  return LzmaDec_AllocateProbs(&p->decoder, lzmaProps, LZMA_PROPS_SIZE, alloc);
+}
+
+/* Allocates the embedded LZMA decoder via LzmaDec_Allocate, using the LZMA2
+   dictionary-size property byte `prop`.
+   Returns the error from Lzma2Dec_GetOldProps if prop is rejected, otherwise
+   whatever LzmaDec_Allocate returns. */
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
+{
+  Byte lzmaProps[LZMA_PROPS_SIZE];
+  SRes res = Lzma2Dec_GetOldProps(prop, lzmaProps);
+  if (res != 0)
+    return res;
+  return LzmaDec_Allocate(&p->decoder, lzmaProps, LZMA_PROPS_SIZE, alloc);
+}
+
+/* Prepares p for decoding a new LZMA2 stream: the embedded LZMA decoder is
+   re-initialised, all three "needs reset" flags are raised, and parsing
+   starts at the control byte of the first chunk. */
+void Lzma2Dec_Init(CLzma2Dec *p)
+{
+  LzmaDec_Init(&p->decoder);
+  p->needInitProp = True;
+  p->needInitState = True;
+  p->needInitDic = True;
+  p->state = LZMA2_STATE_CONTROL;
+}
+
+/* Feeds one chunk-header byte `b` into the header parser and returns the
+   state to move to. The current state (p->state) selects which header field
+   the byte belongs to; see the control-byte table at the top of this file.
+   Returns LZMA2_STATE_ERROR for invalid header values, and also when called
+   in a state that does not expect a header byte. */
+static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+{
+  switch (p->state)
+  {
+    case LZMA2_STATE_CONTROL:
+      p->control = b;
+      PRF(printf("\n %4X ", p->decoder.dicPos));
+      PRF(printf(" %2X", b));
+      /* A zero control byte marks the end of the stream. */
+      if (p->control == 0)
+        return LZMA2_STATE_FINISHED;
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        /* With bit 7 clear only the values 1 and 2 are valid (0 was handled above). */
+        if ((p->control & 0x7F) > 2)
+          return LZMA2_STATE_ERROR;
+        p->unpackSize = 0;
+      }
+      else
+        /* LZMA chunk: the low five control bits are bits 16..20 of the unpack size. */
+        p->unpackSize = (UInt32)(p->control & 0x1F) << 16;
+      return LZMA2_STATE_UNPACK0;
+    
+    case LZMA2_STATE_UNPACK0:
+      /* Bits 8..15 of the unpack size. */
+      p->unpackSize |= (UInt32)b << 8;
+      return LZMA2_STATE_UNPACK1;
+    
+    case LZMA2_STATE_UNPACK1:
+      /* Bits 0..7 of the unpack size; the assembled value is then incremented by one. */
+      p->unpackSize |= (UInt32)b;
+      p->unpackSize++;
+      PRF(printf(" %8d", p->unpackSize));
+      /* Uncompressed chunks have no pack-size field, so their data follows directly. */
+      return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;
+    
+    case LZMA2_STATE_PACK0:
+      /* High byte of the 16-bit pack size. */
+      p->packSize = (UInt32)b << 8;
+      return LZMA2_STATE_PACK1;
+
+    case LZMA2_STATE_PACK1:
+      /* Low byte of the pack size; the assembled value is then incremented by one. */
+      p->packSize |= (UInt32)b;
+      p->packSize++;
+      PRF(printf(" %8d", p->packSize));
+      /* Modes 2 and 3 carry a props byte. Without one, it is an error if no
+         props byte has been accepted yet (needInitProp still set). */
+      return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP:
+        (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA);
+
+    case LZMA2_STATE_PROP:
+    {
+      /* The props byte is decoded as (pb * 5 + lp) * 9 + lc. */
+      unsigned lc, lp;
+      if (b >= (9 * 5 * 5))
+        return LZMA2_STATE_ERROR;
+      lc = b % 9;
+      b /= 9;
+      /* NOTE: pb is stored before the lc + lp check below, so it is
+         modified even when this byte is rejected. */
+      p->decoder.prop.pb = b / 5;
+      lp = b % 5;
+      if (lc + lp > LZMA2_LCLP_MAX)
+        return LZMA2_STATE_ERROR;
+      p->decoder.prop.lc = lc;
+      p->decoder.prop.lp = lp;
+      p->needInitProp = False;
+      return LZMA2_STATE_DATA;
+    }
+  }
+  /* Reached for states that do not consume header bytes. */
+  return LZMA2_STATE_ERROR;
+}
+
+/* Appends `size` raw bytes from src to the dictionary at the current write
+   position and advances the decoder's position counters accordingly.
+   checkDicSize is set to the dictionary size the first time the bytes left
+   before processedPos reaches prop.dicSize do not exceed `size`. */
+static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
+{
+  Byte *writePtr = p->dic + p->dicPos;
+
+  memcpy(writePtr, src, size);
+  p->dicPos += size;
+
+  if (p->checkDicSize == 0)
+  {
+    /* Evaluated with the processedPos value from before this copy. */
+    if (p->prop.dicSize - p->processedPos <= size)
+      p->checkDicSize = p->prop.dicSize;
+  }
+  p->processedPos += (UInt32)size;
+}
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState);
+
+/* Decodes LZMA2 chunks from src into the decoder's dictionary buffer,
+   stopping when the dictionary position reaches dicLimit, the input runs
+   out, the end-of-stream control byte is seen, or an error occurs.
+
+   p          - decoder state; p->decoder.dic / dicPos receive the output.
+   dicLimit   - dictionary position at which output must stop.
+   src        - input bytes.
+   srcLen     - in: number of bytes available at src;
+                out: number of bytes actually consumed.
+   finishMode - LZMA_FINISH_ANY lets the call return as soon as dicLimit is
+                reached; otherwise header bytes are still read at the limit
+                and anything other than end-of-stream is an error.
+   status     - out: LZMA_STATUS_FINISHED_WITH_MARK, LZMA_STATUS_NOT_FINISHED
+                or LZMA_STATUS_NEEDS_MORE_INPUT on SZ_OK.
+
+   Returns SZ_OK or SZ_ERROR_DATA. After a data error p->state is
+   LZMA2_STATE_ERROR, so subsequent calls fail as well. */
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT inSize = *srcLen;
+  /* From here on *srcLen counts the input bytes consumed so far. */
+  *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->state != LZMA2_STATE_FINISHED)
+  {
+    SizeT dicPos = p->decoder.dicPos;
+    
+    if (p->state == LZMA2_STATE_ERROR)
+      return SZ_ERROR_DATA;
+    
+    /* Output limit reached and the caller does not require the stream to end here. */
+    if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
+    {
+      *status = LZMA_STATUS_NOT_FINISHED;
+      return SZ_OK;
+    }
+
+    /* Header states: consume exactly one byte per iteration. */
+    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
+    {
+      if (*srcLen == inSize)
+      {
+        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+        return SZ_OK;
+      }
+      (*srcLen)++;
+      p->state = Lzma2Dec_UpdateState(p, *src++);
+
+      /* Only reachable with a finish mode other than LZMA_FINISH_ANY: at the
+         output limit the only acceptable header byte is end-of-stream. */
+      if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)
+      {
+        p->state = LZMA2_STATE_ERROR;
+        return SZ_ERROR_DATA;
+      }
+      continue;
+    }
+    
+    {
+      /* destSizeCur: room left before dicLimit; srcSizeCur: unread input. */
+      SizeT destSizeCur = dicLimit - dicPos;
+      SizeT srcSizeCur = inSize - *srcLen;
+      ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
+      
+      /* The rest of the chunk fits into the available room: cap the output
+         at the chunk boundary and ask the LZMA decoder to finish there. */
+      if (p->unpackSize <= destSizeCur)
+      {
+        destSizeCur = (SizeT)p->unpackSize;
+        curFinishMode = LZMA_FINISH_END;
+      }
+
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        if (*srcLen == inSize)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+
+        /* First visit to this chunk's data: apply the resets it requests. */
+        if (p->state == LZMA2_STATE_DATA)
+        {
+          Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
+          /* After a dictionary reset the next LZMA chunk must again bring
+             new props and reset the state. */
+          if (initDic)
+            p->needInitProp = p->needInitState = True;
+          else if (p->needInitDic)
+          {
+            /* A dictionary reset is still pending but this chunk does not perform one. */
+            p->state = LZMA2_STATE_ERROR;
+            return SZ_ERROR_DATA;
+          }
+          p->needInitDic = False;
+          LzmaDec_InitDicAndState(&p->decoder, initDic, False);
+        }
+
+        if (srcSizeCur > destSizeCur)
+          srcSizeCur = destSizeCur;
+
+        /* Input was checked to be non-empty above, so zero here means there
+           is no output room left (destSizeCur == 0). */
+        if (srcSizeCur == 0)
+        {
+          p->state = LZMA2_STATE_ERROR;
+          return SZ_ERROR_DATA;
+        }
+
+        LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur);
+
+        src += srcSizeCur;
+        *srcLen += srcSizeCur;
+        p->unpackSize -= (UInt32)srcSizeCur;
+        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
+      }
+      else
+      {
+        SizeT outSizeProcessed;
+        SRes res;
+
+        /* First visit to this chunk's data: apply the resets encoded in
+           bits 5..6 of the control byte and verify that pending resets are
+           not skipped. */
+        if (p->state == LZMA2_STATE_DATA)
+        {
+          unsigned mode = LZMA2_GET_LZMA_MODE(p);
+          Bool initDic = (mode == 3);
+          Bool initState = (mode != 0);
+          if ((!initDic && p->needInitDic) || (!initState && p->needInitState))
+          {
+            p->state = LZMA2_STATE_ERROR;
+            return SZ_ERROR_DATA;
+          }
+          
+          LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
+          p->needInitDic = False;
+          p->needInitState = False;
+          p->state = LZMA2_STATE_DATA_CONT;
+        }
+  
+        /* Never hand the LZMA decoder bytes that belong to the next chunk. */
+        if (srcSizeCur > p->packSize)
+          srcSizeCur = (SizeT)p->packSize;
+          
+        res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status);
+        
+        /* Account for consumed input and produced output before looking at
+           res, so the counters are up to date even on error. */
+        src += srcSizeCur;
+        *srcLen += srcSizeCur;
+        p->packSize -= (UInt32)srcSizeCur;
+
+        outSizeProcessed = p->decoder.dicPos - dicPos;
+        p->unpackSize -= (UInt32)outSizeProcessed;
+
+        RINOK(res);
+        /* res is SZ_OK at this point. */
+        if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
+          return res;
+
+        /* No progress at all: that is only legitimate when the chunk is
+           completely consumed and produced; then go on to the next header. */
+        if (srcSizeCur == 0 && outSizeProcessed == 0)
+        {
+          if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+              || p->unpackSize != 0
+              || p->packSize != 0)
+          {
+            p->state = LZMA2_STATE_ERROR;
+            return SZ_ERROR_DATA;
+          }
+          p->state = LZMA2_STATE_CONTROL;
+        }
+        
+        /* The end of one chunk is not the end of the LZMA2 stream. */
+        if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
+          *status = LZMA_STATUS_NOT_FINISHED;
+      }
+    }
+  }
+  
+  /* The loop only exits normally after the end-of-stream control byte. */
+  *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return SZ_OK;
+}
+
+/* Buffer interface: decodes from src into dest by repeatedly decoding into
+   the internal dictionary and copying each newly produced run out to dest.
+
+   destLen - in: capacity of dest; out: number of bytes written to dest.
+   srcLen  - in: bytes available at src; out: number of bytes consumed.
+   finishMode is only passed to the inner decoder for the pass in which the
+   remaining output fits into the dictionary; earlier passes use
+   LZMA_FINISH_ANY.
+
+   Returns the first non-zero result of Lzma2Dec_DecodeToDic, or SZ_OK once
+   dest is full or a pass produces no output. */
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT outRemaining = *destLen, inRemaining = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT consumed = inRemaining;
+    SizeT startPos, limit, produced;
+    ELzmaFinishMode passMode;
+    SRes res;
+
+    /* Wrap around to the start of the dictionary buffer when it is full. */
+    if (p->decoder.dicPos == p->decoder.dicBufSize)
+      p->decoder.dicPos = 0;
+    startPos = p->decoder.dicPos;
+
+    if (outRemaining <= p->decoder.dicBufSize - startPos)
+    {
+      /* Everything still wanted fits before the end of the dictionary. */
+      limit = startPos + outRemaining;
+      passMode = finishMode;
+    }
+    else
+    {
+      limit = p->decoder.dicBufSize;
+      passMode = LZMA_FINISH_ANY;
+    }
+
+    res = Lzma2Dec_DecodeToDic(p, limit, src, &consumed, passMode, status);
+
+    produced = p->decoder.dicPos - startPos;
+    memcpy(dest, p->decoder.dic + startPos, produced);
+
+    src += consumed;
+    inRemaining -= consumed;
+    *srcLen += consumed;
+
+    dest += produced;
+    outRemaining -= produced;
+    *destLen += produced;
+
+    if (res != 0)
+      return res;
+    if (produced == 0 || outRemaining == 0)
+      return SZ_OK;
+  }
+}
+
+/* One-call interface: decodes a complete LZMA2 stream from src straight into
+   dest, which doubles as the dictionary buffer.
+
+   destLen - in: capacity of dest; out: number of bytes produced.
+   srcLen  - in: bytes available at src; out: number of bytes consumed.
+   prop    - LZMA2 dictionary-size property byte.
+   alloc   - allocator used for the temporary probability tables.
+
+   Returns the result of Lzma2Dec_DecodeToDic, except that running out of
+   input is reported as SZ_ERROR_INPUT_EOF. If allocating the probability
+   tables fails, that error is returned with both lengths set to 0. */
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc)
+{
+  CLzma2Dec dec;
+  SRes res;
+  SizeT outCapacity = *destLen, inAvailable = *srcLen;
+
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  Lzma2Dec_Construct(&dec);
+  res = Lzma2Dec_AllocateProbs(&dec, prop, alloc);
+  if (res != 0)
+    return res;
+
+  /* Use the caller's output buffer directly as the dictionary. */
+  dec.decoder.dic = dest;
+  dec.decoder.dicBufSize = outCapacity;
+  Lzma2Dec_Init(&dec);
+
+  *srcLen = inAvailable;
+  res = Lzma2Dec_DecodeToDic(&dec, outCapacity, src, srcLen, finishMode, status);
+  *destLen = dec.decoder.dicPos;
+
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    res = SZ_ERROR_INPUT_EOF;
+
+  Lzma2Dec_FreeProbs(&dec, alloc);
+  return res;
+}
--- a/src/lzma/Lzma2Dec.h
+++ b/src/lzma/Lzma2Dec.h
@ -0,0 +1,80 @@
+/* Lzma2Dec.h -- LZMA2 Decoder
+2015-05-13 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA2_DEC_H
+#define __LZMA2_DEC_H
+
+#include "LzmaDec.h"
+
+EXTERN_C_BEGIN
+
+/* ---------- State Interface ---------- */
+
+/* State of an LZMA2 decoder: an embedded LZMA decoder plus the bookkeeping
+   needed to parse LZMA2 chunk headers (see Lzma2Dec.c). */
+typedef struct
+{
+  /* Embedded LZMA decoder; it owns the dictionary and probability tables. */
+  CLzmaDec decoder;
+  /* Compressed bytes of the current LZMA chunk that are still unread. */
+  UInt32 packSize;
+  /* Uncompressed bytes of the current chunk that are still to be produced. */
+  UInt32 unpackSize;
+  /* Current parser state; holds one of the ELzma2State values from Lzma2Dec.c. */
+  unsigned state;
+  /* Control byte of the chunk currently being processed. */
+  Byte control;
+  /* Set by Lzma2Dec_Init; while set, a chunk that does not reset the
+     dictionary is rejected as a data error. */
+  Bool needInitDic;
+  /* While set, an LZMA chunk that does not reset the decoder state is
+     rejected as a data error. */
+  Bool needInitState;
+  /* While set, an LZMA chunk that carries no props byte is rejected. */
+  Bool needInitProp;
+} CLzma2Dec;
+
+#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
+#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc);
+#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc);
+
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
+void Lzma2Dec_Init(CLzma2Dec *p);
+
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc);
+
+EXTERN_C_END
+
+#endif
--- a/src/lzma/LzmaDec.c
+++ b/src/lzma/LzmaDec.c
--- a/src/lzma/LzmaDec.h
+++ b/src/lzma/LzmaDec.h
@ -0,0 +1,227 @@
+/* LzmaDec.h -- LZMA Decoder
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+   but memory usage for CLzmaDec::probs will be doubled in that case */
+
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
+  unsigned lc, lp, pb;
+  UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+  SZ_OK
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+  CLzmaProps prop;
+  CLzmaProb *probs;
+  Byte *dic;
+  const Byte *buf;
+  UInt32 range, code;
+  SizeT dicPos;
+  SizeT dicBufSize;
+  UInt32 processedPos;
+  UInt32 checkDicSize;
+  unsigned state;
+  UInt32 reps[4];
+  unsigned remainLen;
+  int needFlush;
+  int needInitState;
+  UInt32 numProbs;
+  unsigned tempBufSize;
+  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+     0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
+     1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+  LZMA_FINISH_ANY,   /* finish at any point */
+  LZMA_FINISH_END    /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+   You must use LZMA_FINISH_END, when you know that current output buffer
+   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+   and output value of destLen will be less than output buffer size limit.
+   You can check status result also.
+
+   You can use multiple checks to test data integrity after full decompression:
+     1) Check Result and "status" variable.
+     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+        You must use correct finish mode in that case. */
+
+typedef enum
+{
+  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
+  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
+  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
+  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+     1) Dictionary Interface
+     2) Buffer Interface
+     3) One Call Interface
+   You can select any of these interfaces, but don't mix functions from different
+   groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+     1) LzmaDec_Allocate / LzmaDec_Free
+     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+   You can use variant 2, if you set dictionary buffer manually.
+   For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+  SZ_OK
+  SZ_ERROR_MEM         - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+   
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+   dictionary to some other external buffer.
+   You must work with CLzmaDec variables directly in this interface.
+
+   STEPS:
+     LzmaDec_Construct()
+     LzmaDec_Allocate()
+     for (each new stream)
+     {
+       LzmaDec_Init()
+       while (it needs more decompression)
+       {
+         LzmaDec_DecodeToDic()
+         use data from CLzmaDec::dic and update CLzmaDec::dicPos
+       }
+     }
+     LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+   
+   The decoding to internal dictionary buffer (CLzmaDec::dic).
+   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (dicLimit).
+  LZMA_FINISH_ANY - Decode just dicLimit bytes.
+  LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+   See LzmaDec_DecodeToDic description for information about STEPS and return results,
+   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+   to work with CLzmaDec variables manually.
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAlloc *alloc);
+
+EXTERN_C_END
+
+#endif
--- a/src/lzma/Precomp.h
+++ b/src/lzma/Precomp.h
@ -0,0 +1,10 @@
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
--- a/src/lzma/init.py
+++ b/src/lzma/init.py
@ -0,0 +1,10 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+
+
--- a/src/lzma/lzma_binding.c
+++ b/src/lzma/lzma_binding.c
@ -0,0 +1,189 @@
+/*
+ * lzma_binding.c
+ * Copyright (C) 2015 Kovid Goyal <kovid at kovidgoyal.net>
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#define UNICODE
+#include "Python.h"
+#include "Lzma2Dec.h"
+
+/* Allocation callbacks handed to the LZMA decoder; they forward to Python's
+ * memory allocator. The first argument is not used. */
+static void *Alloc(void *p, size_t size) {
+    (void)p;
+    return PyMem_Malloc(size);
+}
+
+static void Free(void *p, void *address) {
+    (void)p;
+    PyMem_Free(address);
+}
+
+static ISzAlloc allocator = { Alloc, Free };
+/* Human readable names for the SRes codes, indexed by the numeric code. */
+static const char* error_codes[18] = {
+    "OK",
+    "SZ_ERROR_DATA",
+    "SZ_ERROR_MEM",
+    "SZ_ERROR_CRC",
+    "SZ_ERROR_UNSUPPORTED",
+    "SZ_ERROR_PARAM",
+    "SZ_ERROR_INPUT_EOF",
+    "SZ_ERROR_OUTPUT_EOF",
+    "SZ_ERROR_READ",
+    "SZ_ERROR_WRITE",
+    "SZ_ERROR_PROGRESS",
+    "SZ_ERROR_FAIL",
+    "SZ_ERROR_THREAD",
+    "UNKNOWN", "UNKNOWN", "UNKNOWN",
+    "SZ_ERROR_ARCHIVE",
+    "SZ_ERROR_NO_ARCHIVE",
+};
+/* Raise LZMAError with the name of error code x. The upper bound is derived
+ * from the table size so that the last entry is reachable as well. */
+#define SET_ERROR(x) PyErr_SetString(LZMAError, ((x) > 0 && (x) < (int)(sizeof(error_codes)/sizeof(error_codes[0]))) ? error_codes[(x)] : "UNKNOWN")
+
+static PyObject *LZMAError = NULL;
+static UInt64 crc64_table[256];
+
+/* Fill crc64_table with the per-byte CRC-64 values for polynomial
+ * 0xC96C5795D7870F42, processing bits least-significant first (right shifts).
+ * Loop counters are declared at the top of the function so the file also
+ * builds with compilers that only accept C89 declarations. */
+static void init_crc_table(void) {
+    static const UInt64 poly64 = (UInt64)(0xC96C5795D7870F42);
+    size_t i, j;
+    for (i = 0; i < 256; ++i) {
+        UInt64 crc64 = i;
+        for (j = 0; j < 8; ++j) {
+            if (crc64 & 1)
+                crc64 = (crc64 >> 1) ^ poly64;
+            else
+                crc64 >>= 1;
+        }
+        crc64_table[i] = crc64;
+    }
+}
+
+/* crc64(data, crc=0) -> CRC-64 of data, continuing from a previous crc value.
+ * The incoming value is inverted before and the result after the table
+ * driven update, so results of successive calls can be chained. */
+static PyObject *
+crc64(PyObject *self, PyObject *args) {
+    unsigned char *data = NULL;
+    Py_ssize_t size = 0, i;  /* signed index: matches the type of size */
+    UInt64 crc = 0;
+    if (!PyArg_ParseTuple(args, "s#|K", &data, &size, &crc)) return NULL;
+    crc = ~crc;
+    for (i = 0; i < size; ++i)
+        crc = crc64_table[data[i] ^ (crc & 0xFF)] ^ (crc >> 8);
+
+    return Py_BuildValue("K", ~crc);
+}
+
+/* delta_decode(rawarray, histarray, pos, distance) -> new pos
+ *
+ * Applies the delta filter in place to the bytearray rawarray, using the 256
+ * byte bytearray histarray as a ring buffer of previously decoded bytes.
+ * pos and distance are both parsed with the "B" format, i.e. as single
+ * bytes, so they are stored in unsigned char variables (storing a "B" value
+ * through an unsigned int pointer only works on little-endian machines). */
+static PyObject*
+delta_decode(PyObject *self, PyObject *args) {
+    PyObject *array = NULL, *histarray = NULL;
+    unsigned char *data = NULL, pos = 0, distance = 0, *history = NULL;
+    Py_ssize_t datalen = 0, i;
+    if (!PyArg_ParseTuple(args, "O!O!BB", &PyByteArray_Type, &array, &PyByteArray_Type, &histarray, &pos, &distance)) return NULL;
+    if (PyByteArray_GET_SIZE(histarray) != 256) {
+        PyErr_SetString(PyExc_TypeError, "histarray must be 256 bytes long");
+        return NULL;
+    }
+    data = (unsigned char*)PyByteArray_AS_STRING(array); history = (unsigned char*)PyByteArray_AS_STRING(histarray);
+    /* array is a bytearray, so use the bytearray size accessor */
+    datalen = PyByteArray_GET_SIZE(array);
+
+    for (i = 0; i < datalen; i++) {
+        /* index arithmetic wraps modulo 256, the size of the history buffer */
+        data[i] += history[(unsigned char)(pos + distance)];
+        history[pos--] = data[i];
+    }
+    return Py_BuildValue("B", pos);
+}
+
+/* decompress2(read, seek, write, props, bufsize) -> None
+ *
+ * Decodes one LZMA2 stream. Compressed data is pulled with read(n), decoded
+ * output is pushed with write(bytes), and seek(-n, SEEK_CUR) is used to hand
+ * back input bytes that were read but not consumed. props is the LZMA2
+ * properties byte and bufsize the size in bytes of the input and output
+ * buffers. Raises lzma_binding.error for decoder errors, ValueError if the
+ * input ends (or read() misbehaves) before the end marker is seen. */
+static PyObject *
+decompress2(PyObject *self, PyObject *args) {
+    PyObject *read = NULL, *seek = NULL, *write = NULL, *rres = NULL, *cres = NULL;
+    unsigned long bufsize = 0;
+    /* The decoder takes SizeT pointers; unsigned long has a different width
+     * on some platforms (e.g. 64-bit Windows). */
+    SizeT bytes_written = 0, bytes_read = 0, inbuf_pos = 0, inbuf_len = 0, leftover = 0;
+    unsigned char props = 0;
+    char *inbuf = NULL, *outbuf = NULL;
+    CLzma2Dec state;
+    SRes res = 0;
+    ELzmaStatus status = LZMA_STATUS_NOT_FINISHED;
+
+    if (!PyArg_ParseTuple(args, "OOOBk", &read, &seek, &write, &props, &bufsize)) return NULL;
+
+    Lzma2Dec_Construct(&state);
+    res = Lzma2Dec_Allocate(&state, (Byte)props, &allocator);
+    if (res == SZ_ERROR_MEM) { PyErr_NoMemory(); return NULL; }
+    if (res != SZ_OK) { PyErr_SetString(PyExc_TypeError, "Incorrect stream properties"); goto exit; }
+    inbuf = (char*)PyMem_Malloc(bufsize);
+    outbuf = (char*)PyMem_Malloc(bufsize);
+    if (!inbuf || !outbuf) {PyErr_NoMemory(); goto exit;}
+
+    Lzma2Dec_Init(&state);
+
+    while (status != LZMA_STATUS_FINISHED_WITH_MARK) {
+        bytes_written = bufsize; bytes_read = inbuf_len - inbuf_pos;
+        Py_BEGIN_ALLOW_THREADS;
+        res = Lzma2Dec_DecodeToBuf(&state, (Byte*)outbuf, &bytes_written, (Byte*)(inbuf) + inbuf_pos, &bytes_read, LZMA_FINISH_ANY, &status);
+        Py_END_ALLOW_THREADS;
+        if (res != SZ_OK) { SET_ERROR(res); goto exit; }
+        if (bytes_written > 0) {
+            /* PY_SSIZE_T_CLEAN is defined, so "s#" takes a Py_ssize_t length */
+            cres = PyObject_CallFunction(write, "s#", outbuf, (Py_ssize_t)bytes_written);
+            if (cres == NULL) goto exit;
+            Py_DECREF(cres); cres = NULL;
+        }
+        if (bytes_read > 0) inbuf_pos += bytes_read;
+        if (status == LZMA_STATUS_NEEDS_MORE_INPUT) {
+            /* Rewind over the unconsumed tail so that it is read again
+             * together with fresh data. */
+            leftover = inbuf_len - inbuf_pos;
+            inbuf_pos = 0;
+            cres = PyObject_CallFunction(seek, "ni", -(Py_ssize_t)leftover, SEEK_CUR);
+            if (cres == NULL) goto exit;
+            Py_DECREF(cres); cres = NULL;
+            rres = PyObject_CallFunction(read, "n", (Py_ssize_t)bufsize);
+            if (rres == NULL) goto exit;
+            if (!PyBytes_Check(rres)) { PyErr_SetString(PyExc_TypeError, "read() must return a bytes object"); goto exit; }
+            inbuf_len = PyBytes_GET_SIZE(rres);
+            if (inbuf_len == 0) { PyErr_SetString(PyExc_ValueError, "LZMA2 block was truncated"); goto exit; }
+            /* never copy more than the input buffer can hold */
+            if (inbuf_len > bufsize) { PyErr_SetString(PyExc_ValueError, "read() returned more data than was requested"); goto exit; }
+            memcpy(inbuf, PyBytes_AS_STRING(rres), inbuf_len);
+            Py_DECREF(rres); rres = NULL;
+        }
+    }
+    /* Hand back input that was read past the end of the LZMA2 stream */
+    leftover = inbuf_len - inbuf_pos;
+    if (leftover > 0) {
+        cres = PyObject_CallFunction(seek, "ni", -(Py_ssize_t)leftover, SEEK_CUR);
+        if (cres == NULL) goto exit;
+        Py_DECREF(cres); cres = NULL;
+    }
+
+
+exit:
+    Py_XDECREF(rres);
+    Lzma2Dec_Free(&state, &allocator);
+    PyMem_Free(inbuf); PyMem_Free(outbuf);
+    if (PyErr_Occurred()) return NULL;
+    Py_RETURN_NONE;
+}
+
+/* Functions exported by the module. Each entry is
+ * {python name, C implementation, calling convention, docstring}. */
+static PyMethodDef lzma_binding_methods[] = {
+    /* Parsed as (read, seek, write, props, bufsize); see decompress2() */
+    {"decompress2", decompress2, METH_VARARGS,
+        "Decompress an LZMA2 encoded block, of unknown compressed size (reads till LZMA2 EOS marker)"
+    },
+
+    /* Also accepts an optional second argument: the CRC value to continue from */
+    {"crc64", crc64, METH_VARARGS,
+        "crc64(bytes) -> CRC 64 for the provided python bytes object"
+    },
+
+    /* Returns the updated pos; rawarray is modified in place */
+    {"delta_decode", delta_decode, METH_VARARGS,
+        "delta_decode(rawarray, histarray, pos, distance) -> Apply the delta decode filter to the bytearray rawarray"
+    },
+
+    /* Sentinel marking the end of the table */
+    {NULL, NULL, 0, NULL}
+};
+
+
+/* Module initialisation: builds the CRC-64 table, creates the module and its
+ * error exception and exports the SZ_* result codes as integer constants. */
+PyMODINIT_FUNC
+initlzma_binding(void) {
+    PyObject *m = NULL;
+    init_crc_table();
+    LZMAError = PyErr_NewException("lzma_binding.error", NULL, NULL);
+    if (!LZMAError) return;
+    m = Py_InitModule3("lzma_binding", lzma_binding_methods,
+    "Bindings to the LZMA (de)compression C code"
+    );
+    /* Bail out before the module pointer is used if creation failed */
+    if (m == NULL) return;
+    Py_INCREF(LZMAError);
+    PyModule_AddObject(m, "error", LZMAError);
+    PyModule_AddIntMacro(m, SZ_OK);
+    PyModule_AddIntMacro(m, SZ_ERROR_DATA);
+    PyModule_AddIntMacro(m, SZ_ERROR_MEM);
+    PyModule_AddIntMacro(m, SZ_ERROR_CRC);
+    PyModule_AddIntMacro(m, SZ_ERROR_UNSUPPORTED);
+    PyModule_AddIntMacro(m, SZ_ERROR_PARAM);
+    PyModule_AddIntMacro(m, SZ_ERROR_INPUT_EOF);
+    PyModule_AddIntMacro(m, SZ_ERROR_OUTPUT_EOF);
+    PyModule_AddIntMacro(m, SZ_ERROR_READ);
+    PyModule_AddIntMacro(m, SZ_ERROR_WRITE);
+    PyModule_AddIntMacro(m, SZ_ERROR_PROGRESS);
+    PyModule_AddIntMacro(m, SZ_ERROR_FAIL);
+    PyModule_AddIntMacro(m, SZ_ERROR_THREAD);
+    PyModule_AddIntMacro(m, SZ_ERROR_ARCHIVE);
+    PyModule_AddIntMacro(m, SZ_ERROR_NO_ARCHIVE);
+}
--- a/src/lzma/xz.py
+++ b/src/lzma/xz.py
@ -0,0 +1,350 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from collections import namedtuple
+from io import BytesIO
+from hashlib import sha256
+from struct import unpack, error as struct_error
+from binascii import crc32 as _crc32
+
+from calibre.constants import plugins
+from calibre.ptempfile import SpooledTemporaryFile
+
+# plugins['lzma_binding'] is indexed as a pair: item 0 is the loaded extension
+# module (falsy when loading failed), item 1 is reported as the load error.
+lzma = plugins['lzma_binding'][0]
+if not lzma:
+    raise RuntimeError('Failed to load lzma_binding module with error: %s' % plugins['lzma_binding'][1])
+
+# Magic bytes that must start every stream header
+HEADER_MAGIC = b'\xfd7zXZ\0'
+# IDs of the only two filters supported when parsing block headers
+DELTA_FILTER_ID = 0x03
+LZMA2_FILTER_ID = 0x21
+
+def crc32(raw, start=0):
+    ''' CRC-32 of raw, continuing from the checksum start, always returned as
+    a non-negative 32 bit integer '''
+    checksum = _crc32(raw, start)
+    return checksum & 0xFFFFFFFF
+
+class XZError(ValueError):
+    ''' Base class for the errors raised by this module '''
+
+class NotXZ(XZError):
+    ''' The data does not start with a valid XZ stream header '''
+
+class InvalidXZ(XZError):
+    ''' The data is malformed or uses unsupported features '''
+
+def decode_var_int(f):
+    ''' Read a variable length integer from the file-like object f, one byte
+    at a time. Every byte contributes its low 7 bits, least significant group
+    first; a byte below 0x80 ends the number. A zero byte ends decoding
+    immediately with the value 0. '''
+    value = shift = 0
+    while True:
+        byte = ord(f.read(1))
+        if byte == 0:
+            return 0
+        value |= (byte & 0x7f) << shift
+        shift += 7
+        if byte < 0x80:
+            return value
+
+def decode_var_int2(raw, pos):
+    ''' Decode a variable length integer stored in raw starting at index pos.
+    Returns (value, index of the first byte after the integer).
+
+    A single zero byte encodes the value 0 and is consumed like any other
+    byte, so that the following field is parsed from the right offset. A zero
+    byte in a continuation position is not a valid encoding and raises
+    InvalidXZ. '''
+    ans, ch, opos = 0, 0x80, pos
+    while ch >= 0x80:
+        ch = ord(raw[pos])
+        if ch == 0:
+            if pos == opos:
+                return 0, pos + 1
+            raise InvalidXZ('Invalid variable length integer in block header')
+        ans |= (ch & 0x7f) << ((pos - opos) * 7)
+        pos += 1
+    return ans, pos
+
+def encode_var_int(num):
+    ''' Encode num as a variable length integer: groups of 7 bits, least
+    significant group first, with the high bit set on every byte except the
+    last one '''
+    if num == 0:
+        return b'\0'
+    groups = []
+    while num != 0:
+        groups.append(num & 0x7F)
+        num >>= 7
+    encoded = bytearray(0x80 | group for group in groups[:-1])
+    encoded.append(groups[-1])
+    return bytes(encoded)
+
+def read_stream_header(f):
+    ''' Read and validate the 12 byte stream header from f and return the
+    check type stored in the low four bits of the second stream flags byte.
+
+    Raises NotXZ if the header is too short or the magic bytes are wrong and
+    InvalidXZ if the stream flags or their CRC are invalid. '''
+    try:
+        magic, stream_flags1, stream_flags2, crc = unpack(b'<6s2BI', f.read(12))
+    except struct_error as e:
+        # The message needs a placeholder, otherwise the % operation itself
+        # fails with a TypeError and NotXZ is never raised
+        raise NotXZ('Not an XZ file. Invalid stream header: %s' % e)
+    if magic != HEADER_MAGIC:
+        raise NotXZ('Not an XZ file. Header Magic is: %r' % magic)
+    if stream_flags1 != 0:
+        raise InvalidXZ('Stream flags first byte is not null')
+    check_type, reserved = 0x0f & stream_flags2, 0xf0 & stream_flags2
+    if reserved != 0:
+        raise InvalidXZ('Stream flags reserved bits not null')
+    # The CRC covers only the two stream flags bytes
+    if crc32(bytes(bytearray([stream_flags1, stream_flags2]))) != crc:
+        raise InvalidXZ('Stream flags header CRC incorrect')
+    return check_type
+
+class CRCChecker(object):
+
+    ''' Running CRC over the data it is called with: CRC-32 for check type
+    0x1, CRC-64 (from the lzma binding) for any other check type '''
+
+    def __init__(self, check_type):
+        self.code = 0
+        use_crc32 = check_type == 0x1
+        self.func = crc32 if use_crc32 else lzma.crc64
+        self.size = 4 if use_crc32 else 8
+        self.fmt = b'<I' if use_crc32 else b'<Q'
+
+    def __call__(self, raw):
+        # Feed the previous value back in to continue the checksum
+        self.code = self.func(raw, self.code)
+
+    def finish(self):
+        if self.func is crc32:
+            return
+        # Clamp the CRC-64 value to an unsigned 64 bit number
+        self.code = 0xFFFFFFFFFFFFFFFF & self.code
+
+    def check(self, raw):
+        ''' True iff raw holds the computed checksum in little endian form '''
+        expected, = unpack(self.fmt, raw)
+        return self.code == expected
+
+class Sha256Checker(object):
+
+    ''' SHA-256 digest over the data it is called with '''
+
+    def __init__(self):
+        hasher = sha256()
+        self.h, self.func = hasher, hasher.update
+        self.code, self.size = None, 32
+
+    def __call__(self, raw):
+        self.func(raw)
+
+    def finish(self):
+        # Store the digest and drop the hash object, no more data is expected
+        self.code = self.h.digest()
+        self.func = self.h = None
+
+    def check(self, raw):
+        ''' True iff raw equals the digest stored by finish() '''
+        return self.code == raw
+
+class DummyChecker(object):
+
+    ''' Checker used when the stream has no integrity check. It ignores all
+    data and offers the same methods as the other checkers. '''
+
+    size = 0
+
+    def __call__(self, raw):
+        pass
+
+    def finish(self):
+        pass
+
+    def check(self, raw):
+        ''' There is no check value (size is 0), so only empty data matches '''
+        return not raw
+
+class LZMA2Filter(object):
+
+    ''' Decodes the LZMA2 data of a block, passes the output through any
+    further filters and feeds the result to the integrity checker chosen by
+    check_type '''
+
+    BUFSIZE = 10  # MB
+
+    def __init__(self, props, check_type, bufsize=None):
+        if len(props) != 1:
+            raise InvalidXZ('Invalid properties length for LZMA2 filter')
+        prop_byte = ord(props)
+        self.dictionary_size = prop_byte & 0x3F
+        if prop_byte & 0xC0:
+            raise InvalidXZ('Invalid high bytes for LZMA2 filter properties')
+        self.props = prop_byte
+        if check_type == 0x0A:
+            self.crc = Sha256Checker()
+        elif check_type in (0x1, 0x4):
+            self.crc = CRCChecker(check_type)
+        elif check_type:
+            raise InvalidXZ('Unsupported CRC check type: %s' % check_type)
+        else:
+            self.crc = DummyChecker()
+        # bufsize is given in megabytes, self.bufsize is in bytes
+        megabytes = self.BUFSIZE if bufsize is None else bufsize
+        self.bufsize = int(megabytes * 1024 * 1024)
+
+    def __call__(self, f, outfile, filters):
+        ''' Decode the LZMA2 data at the current position of f into outfile,
+        applying filters, in order, to every decoded chunk first '''
+        emit, update_check = outfile.write, self.crc
+
+        def write(raw):
+            if filters:
+                # The filters operate on a mutable copy of the chunk
+                buf = bytearray(raw)
+                for flt in filters:
+                    buf = flt(buf)
+                raw = bytes(buf)
+            emit(raw)
+            update_check(raw)
+
+        try:
+            lzma.decompress2(f.read, f.seek, write, self.props, self.bufsize)
+        except lzma.error as e:
+            raise InvalidXZ('Failed to decode LZMA2 block with error code: %s' % e.message)
+        self.crc.finish()
+
+class DeltaFilter(object):
+
+    ''' Delta filter. The position and the 256 byte history are kept between
+    calls, so the data can be decoded in several chunks. '''
+
+    def __init__(self, props, *args):
+        if len(props) != 1:
+            raise InvalidXZ('Invalid properties length for Delta filter')
+        self.history = bytearray(256)
+        self.pos = 0
+        # The property byte stores the distance minus one
+        self.distance = 1 + ord(props)
+
+    def __call__(self, raw):
+        ''' Decode the bytearray raw in place and return it '''
+        new_pos = lzma.delta_decode(raw, self.history, self.pos, self.distance)
+        self.pos = new_pos
+        return raw
+
+def test_delta_filter():
+    ''' Check the delta filter against a known sample, decoding it first in
+    one go and then one byte at a time with a single filter instance '''
+    raw = b'\xA1\xB1\x01\x02\x01\x02\x01\x02'
+    draw = b'\xA1\xB1\xA2\xB3\xA3\xB5\xA4\xB7'
+
+    def eq(s, d):
+        if s != d:
+            raise ValueError('%r != %r' % (s, d))
+
+    whole = DeltaFilter(b'\x01')(bytearray(raw))
+    eq(draw, bytes(whole))
+
+    incremental = DeltaFilter(b'\x01')
+    for ch, dch in zip(raw, draw):
+        decoded = incremental(bytearray(ch))
+        eq(dch, bytes(decoded))
+
+
+# Sizes of one block; built both from the blocks actually decoded and from the
+# index records, and the two sequences are compared in read_stream().
+Block = namedtuple('Block', 'unpadded_size uncompressed_size')
+
+def read_block_header(f, block_header_size_, check_type):
+    ''' Parse a block header. block_header_size_ is the already read first
+    byte of the header, the rest is read from f.
+
+    Returns (filters, compressed_size, uncompressed_size). filters is the
+    list of filter objects in the order they appear in the header, the sizes
+    are None when the header does not store them. Raises InvalidXZ if the
+    header is truncated, malformed or uses unsupported filters. '''
+    block_header_size = 4 * (ord(block_header_size_) + 1)
+    if block_header_size < 8:
+        raise InvalidXZ('Invalid block header size: %d' % block_header_size)
+    try:
+        header, crc = unpack(b'<%dsI' % (block_header_size - 5), f.read(block_header_size - 1))
+    except struct_error as e:
+        # Fewer bytes than announced were available
+        raise InvalidXZ('Truncated block header: %s' % e)
+    # The CRC covers the size byte as well as the rest of the header
+    if crc != crc32(block_header_size_ + header):
+        raise InvalidXZ('Block header CRC mismatch')
+    block_flags = ord(header[0])
+    number_of_filters = (0x03 & block_flags) + 1
+    if not (0 < number_of_filters <= 4):
+        raise InvalidXZ('Invalid number of filters: %d' % number_of_filters)
+    if block_flags & 0x3c != 0:
+        raise InvalidXZ('Non-zero reserved bits in block flags')
+    has_compressed_size = block_flags & 0x40
+    has_uncompressed_size = block_flags & 0x80
+    compressed_size = uncompressed_size = None
+    pos = 1
+    if has_compressed_size:
+        compressed_size, pos = decode_var_int2(header, pos)
+    if has_uncompressed_size:
+        uncompressed_size, pos = decode_var_int2(header, pos)
+    filters = []
+    while number_of_filters:
+        # After the decrement number_of_filters is the count of filters that
+        # still follow the current one
+        number_of_filters -= 1
+        filter_id, pos = decode_var_int2(header, pos)
+        size_of_properties, pos = decode_var_int2(header, pos)
+        if filter_id >= 0x4000000000000000:
+            raise InvalidXZ('Invalid filter id: %d' % filter_id)
+        if filter_id not in (LZMA2_FILTER_ID, DELTA_FILTER_ID):
+            raise InvalidXZ('Unsupported filter ID: 0x%x' % filter_id)
+        props = header[pos:pos+size_of_properties]
+        pos += size_of_properties
+        if len(props) != size_of_properties:
+            raise InvalidXZ('Incomplete filter properties')
+        if filter_id == LZMA2_FILTER_ID and number_of_filters:
+            raise InvalidXZ('LZMA2 filter must be the last filter')
+        elif filter_id == DELTA_FILTER_ID and not number_of_filters:
+            raise InvalidXZ('Delta filter cannot be the last filter')
+        filters.append((LZMA2Filter if filter_id == LZMA2_FILTER_ID else DeltaFilter)(props, check_type))
+    # Whatever follows the filter descriptions must be null padding
+    padding = header[pos:]
+    if padding.lstrip(b'\0'):
+        raise InvalidXZ('Non-null block header padding: %r' % padding)
+    return filters, compressed_size, uncompressed_size
+
+def read_block(f, block_header_size_, check_type, outfile):
+    ''' Decode one block from f into outfile. block_header_size_ is the
+    first byte of the block, which the caller has already read. The sizes
+    stored in the header, the block padding and the integrity check are
+    verified. Returns a Block describing what was read. '''
+    start_pos = f.tell() - 1  # the size byte was consumed by the caller
+    filters, compressed_size, uncompressed_size = read_block_header(f, block_header_size_, check_type)
+    data_start, out_start = f.tell(), outfile.tell()
+    # The filter stored last in the header is the one that reads from f
+    ordered = filters[::-1]
+    decoder, others = ordered[0], ordered[1:]
+    decoder(f, outfile, others)
+    actual_compressed_size = f.tell() - data_start
+    uncompressed_actual_size = outfile.tell() - out_start
+    if uncompressed_size is not None and uncompressed_size != uncompressed_actual_size:
+        raise InvalidXZ('Uncompressed size for block does not match')
+    if compressed_size is not None and compressed_size != actual_compressed_size:
+        raise InvalidXZ('Compressed size for block does not match')
+    # Number of null bytes needed to reach the next multiple of four
+    padding_count = -f.tell() % 4
+    if padding_count:
+        padding = f.read(padding_count)
+        if len(padding) != padding_count:
+            raise InvalidXZ('Block is not aligned')
+        if padding.lstrip(b'\0'):
+            raise InvalidXZ('Block padding has non null bytes')
+    if check_type:
+        checker = decoder.crc
+        stored = f.read(checker.size)
+        if not checker.check(stored):
+            raise InvalidXZ('CRC for data does not match')
+    unpadded_size = f.tell() - padding_count - start_pos
+    return Block(unpadded_size, uncompressed_actual_size)
+
+def read_index(f):
+    ''' Generator that yields one Block per record of the index in f. The
+    index indicator byte must already have been read. Padding and CRC of the
+    index are only verified once all records have been consumed. '''
+    index_start = f.tell() - 1  # include the indicator byte
+    remaining = decode_var_int(f)
+    while remaining:
+        remaining -= 1
+        unpadded_size = decode_var_int(f)
+        if unpadded_size < 1:
+            raise InvalidXZ('Invalid unpadded size in index: %d' % unpadded_size)
+        uncompressed_size = decode_var_int(f)
+        yield Block(unpadded_size, uncompressed_size)
+    padding_count = -f.tell() % 4
+    if padding_count:
+        padding = f.read(padding_count)
+        if len(padding) != padding_count or padding.lstrip(b'\0'):
+            raise InvalidXZ('Incorrect Index padding')
+    index_end = f.tell()
+    # Re-read the whole index to compute its CRC; this leaves f positioned
+    # at the stored CRC value
+    f.seek(index_start)
+    raw = f.read(index_end - index_start)
+    crc, = unpack(b'<I', f.read(4))
+    if crc != crc32(raw):
+        raise InvalidXZ('Index field CRC mismatch')
+
+def read_stream_footer(f, check_type, index_size):
+    ''' Read the stream footer from f and verify it against the check type
+    from the stream header and the size of the index that was read. Raises
+    InvalidXZ on any mismatch. '''
+    crc, = unpack(b'<I', f.read(4))
+    raw = f.read(6)
+    backward_size, stream_flags1, stream_flags2 = unpack(b'<I2B', raw)
+    flags_match = (
+        stream_flags1 == 0 and stream_flags2 & 0xf0 == 0 and stream_flags2 & 0xf == check_type)
+    if not flags_match:
+        raise InvalidXZ('Footer stream flags != header stream flags')
+    # The stored value counts four byte units, minus one
+    real_backward_size = 4 * (1 + backward_size)
+    if real_backward_size != index_size:
+        raise InvalidXZ('Footer backward size != actual index size')
+    magic = f.read(2)
+    if magic != b'YZ':
+        raise InvalidXZ('Stream footer has incorrect magic bytes')
+    if crc != crc32(raw):
+        raise InvalidXZ('Stream footer CRC mismatch')
+
+def read_stream(f, outfile):
+    ''' Decode one complete stream (header, blocks, index, footer) from f,
+    writing the decompressed data to outfile. Raises NotXZ/InvalidXZ if the
+    stream is not valid, including when it ends before the index. '''
+    check_type = read_stream_header(f)
+    blocks, index = [], None
+    index_size = 0
+    while True:
+        sz = f.read(1)
+        if not sz:
+            # Without this the empty read would fail later in ord() with an
+            # unrelated TypeError
+            raise InvalidXZ('Unexpected end of data, stream has no index')
+        if sz == b'\0':
+            # A null byte marks the start of the index
+            pos = f.tell() - 1
+            index = tuple(read_index(f))
+            index_size = f.tell() - pos
+            break
+        else:
+            blocks.append(read_block(f, sz, check_type, outfile))
+    if index != tuple(blocks):
+        raise InvalidXZ('Index does not match actual blocks in file')
+    read_stream_footer(f, check_type, index_size)
+
+def decompress(raw, outfile=None):
+    ''' Decompress XZ data, given either as a bytes object or as a seekable
+    file-like object, into outfile and return outfile. When no outfile is
+    given a SpooledTemporaryFile is created. All streams found in the data
+    are decoded one after the other. '''
+    if isinstance(raw, bytes):
+        raw = BytesIO(raw)
+    if not outfile:
+        outfile = SpooledTemporaryFile(50 * 1024 * 1024, '_xz_decompress')
+    outfile.seek(0)
+    while True:
+        read_stream(raw, outfile)
+        pos = raw.tell()
+        # Look for the header of another stream in the data that follows
+        trail = raw.read(1024)
+        if len(trail) < 20:
+            break
+        idx = trail.find(HEADER_MAGIC)
+        if idx == -1:
+            break
+        # Found another stream, go back to where the look-ahead started
+        raw.seek(pos)
+        if idx:
+            # Bytes between streams must be nulls, in multiples of four
+            padding = raw.read(idx)
+            if padding.lstrip(b'\0') or len(padding) % 4:
+                raise InvalidXZ('Found trailing garbage between streams')
+    return outfile
+
+if __name__ == '__main__':
+    # Decompress the file named by the last command line argument; the with
+    # statement makes sure the input file is closed again
+    import sys
+    with open(sys.argv[-1], 'rb') as f:
+        decompress(f)