Implement decoding of .xz files

LZMA decoding code taken from the public domain LZMA SDK by Igor Pavlov.
This commit is contained in:
Kovid Goyal 2015-08-06 13:14:37 +05:30
parent df3f850407
commit 50e0da0804
13 changed files with 2638 additions and 2 deletions

View File

@ -8,8 +8,8 @@ Files: src/duktape/*
Copyright: Various
License: MIT
Files: resources/rapydscript/compiler.js
Copyright: Alexander Tsepkov
Files: resources/rapydscript/*
Copyright: Various
License: BSD
Files: src/unrar/*
@ -21,6 +21,14 @@ Files: src/html5lib/*
Copyright: Copyright (c) 2006-2013 James Graham and other contributors
License: Expat
Files: src/lzma/*
Copyright: Igor Pavlov
License: Public Domain
Files: src/lzma/*.py src/lzma/lzma_binding.c
Copyright: Kovid Goyal
License: GPLv3
Files: src/templite/*
Copyright: Copyright (c) 2009 joonis new media, Thimo Kraemer
License: GPL-2+

View File

@ -66,6 +66,11 @@ if iswindows:
extensions = [
Extension('lzma_binding',
glob.glob(os.path.join(SRC, 'lzma', '*.c')),
headers=glob.glob(os.path.join(SRC, 'lzma', '*.h')),
),
Extension('dukpy',
['duktape/%s.c' % x for x in 'errors context conversions proxy module duktape/duktape'.split()],
headers=['duktape/dukpy.h', 'duktape/duktape/duktape.h'],

View File

@ -147,6 +147,7 @@ class Plugins(collections.Mapping):
'tokenizer',
'certgen',
'dukpy',
'lzma_binding',
]
if iswindows:
plugins.extend(['winutil', 'wpd', 'winfonts'])

256
src/lzma/7zTypes.h Normal file
View File

@ -0,0 +1,256 @@
/* 7zTypes.h -- Basic types
2013-11-12 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
#endif
#include <stddef.h>
#ifndef EXTERN_C_BEGIN
#ifdef __cplusplus
#define EXTERN_C_BEGIN extern "C" {
#define EXTERN_C_END }
#else
#define EXTERN_C_BEGIN
#define EXTERN_C_END
#endif
#endif
EXTERN_C_BEGIN
/* Result codes used throughout the SDK. SZ_OK (0) is the only success value;
   RINOK() below treats every non-zero result as a failure to propagate.
   Values 13-15 are not assigned. */
#define SZ_OK 0
#define SZ_ERROR_DATA 1
#define SZ_ERROR_MEM 2
#define SZ_ERROR_CRC 3
#define SZ_ERROR_UNSUPPORTED 4
#define SZ_ERROR_PARAM 5
#define SZ_ERROR_INPUT_EOF 6
#define SZ_ERROR_OUTPUT_EOF 7
#define SZ_ERROR_READ 8
#define SZ_ERROR_WRITE 9
#define SZ_ERROR_PROGRESS 10
#define SZ_ERROR_FAIL 11
#define SZ_ERROR_THREAD 12
#define SZ_ERROR_ARCHIVE 16
#define SZ_ERROR_NO_ARCHIVE 17
/* Return type of SDK functions that report one of the SZ_* codes above. */
typedef int SRes;
/* WRes: unsigned on Windows (stand-in for DWORD, see the commented-out
   typedef), plain int elsewhere. */
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#else
typedef int WRes;
#endif
/* RINOK(x): evaluate x exactly once and, if the result is non-zero, return it
   from the enclosing function. Only defined here if not already defined. */
#ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
/* Basic integer aliases used by the SDK sources. */
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
/* _LZMA_UINT32_IS_ULONG selects long instead of int for the 32-bit aliases. */
#ifdef _LZMA_UINT32_IS_ULONG
typedef long Int32;
typedef unsigned long UInt32;
#else
typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64;
typedef unsigned long UInt64;
#else
/* UINT64_CONST(n) builds a 64-bit unsigned literal; note that it is only
   defined in this branch, not when _SZ_NO_INT_64 is set. */
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n
#else
typedef long long int Int64;
typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL
#endif
#endif
/* SizeT is size_t unless _LZMA_NO_SYSTEM_SIZE_T forces it to UInt32. */
#ifdef _LZMA_NO_SYSTEM_SIZE_T
typedef UInt32 SizeT;
#else
typedef size_t SizeT;
#endif
/* Boolean stored as int, with the two constants used for it. */
typedef int Bool;
#define True 1
#define False 0
/* Calling-convention and inlining decorations. They expand to compiler
   keywords on Windows / MSVC and to nothing everywhere else. */
#ifdef _WIN32
#define MY_STD_CALL __stdcall
#else
#define MY_STD_CALL
#endif
#ifdef _MSC_VER
/* __declspec(noinline) is only used for _MSC_VER >= 1300 */
#if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline)
#else
#define MY_NO_INLINE
#endif
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
#else
#define MY_NO_INLINE
#define MY_CDECL
#define MY_FAST_CALL
#endif
/* The following interfaces use first parameter as pointer to structure */
/* Single-byte input callback. The first argument of Read is the pointer to
   the interface structure itself (see the note above these interfaces). */
typedef struct
{
  Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */
} IByteIn;
/* Single-byte output callback; Write has no return value, so it cannot
   report a failure through this interface. */
typedef struct
{
  void (*Write)(void *p, Byte b);
} IByteOut;
/* Sequential input stream. On entry *size is the number of bytes requested;
   on return it holds the number of bytes actually placed in buf. */
typedef struct
{
  SRes (*Read)(void *p, void *buf, size_t *size);
    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
       (output(*size) < input(*size)) is allowed */
} ISeqInStream;
/* Helper readers declared here and implemented outside this header. */
/* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
/* Sequential output stream; a short write is the error signal. */
typedef struct
{
  size_t (*Write)(void *p, const void *buf, size_t size);
    /* Returns: result - the number of actually written bytes.
       (result < size) means error */
} ISeqOutStream;
/* Seek origins for the Seek callbacks below; the numeric values are
   0 (set), 1 (current) and 2 (end). */
typedef enum
{
  SZ_SEEK_SET = 0,
  SZ_SEEK_CUR = 1,
  SZ_SEEK_END = 2
} ESzSeek;
/* Seekable input stream. Seek takes the position through a pointer
   (presumably so the resulting position can be written back -- TODO confirm
   against an implementation). */
typedef struct
{
  SRes (*Read)(void *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
} ISeekInStream;
/* Input stream with look-ahead: Look exposes a pointer to pending data
   without consuming it, and Skip then consumes up to the amount that Look
   reported. */
typedef struct
{
  SRes (*Look)(void *p, const void **buf, size_t *size);
    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
       (output(*size) > input(*size)) is not allowed
       (output(*size) < input(*size)) is allowed */
  SRes (*Skip)(void *p, size_t offset);
    /* offset must be <= output(*size) of Look */
  SRes (*Read)(void *p, void *buf, size_t *size);
    /* reads directly (without buffer). It's same as ISeqInStream::Read */
  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
} ILookInStream;
/* Convenience wrappers declared here and implemented outside this header. */
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
/* reads via ILookInStream::Read */
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
/* Size of the buffer embedded in CLookToRead: 1 << 14 = 16384 bytes. */
#define LookToRead_BUF_SIZE (1 << 14)
/* Adapter holding an ILookInStream vtable (s), a pointer to an underlying
   ISeekInStream and a fixed-size buffer. pos/size presumably delimit the
   buffered data still unread -- TODO confirm against the implementation,
   which is not part of this header. */
typedef struct
{
  ILookInStream s;
  ISeekInStream *realStream;
  size_t pos;
  size_t size;
  Byte buf[LookToRead_BUF_SIZE];
} CLookToRead;
void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
void LookToRead_Init(CLookToRead *p);
/* Adapter exposing an ISeqInStream vtable (s) on top of an ILookInStream.
   The vtable is filled in by SecToLook_CreateVTable. */
typedef struct
{
  ISeqInStream s;
  ILookInStream *realStream;
} CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p);
/* Same layout as CSecToLook; the difference lies in the vtable installed by
   SecToRead_CreateVTable, which is implemented outside this header. */
typedef struct
{
  ISeqInStream s;
  ILookInStream *realStream;
} CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p);
/* Progress callback receiving the input and output sizes processed so far;
   a non-SZ_OK return value asks the caller to stop. */
typedef struct
{
  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
    /* Returns: result. (result != SZ_OK) means break.
       Value (UInt64)(Int64)-1 for size means unknown value. */
} ICompressProgress;
/* Pluggable memory allocator handed to the SDK allocation functions. */
typedef struct
{
  void *(*Alloc)(void *p, size_t size);
  void (*Free)(void *p, void *address); /* address can be 0 */
} ISzAlloc;
/* Call-through helpers: the allocator object itself is passed as the first
   argument of its own callbacks. */
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
#define IAlloc_Free(p, a) (p)->Free((p), a)
/* Path separator as narrow/wide character and narrow/wide string:
   backslash on Windows, forward slash elsewhere. */
#ifdef _WIN32
#define CHAR_PATH_SEPARATOR '\\'
#define WCHAR_PATH_SEPARATOR L'\\'
#define STRING_PATH_SEPARATOR "\\"
#define WSTRING_PATH_SEPARATOR L"\\"
#else
#define CHAR_PATH_SEPARATOR '/'
#define WCHAR_PATH_SEPARATOR L'/'
#define STRING_PATH_SEPARATOR "/"
#define WSTRING_PATH_SEPARATOR L"/"
#endif
EXTERN_C_END
#endif

31
src/lzma/Compiler.h Normal file
View File

@ -0,0 +1,31 @@
/* Compiler.h
2015-03-25 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
/* MSVC-only: silence warnings that the SDK sources are known to trigger.
   Which warnings are disabled depends on UNDER_CE and on the compiler
   version. */
#ifdef _MSC_VER
#ifdef UNDER_CE
#define RPC_NO_WINDOWS_H
/* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif
#if _MSC_VER >= 1300
#pragma warning(disable : 4996) // This function or variable may be unsafe
#else
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#endif
/* Marks a variable as intentionally unused. NOTE: the expansion already ends
   with ';', so it forms a complete statement on its own. */
#define UNUSED_VAR(x) (void)x;
/* #define UNUSED_VAR(x) x=x; */
#endif

378
src/lzma/Lzma2Dec.c Normal file
View File

@ -0,0 +1,378 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2014-10-29 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
#include "Precomp.h"
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
#include <string.h>
#include "Lzma2Dec.h"
/*
00000000 - EOS
00000001 U U - Uncompressed Reset Dic
00000010 U U - Uncompressed No Reset
100uuuuu U U P P - LZMA no reset
101uuuuu U U P P - LZMA reset state
110uuuuu U U P P S - LZMA reset state + new prop
111uuuuu U U P P S - LZMA reset state + new prop + reset dic
u, U - Unpack Size
P - Pack Size
S - Props
*/
/* Bit 7 of the control byte distinguishes LZMA chunks from uncompressed ones. */
#define LZMA2_CONTROL_LZMA (1 << 7)
/* Control byte values for chunks without bit 7 set (see the table above). */
#define LZMA2_CONTROL_COPY_NO_RESET 2
#define LZMA2_CONTROL_COPY_RESET_DIC 1
#define LZMA2_CONTROL_EOF 0
#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0)
/* Bits 5-6 of the control byte: reset mode 0..3 of an LZMA chunk. */
#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3)
/* Modes 2 and 3 are followed by a props byte. */
#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2)
/* Upper bound enforced on lc + lp when a props byte is parsed. */
#define LZMA2_LCLP_MAX 4
/* Dictionary size encoded by prop p: (2 | (p & 1)) * 2^(p / 2 + 11),
   i.e. 4096 for p == 0. */
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
/* PRF(x) emits x only in SHOW_DEBUG_INFO builds and vanishes otherwise. */
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
/* Parser states of the LZMA2 chunk decoder. The first six name the header
   byte expected next; DATA / DATA_CONT mean chunk payload is being consumed
   (DATA = the chunk's payload has not been started yet). */
typedef enum
{
  LZMA2_STATE_CONTROL,   /* control byte */
  LZMA2_STATE_UNPACK0,   /* unpack size, bits 8-15 */
  LZMA2_STATE_UNPACK1,   /* unpack size, bits 0-7 */
  LZMA2_STATE_PACK0,     /* pack size, bits 8-15 */
  LZMA2_STATE_PACK1,     /* pack size, bits 0-7 */
  LZMA2_STATE_PROP,      /* lc/lp/pb props byte */
  LZMA2_STATE_DATA,
  LZMA2_STATE_DATA_CONT,
  LZMA2_STATE_FINISHED,  /* a zero control byte was read */
  LZMA2_STATE_ERROR
} ELzma2State;
/* Expands the one-byte LZMA2 dictionary property into the 5-byte LZMA
   properties layout: props[0] is the fixed LZMA2_LCLP_MAX value and
   props[1..4] hold the dictionary size, least significant byte first.
   Returns SZ_ERROR_UNSUPPORTED for prop > 40, SZ_OK otherwise. */
static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
{
  UInt32 dicSize;
  unsigned i;

  if (prop > 40)
    return SZ_ERROR_UNSUPPORTED;

  /* 40 is the special value selecting the largest 32-bit size */
  if (prop == 40)
    dicSize = 0xFFFFFFFF;
  else
    dicSize = LZMA2_DIC_SIZE_FROM_PROP(prop);

  props[0] = (Byte)LZMA2_LCLP_MAX;
  for (i = 0; i < 4; i++)
    props[1 + i] = (Byte)(dicSize >> (8 * i));
  return SZ_OK;
}
/* Converts the LZMA2 prop byte to LZMA properties and forwards to
   LzmaDec_AllocateProbs for the embedded decoder.
   Returns the conversion error, if any, else the LzmaDec_AllocateProbs result. */
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
{
  Byte props[LZMA_PROPS_SIZE];
  SRes res = Lzma2Dec_GetOldProps(prop, props);
  if (res != SZ_OK)
    return res;
  return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
}
/* Converts the LZMA2 prop byte to LZMA properties and forwards to
   LzmaDec_Allocate for the embedded decoder.
   Returns the conversion error, if any, else the LzmaDec_Allocate result. */
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
{
  Byte props[LZMA_PROPS_SIZE];
  SRes res = Lzma2Dec_GetOldProps(prop, props);
  if (res != SZ_OK)
    return res;
  return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
}
/* Puts the LZMA2 decoder at the start of a new stream: the next byte is a
   control byte, and dictionary, state and props must all be (re)initialised
   by the first chunk. The embedded LZMA decoder is reset as well. */
void Lzma2Dec_Init(CLzma2Dec *p)
{
  p->state = LZMA2_STATE_CONTROL;
  p->needInitProp = p->needInitState = p->needInitDic = True;
  LzmaDec_Init(&p->decoder);
}
/* Feeds one chunk-header byte to the LZMA2 header parser.
   p->state selects which header field b belongs to; the field is stored in
   *p and the state to use for the next byte is returned (the caller assigns
   it to p->state). Returns LZMA2_STATE_ERROR for an invalid header byte, and
   also when p->state is not one of the header states handled here. */
static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
  switch (p->state)
  {
    case LZMA2_STATE_CONTROL:
      p->control = b;
      PRF(printf("\n %4X ", p->decoder.dicPos));
      PRF(printf(" %2X", b));
      /* a zero control byte ends the LZMA2 stream */
      if (p->control == 0)
        return LZMA2_STATE_FINISHED;
      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
      {
        /* only values 1 and 2 are valid for uncompressed chunks */
        if ((p->control & 0x7F) > 2)
          return LZMA2_STATE_ERROR;
        p->unpackSize = 0;
      }
      else
        /* low five control bits are bits 16-20 of the unpack size */
        p->unpackSize = (UInt32)(p->control & 0x1F) << 16;
      return LZMA2_STATE_UNPACK0;

    case LZMA2_STATE_UNPACK0:
      p->unpackSize |= (UInt32)b << 8;
      return LZMA2_STATE_UNPACK1;

    case LZMA2_STATE_UNPACK1:
      p->unpackSize |= (UInt32)b;
      /* the size is stored minus one */
      p->unpackSize++;
      PRF(printf(" %8d", p->unpackSize));
      /* uncompressed chunks carry no pack size or props */
      return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;

    case LZMA2_STATE_PACK0:
      p->packSize = (UInt32)b << 8;
      return LZMA2_STATE_PACK1;

    case LZMA2_STATE_PACK1:
      p->packSize |= (UInt32)b;
      /* the size is stored minus one */
      p->packSize++;
      PRF(printf(" %8d", p->packSize));
      /* a chunk without a props byte is an error while props are still required */
      return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP:
        (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA);

    case LZMA2_STATE_PROP:
    {
      /* props byte encodes (pb * 5 + lp) * 9 + lc */
      unsigned lc, lp;
      if (b >= (9 * 5 * 5))
        return LZMA2_STATE_ERROR;
      lc = b % 9;
      b /= 9;
      p->decoder.prop.pb = b / 5;
      lp = b % 5;
      if (lc + lp > LZMA2_LCLP_MAX)
        return LZMA2_STATE_ERROR;
      p->decoder.prop.lc = lc;
      p->decoder.prop.lp = lp;
      p->needInitProp = False;
      return LZMA2_STATE_DATA;
    }
  }
  return LZMA2_STATE_ERROR;
}
/* Appends `size` raw bytes from src to the dictionary at dicPos and advances
   the decoder's position counters as if those bytes had been decoded. */
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
{
  Byte *dest = p->dic + p->dicPos;
  /* distance from processedPos to dicSize, taken before either is touched */
  UInt32 remaining = p->prop.dicSize - p->processedPos;

  memcpy(dest, src, size);
  p->dicPos += size;
  if (p->checkDicSize == 0 && remaining <= size)
    p->checkDicSize = p->prop.dicSize;
  p->processedPos += (UInt32)size;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState);
/* Decodes LZMA2 data from src into the decoder's dictionary buffer, writing
   no further than dicLimit.
   In:  *srcLen = number of bytes available at src.
   Out: *srcLen = number of bytes consumed; *status = why decoding stopped.
   Returns SZ_OK, or SZ_ERROR_DATA for corrupt input (p->state is then left
   at LZMA2_STATE_ERROR, so later calls keep failing). A non-zero result of
   LzmaDec_DecodeToDic is passed through unchanged. */
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
  SizeT inSize = *srcLen;
  *srcLen = 0;
  *status = LZMA_STATUS_NOT_SPECIFIED;

  while (p->state != LZMA2_STATE_FINISHED)
  {
    SizeT dicPos = p->decoder.dicPos;
    if (p->state == LZMA2_STATE_ERROR)
      return SZ_ERROR_DATA;
    /* output limit reached and the caller does not insist on the end marker */
    if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
    {
      *status = LZMA_STATUS_NOT_FINISHED;
      return SZ_OK;
    }
    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
    {
      /* header parsing: consume exactly one byte per iteration */
      if (*srcLen == inSize)
      {
        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
        return SZ_OK;
      }
      (*srcLen)++;
      p->state = Lzma2Dec_UpdateState(p, *src++);
      /* reached only with finishMode != LZMA_FINISH_ANY: at the output limit
         the only acceptable header byte is the end-of-stream marker */
      if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)
      {
        p->state = LZMA2_STATE_ERROR;
        return SZ_ERROR_DATA;
      }
      continue;
    }
    {
      SizeT destSizeCur = dicLimit - dicPos;
      SizeT srcSizeCur = inSize - *srcLen;
      ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
      /* the rest of this chunk fits below dicLimit: decode it to its end */
      if (p->unpackSize <= destSizeCur)
      {
        destSizeCur = (SizeT)p->unpackSize;
        curFinishMode = LZMA_FINISH_END;
      }
      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
      {
        if (*srcLen == inSize)
        {
          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
          return SZ_OK;
        }
        if (p->state == LZMA2_STATE_DATA)
        {
          /* first payload bytes of this chunk: apply the requested reset */
          Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
          if (initDic)
            p->needInitProp = p->needInitState = True;
          else if (p->needInitDic)
          {
            /* a dictionary reset is still pending but this chunk has none */
            p->state = LZMA2_STATE_ERROR;
            return SZ_ERROR_DATA;
          }
          p->needInitDic = False;
          LzmaDec_InitDicAndState(&p->decoder, initDic, False);
        }
        if (srcSizeCur > destSizeCur)
          srcSizeCur = destSizeCur;
        /* input is available (checked above), so zero here means there is
           no output room left */
        if (srcSizeCur == 0)
        {
          p->state = LZMA2_STATE_ERROR;
          return SZ_ERROR_DATA;
        }
        LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur);
        src += srcSizeCur;
        *srcLen += srcSizeCur;
        p->unpackSize -= (UInt32)srcSizeCur;
        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
      }
      else
      {
        SizeT outSizeProcessed;
        SRes res;
        if (p->state == LZMA2_STATE_DATA)
        {
          /* first payload bytes of this chunk: apply the requested resets */
          unsigned mode = LZMA2_GET_LZMA_MODE(p);
          Bool initDic = (mode == 3);
          Bool initState = (mode != 0);
          /* a required reset that this chunk does not perform is an error */
          if ((!initDic && p->needInitDic) || (!initState && p->needInitState))
          {
            p->state = LZMA2_STATE_ERROR;
            return SZ_ERROR_DATA;
          }
          LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
          p->needInitDic = False;
          p->needInitState = False;
          p->state = LZMA2_STATE_DATA_CONT;
        }
        /* never hand the LZMA decoder bytes beyond this chunk's packed data */
        if (srcSizeCur > p->packSize)
          srcSizeCur = (SizeT)p->packSize;
        res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status);
        /* book-keeping happens before the result check, so the counters are
           updated even when res is an error */
        src += srcSizeCur;
        *srcLen += srcSizeCur;
        p->packSize -= (UInt32)srcSizeCur;
        outSizeProcessed = p->decoder.dicPos - dicPos;
        p->unpackSize -= (UInt32)outSizeProcessed;
        RINOK(res);
        if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
          return res;
        if (srcSizeCur == 0 && outSizeProcessed == 0)
        {
          /* no progress: valid only when the chunk is exactly complete */
          if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
              || p->unpackSize != 0
              || p->packSize != 0)
          {
            p->state = LZMA2_STATE_ERROR;
            return SZ_ERROR_DATA;
          }
          p->state = LZMA2_STATE_CONTROL;
        }
        /* the end of one chunk is not the end of the LZMA2 stream */
        if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
          *status = LZMA_STATUS_NOT_FINISHED;
      }
    }
  }
  /* loop left because a zero control byte was seen */
  *status = LZMA_STATUS_FINISHED_WITH_MARK;
  return SZ_OK;
}
/* Buffer interface: decodes into the decoder's own dictionary and copies each
   newly produced run of bytes to dest.
   In:  *destLen = capacity of dest, *srcLen = bytes available at src.
   Out: *destLen = bytes written to dest, *srcLen = bytes consumed from src.
   Returns the first non-zero result of Lzma2Dec_DecodeToDic, else SZ_OK once
   dest is full or a pass produced no output. */
SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
  SizeT outSize = *destLen, inSize = *srcLen;
  *srcLen = *destLen = 0;
  for (;;)
  {
    SizeT srcSizeCur = inSize, outSizeCur, dicPos;
    ELzmaFinishMode curFinishMode;
    SRes res;
    /* the dictionary is used circularly: wrap the write position at its end */
    if (p->decoder.dicPos == p->decoder.dicBufSize)
      p->decoder.dicPos = 0;
    dicPos = p->decoder.dicPos;
    if (outSize > p->decoder.dicBufSize - dicPos)
    {
      /* more output wanted than fits before the wrap point: decode up to the
         end of the dictionary and come back for the rest */
      outSizeCur = p->decoder.dicBufSize;
      curFinishMode = LZMA_FINISH_ANY;
    }
    else
    {
      /* this pass can satisfy the caller, so the caller's finish mode applies */
      outSizeCur = dicPos + outSize;
      curFinishMode = finishMode;
    }
    /* here outSizeCur is the dictionary limit for this pass */
    res = Lzma2Dec_DecodeToDic(p, outSizeCur, src, &srcSizeCur, curFinishMode, status);
    src += srcSizeCur;
    inSize -= srcSizeCur;
    *srcLen += srcSizeCur;
    /* from here on outSizeCur is the number of bytes produced by this pass */
    outSizeCur = p->decoder.dicPos - dicPos;
    memcpy(dest, p->decoder.dic + dicPos, outSizeCur);
    dest += outSizeCur;
    outSize -= outSizeCur;
    *destLen += outSizeCur;
    if (res != 0)
      return res;
    if (outSizeCur == 0 || outSize == 0)
      return SZ_OK;
  }
}
/* One-call interface: decodes an LZMA2 stream from src straight into dest,
   using dest itself as the dictionary buffer.
   In:  *destLen = capacity of dest, *srcLen = bytes available at src.
   Out: *destLen = bytes produced, *srcLen = bytes consumed.
   Returns the allocation or decoding error, or SZ_ERROR_INPUT_EOF when the
   decoder stopped because it needed more input. */
SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc)
{
  CLzma2Dec dec;
  SRes res;
  const SizeT outCapacity = *destLen;
  const SizeT inAvailable = *srcLen;

  *destLen = 0;
  *srcLen = 0;
  *status = LZMA_STATUS_NOT_SPECIFIED;

  Lzma2Dec_Construct(&dec);
  res = Lzma2Dec_AllocateProbs(&dec, prop, alloc);
  if (res != SZ_OK)
    return res;

  /* only the probability tables are allocated; output goes directly to dest */
  dec.decoder.dic = dest;
  dec.decoder.dicBufSize = outCapacity;
  Lzma2Dec_Init(&dec);

  *srcLen = inAvailable;
  res = Lzma2Dec_DecodeToDic(&dec, outCapacity, src, srcLen, finishMode, status);
  *destLen = dec.decoder.dicPos;
  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
    res = SZ_ERROR_INPUT_EOF;

  Lzma2Dec_FreeProbs(&dec, alloc);
  return res;
}

80
src/lzma/Lzma2Dec.h Normal file
View File

@ -0,0 +1,80 @@
/* Lzma2Dec.h -- LZMA2 Decoder
2015-05-13 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_H
#define __LZMA2_DEC_H
#include "LzmaDec.h"
EXTERN_C_BEGIN
/* ---------- State Interface ---------- */
/* LZMA2 decoder state: the embedded LZMA decoder plus the parse state of the
   chunk currently being read. */
typedef struct
{
  CLzmaDec decoder;
  UInt32 packSize;     /* compressed bytes of the current chunk not yet consumed */
  UInt32 unpackSize;   /* uncompressed bytes of the current chunk not yet produced */
  unsigned state;      /* ELzma2State value (the enum is private to Lzma2Dec.c) */
  Byte control;        /* control byte of the current chunk */
  Bool needInitDic;    /* the next chunk must reset the dictionary */
  Bool needInitState;  /* the next LZMA chunk must reset the decoder state */
  Bool needInitProp;   /* the next LZMA chunk must supply a props byte */
} CLzma2Dec;
/* Thin wrappers forwarding to the LzmaDec_* counterparts for p->decoder.
   NOTE: Lzma2Dec_FreeProbs and Lzma2Dec_Free already expand with a trailing
   ';', so they are only safe to use as complete statements. */
#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc);
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc);
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
void Lzma2Dec_Init(CLzma2Dec *p);
/*
finishMode:
It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
LZMA_FINISH_ANY - use smallest number of input bytes
LZMA_FINISH_END - read EndOfStream marker after decoding
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_NEEDS_MORE_INPUT
SZ_ERROR_DATA - Data error
*/
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- One Call Interface ---------- */
/*
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - use smallest number of input bytes
LZMA_FINISH_END - read EndOfStream marker after decoding
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
*/
SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc);
EXTERN_C_END
#endif

1091
src/lzma/LzmaDec.c Normal file

File diff suppressed because it is too large Load Diff

227
src/lzma/LzmaDec.h Normal file
View File

@ -0,0 +1,227 @@
/* LzmaDec.h -- LZMA Decoder
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */
#ifdef _LZMA_PROB32
#define CLzmaProb UInt32
#else
#define CLzmaProb UInt16
#endif
/* ---------- LZMA Properties ---------- */
#define LZMA_PROPS_SIZE 5
/* Decoded LZMA stream properties: the lc, lp and pb parameters and the
   dictionary size in bytes. Filled in by LzmaProps_Decode. */
typedef struct _CLzmaProps
{
  unsigned lc, lp, pb;
  UInt32 dicSize;
} CLzmaProps;
/* LzmaProps_Decode - decodes properties
Returns:
SZ_OK
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
/* ---------- LZMA Decoder state ---------- */
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
#define LZMA_REQUIRED_INPUT_MAX 20
/* Complete LZMA decoder state. Callers of the Dictionary Interface work with
   dic, dicPos and dicBufSize directly (see the interface notes below); the
   remaining fields are the decoder's own book-keeping, and their exact
   meaning is defined by LzmaDec.c. */
typedef struct
{
  CLzmaProps prop;
  CLzmaProb *probs;     /* probability tables; zeroed by LzmaDec_Construct */
  Byte *dic;            /* dictionary / output buffer; zeroed by LzmaDec_Construct */
  const Byte *buf;
  UInt32 range, code;
  SizeT dicPos;         /* current write position inside dic */
  SizeT dicBufSize;     /* size of the buffer at dic */
  UInt32 processedPos;
  UInt32 checkDicSize;
  unsigned state;
  UInt32 reps[4];
  unsigned remainLen;
  int needFlush;
  int needInitState;
  UInt32 numProbs;
  unsigned tempBufSize; /* presumably the number of valid bytes in tempBuf -- TODO confirm */
  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec;
/* Marks a fresh decoder as owning no memory by zeroing both pointers.
   NOTE: expands to a braced block, not to an expression. */
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
void LzmaDec_Init(CLzmaDec *p);
/* There are two types of LZMA streams:
0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
typedef enum
{
LZMA_FINISH_ANY, /* finish at any point */
LZMA_FINISH_END /* block must be finished at the end */
} ELzmaFinishMode;
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
You must use LZMA_FINISH_END, when you know that current output buffer
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
and output value of destLen will be less than output buffer size limit.
You can check status result also.
You can use multiple checks to test data integrity after full decompression:
1) Check Result and "status" variable.
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
You must use correct finish mode in that case. */
typedef enum
{
LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
} ELzmaStatus;
/* ELzmaStatus is used only as output value for function call */
/* ---------- Interfaces ---------- */
/* There are 3 levels of interfaces:
1) Dictionary Interface
2) Buffer Interface
3) One Call Interface
You can select any of these interfaces, but don't mix functions from different
groups for same object. */
/* There are two variants to allocate state for Dictionary Interface:
1) LzmaDec_Allocate / LzmaDec_Free
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
You can use variant 2, if you set dictionary buffer manually.
For Buffer Interface you must always use variant 1.
LzmaDec_Allocate* can return:
SZ_OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
/* ---------- Dictionary Interface ---------- */
/* You can use it, if you want to eliminate the overhead for data copying from
dictionary to some other external buffer.
You must work with CLzmaDec variables directly in this interface.
STEPS:
LzmaDec_Construct()
LzmaDec_Allocate()
for (each new stream)
{
LzmaDec_Init()
while (it needs more decompression)
{
LzmaDec_DecodeToDic()
use data from CLzmaDec::dic and update CLzmaDec::dicPos
}
}
LzmaDec_Free()
*/
/* LzmaDec_DecodeToDic
The decoding to internal dictionary buffer (CLzmaDec::dic).
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
finishMode:
It has meaning only if the decoding reaches output limit (dicLimit).
LZMA_FINISH_ANY - Decode just dicLimit bytes.
LZMA_FINISH_END - Stream must be finished after dicLimit.
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- Buffer Interface ---------- */
/* It's zlib-like interface.
See LzmaDec_DecodeToDic description for information about STEPS and return results,
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
to work with CLzmaDec variables manually.
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
*/
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- One Call Interface ---------- */
/* LzmaDecode
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
ELzmaStatus *status, ISzAlloc *alloc);
EXTERN_C_END
#endif

10
src/lzma/Precomp.h Normal file
View File

@ -0,0 +1,10 @@
/* Precomp.h -- StdAfx
2013-11-12 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H
#define __7Z_PRECOMP_H
#include "Compiler.h"
/* #include "7zTypes.h" */
#endif

10
src/lzma/__init__.py Normal file
View File

@ -0,0 +1,10 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

189
src/lzma/lzma_binding.c Normal file
View File

@ -0,0 +1,189 @@
/*
* lzma_binding.c
* Copyright (C) 2015 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define PY_SSIZE_T_CLEAN
#define UNICODE
#include "Python.h"
#include "Lzma2Dec.h"
/* ISzAlloc callbacks backed by the Python memory allocator. The interface
   pointer p carries no state here and is ignored. */
static void *Alloc(void *p, size_t size) {
    (void)p;
    return PyMem_Malloc(size);
}

static void Free(void *p, void *address) {
    (void)p;
    PyMem_Free(address);
}

/* Allocator instance handed to the LZMA SDK allocation functions. */
static ISzAlloc allocator = { Alloc, Free };
/* Exception type raised for LZMA SDK failures; created in the module init
   function. */
static PyObject *LZMAError = NULL;

/* Names of the SRes codes, indexed by code value. Codes 13-15 are not
   assigned by the SDK. */
static const char* error_codes[18] = {
    "OK",
    "SZ_ERROR_DATA",
    "SZ_ERROR_MEM",
    "SZ_ERROR_CRC",
    "SZ_ERROR_UNSUPPORTED",
    "SZ_ERROR_PARAM",
    "SZ_ERROR_INPUT_EOF",
    "SZ_ERROR_OUTPUT_EOF",
    "SZ_ERROR_READ",
    "SZ_ERROR_WRITE",
    "SZ_ERROR_PROGRESS",
    "SZ_ERROR_FAIL",
    "SZ_ERROR_THREAD",
    "UNKNOWN", "UNKNOWN", "UNKNOWN",
    "SZ_ERROR_ARCHIVE",
    "SZ_ERROR_NO_ARCHIVE",
};

/* Raise LZMAError with the symbolic name of SRes code x. The upper bound is
   derived from the table size so that the last entry (SZ_ERROR_NO_ARCHIVE,
   17) is reachable; anything outside 1..17 is reported as "UNKNOWN". */
#define SET_ERROR(x) PyErr_SetString(LZMAError, ((x) > 0 && (x) < (int)(sizeof(error_codes) / sizeof(error_codes[0]))) ? error_codes[(x)] : "UNKNOWN")
static UInt64 crc64_table[256];
static void init_crc_table() {
static const UInt64 poly64 = (UInt64)(0xC96C5795D7870F42);
for (size_t i = 0; i < 256; ++i) {
UInt64 crc64 = i;
for (size_t j = 0; j < 8; ++j) {
if (crc64 & 1)
crc64 = (crc64 >> 1) ^ poly64;
else
crc64 >>= 1;
}
crc64_table[i] = crc64;
}
}
/* crc64(data[, crc]) -> int
   Returns the CRC-64 of data. The optional second argument is the result of
   a previous call, which allows the checksum to be computed incrementally.
   The running value is inverted on entry and again on exit. */
static PyObject *
crc64(PyObject *self, PyObject *args) {
    unsigned char *data = NULL;
    const unsigned char *cur = NULL, *end = NULL;
    Py_ssize_t size = 0;
    UInt64 crc = 0;

    if (!PyArg_ParseTuple(args, "s#|K", &data, &size, &crc)) return NULL;

    crc = ~crc;
    end = data + size;
    for (cur = data; cur < end; ++cur)
        crc = crc64_table[(*cur ^ crc) & 0xFF] ^ (crc >> 8);
    return Py_BuildValue("K", ~crc);
}
/* delta_decode(rawarray, histarray, pos, distance) -> new pos
   Applies the delta filter to the bytearray rawarray in place. histarray is
   a 256 byte bytearray used as a circular history buffer; pos is the current
   index into it and is returned so that the next call can continue where
   this one stopped.
   Raises TypeError if an argument has the wrong type or histarray is not
   exactly 256 bytes long. */
static PyObject*
delta_decode(PyObject *self, PyObject *args) {
    PyObject *array = NULL, *histarray = NULL;
    /* Both are parsed with the "B" format unit, which stores an unsigned
       char; they must therefore be declared as unsigned char. */
    unsigned char *data = NULL, *history = NULL, pos = 0, distance = 0;
    Py_ssize_t datalen = 0, i = 0;

    if (!PyArg_ParseTuple(args, "O!O!BB", &PyByteArray_Type, &array, &PyByteArray_Type, &histarray, &pos, &distance)) return NULL;
    if (PyByteArray_GET_SIZE(histarray) != 256) {
        PyErr_SetString(PyExc_TypeError, "histarray must be 256 bytes long");
        return NULL;
    }
    data = (unsigned char*)PyByteArray_AS_STRING(array); history = (unsigned char*)PyByteArray_AS_STRING(histarray);
    /* array is a bytearray, so its length must be read with the bytearray macro */
    datalen = PyByteArray_GET_SIZE(array);

    for (i = 0; i < datalen; i++) {
        /* the index wraps modulo 256, matching the history size checked above */
        data[i] += history[(unsigned char)(pos + distance)];
        /* pos is an unsigned char and wraps from 0 to 255 */
        history[pos--] = data[i];
    }
    return Py_BuildValue("B", pos);
}
/* decompress2(read, seek, write, props, bufsize)
   Decompresses one LZMA2 stream of unknown compressed size.
     read(n)        must return at most n bytes as a bytes object
     seek(off, how) is used to give back input that was read but not consumed
     write(data)    receives the decompressed output
     props          is the LZMA2 dictionary size property byte
     bufsize        is the size of the internal input and output buffers
   Decoding stops at the LZMA2 end marker; the input is then positioned just
   after the last byte that was consumed. Returns None.
   Raises MemoryError, TypeError (bad props, read() not returning bytes),
   ValueError (bad bufsize, truncated or oversized input), lzma_binding.error
   (decoder failure) or whatever the callbacks raise. */
static PyObject *
decompress2(PyObject *self, PyObject *args) {
    PyObject *read = NULL, *seek = NULL, *write = NULL, *rres = NULL, *tmp = NULL;
    unsigned long bufsize_arg = 0;
    /* The decoder takes SizeT pointers, so the counters must be SizeT and not
       unsigned long (the two differ in size on 64-bit Windows). */
    SizeT bufsize = 0, bytes_written = 0, bytes_read = 0, inbuf_pos = 0, inbuf_len = 0;
    Py_ssize_t leftover = 0, rlen = 0;
    unsigned char props = 0;
    char *inbuf = NULL, *outbuf = NULL;
    CLzma2Dec state;
    SRes res = SZ_OK;
    ELzmaStatus status = LZMA_STATUS_NOT_FINISHED;

    if (!PyArg_ParseTuple(args, "OOOBk", &read, &seek, &write, &props, &bufsize_arg)) return NULL;
    /* bufsize is later passed to Python as a Py_ssize_t */
    if (bufsize_arg == 0 || (size_t)bufsize_arg > (size_t)PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_ValueError, "bufsize must be a positive number that fits in Py_ssize_t");
        return NULL;
    }
    bufsize = (SizeT)bufsize_arg;

    Lzma2Dec_Construct(&state);
    res = Lzma2Dec_Allocate(&state, (Byte)props, &allocator);
    if (res == SZ_ERROR_MEM) { PyErr_NoMemory(); goto exit; }
    if (res != SZ_OK) { PyErr_SetString(PyExc_TypeError, "Incorrect stream properties"); goto exit; }
    inbuf = (char*)PyMem_Malloc(bufsize);
    outbuf = (char*)PyMem_Malloc(bufsize);
    if (!inbuf || !outbuf) {PyErr_NoMemory(); goto exit;}

    Lzma2Dec_Init(&state);

    while (status != LZMA_STATUS_FINISHED_WITH_MARK) {
        bytes_written = bufsize; bytes_read = inbuf_len - inbuf_pos;
        /* the decoder touches no Python objects, so the GIL can be released */
        Py_BEGIN_ALLOW_THREADS;
        res = Lzma2Dec_DecodeToBuf(&state, (Byte*)outbuf, &bytes_written, (Byte*)(inbuf) + inbuf_pos, &bytes_read, LZMA_FINISH_ANY, &status);
        Py_END_ALLOW_THREADS;
        if (res != SZ_OK) { SET_ERROR(res); goto exit; }

        if (bytes_written > 0) {
            tmp = PyObject_CallFunction(write, "s#", outbuf, (Py_ssize_t)bytes_written);
            if (tmp == NULL) goto exit;
            Py_DECREF(tmp); tmp = NULL;  /* the result of write() is not needed */
        }
        inbuf_pos += bytes_read;

        if (status == LZMA_STATUS_NEEDS_MORE_INPUT) {
            /* give back what was not consumed, then refill the input buffer */
            leftover = (Py_ssize_t)(inbuf_len - inbuf_pos);
            inbuf_pos = 0;
            tmp = PyObject_CallFunction(seek, "ni", -leftover, SEEK_CUR);
            if (tmp == NULL) goto exit;
            Py_DECREF(tmp); tmp = NULL;

            rres = PyObject_CallFunction(read, "n", (Py_ssize_t)bufsize);
            if (rres == NULL) goto exit;
            if (!PyBytes_Check(rres)) { PyErr_SetString(PyExc_TypeError, "read() must return a bytes object"); goto exit; }
            rlen = PyBytes_GET_SIZE(rres);
            if (rlen == 0) { PyErr_SetString(PyExc_ValueError, "LZMA2 block was truncated"); goto exit; }
            /* never copy more than the buffer can hold */
            if ((SizeT)rlen > bufsize) { PyErr_SetString(PyExc_ValueError, "read() returned more data than requested"); goto exit; }
            inbuf_len = (SizeT)rlen;
            memcpy(inbuf, PyBytes_AS_STRING(rres), inbuf_len);
            Py_DECREF(rres); rres = NULL;
        }
    }

    /* rewind over input that was read but belongs to whatever follows the stream */
    leftover = (Py_ssize_t)(inbuf_len - inbuf_pos);
    if (leftover > 0) {
        tmp = PyObject_CallFunction(seek, "ni", -leftover, SEEK_CUR);
        if (tmp == NULL) goto exit;
        Py_DECREF(tmp); tmp = NULL;
    }

exit:
    Py_XDECREF(rres);
    Lzma2Dec_Free(&state, &allocator);
    PyMem_Free(inbuf); PyMem_Free(outbuf);
    if (PyErr_Occurred()) return NULL;
    Py_RETURN_NONE;
}
/* Functions exported by the module. All of them take positional arguments
   only (METH_VARARGS); the all-NULL entry terminates the table. */
static PyMethodDef lzma_binding_methods[] = {
    {"decompress2", decompress2, METH_VARARGS,
        "Decompress an LZMA2 encoded block, of unknown compressed size (reads till LZMA2 EOS marker)"
    },

    {"crc64", crc64, METH_VARARGS,
        "crc64(bytes) -> CRC 64 for the provided python bytes object"
    },

    {"delta_decode", delta_decode, METH_VARARGS,
        "delta_decode(rawarray, histarray, pos, distance) -> Apply the delta decode filter to the bytearray rawarray"
    },

    {NULL, NULL, 0, NULL}
};
/* Python 2 module initialisation: builds the CRC-64 table, creates the
   lzma_binding.error exception type and publishes it together with the SZ_*
   result codes as module attributes. Returns early, leaving the Python error
   set, if the exception type or the module cannot be created. */
PyMODINIT_FUNC
initlzma_binding(void) {
    PyObject *m = NULL;

    init_crc_table();
    LZMAError = PyErr_NewException("lzma_binding.error", NULL, NULL);
    if (!LZMAError) return;

    m = Py_InitModule3("lzma_binding", lzma_binding_methods,
        "Bindings to the LZMA (de)compression C code"
    );
    /* must be checked before m is handed to any PyModule_Add* call */
    if (m == NULL) return;

    /* PyModule_AddObject steals a reference; keep one for the static pointer */
    Py_INCREF(LZMAError);
    PyModule_AddObject(m, "error", LZMAError);
    PyModule_AddIntMacro(m, SZ_OK);
    PyModule_AddIntMacro(m, SZ_ERROR_DATA);
    PyModule_AddIntMacro(m, SZ_ERROR_MEM);
    PyModule_AddIntMacro(m, SZ_ERROR_CRC);
    PyModule_AddIntMacro(m, SZ_ERROR_UNSUPPORTED);
    PyModule_AddIntMacro(m, SZ_ERROR_PARAM);
    PyModule_AddIntMacro(m, SZ_ERROR_INPUT_EOF);
    PyModule_AddIntMacro(m, SZ_ERROR_OUTPUT_EOF);
    PyModule_AddIntMacro(m, SZ_ERROR_READ);
    PyModule_AddIntMacro(m, SZ_ERROR_WRITE);
    PyModule_AddIntMacro(m, SZ_ERROR_PROGRESS);
    PyModule_AddIntMacro(m, SZ_ERROR_FAIL);
    PyModule_AddIntMacro(m, SZ_ERROR_THREAD);
    PyModule_AddIntMacro(m, SZ_ERROR_ARCHIVE);
    PyModule_AddIntMacro(m, SZ_ERROR_NO_ARCHIVE);
}

350
src/lzma/xz.py Normal file
View File

@ -0,0 +1,350 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import namedtuple
from io import BytesIO
from hashlib import sha256
from struct import unpack, error as struct_error
from binascii import crc32 as _crc32
from calibre.constants import plugins
from calibre.ptempfile import SpooledTemporaryFile
# plugins['lzma_binding'] is indexed as a pair: [0] is the loaded extension
# module (falsy when loading failed) and [1] is used as the error text.
lzma = plugins['lzma_binding'][0]
if not lzma:
    raise RuntimeError('Failed to load lzma_binding module with error: %s' % plugins['lzma_binding'][1])
# Magic bytes that read_stream_header() requires at the start of a stream.
HEADER_MAGIC = b'\xfd7zXZ\0'
# The only filter IDs accepted by read_block_header(); others are rejected.
DELTA_FILTER_ID = 0x03
LZMA2_FILTER_ID = 0x21
def crc32(raw, start=0):
    """Return the CRC32 of *raw* as a non-negative 32-bit integer.

    *start* is a previously returned checksum, which allows the CRC to be
    computed incrementally over several chunks.
    """
    checksum = _crc32(raw, start)
    return checksum & 0xFFFFFFFF
class XZError(ValueError):
    """Base class of the XZ-specific errors defined in this module."""


class NotXZ(XZError):
    """The input does not start with a valid XZ stream header."""


class InvalidXZ(XZError):
    """The input is malformed, inconsistent or uses unsupported features."""
def decode_var_int(f):
    """Read one XZ variable-length integer from the file-like object *f*.

    The value is stored seven bits per byte, least significant group first;
    every byte except the last has its high bit set.

    :param f: binary file-like object positioned at the first byte.
    :return: the decoded non-negative integer.
    :raises InvalidXZ: if the input ends in the middle of the integer, the
        encoding uses more than nine bytes, or a continuation byte is zero
        (a non-minimal encoding, which the XZ format forbids).
    """
    ans = 0
    for i in range(9):
        byte = f.read(1)
        if not byte:
            # ord('') would raise an unrelated TypeError
            raise InvalidXZ('Truncated variable length integer')
        ch = ord(byte)
        if ch == 0 and i:
            # Only the very first byte may be zero (the value 0 itself)
            raise InvalidXZ('Invalid variable length integer')
        ans |= (ch & 0x7f) << (i * 7)
        if ch < 0x80:
            return ans
    raise InvalidXZ('Variable length integer is too long')
def decode_var_int2(raw, pos):
    """Decode one XZ variable-length integer from the byte string *raw*.

    :param raw: byte string containing the encoded integer.
    :param pos: index in *raw* of the first byte of the integer.
    :return: ``(value, new_pos)`` where *new_pos* is the index of the first
        byte after the integer. A single zero byte decodes to the value 0
        and is consumed like any other one-byte integer.
    :raises InvalidXZ: if *raw* ends in the middle of the integer, the
        encoding uses more than nine bytes, or a continuation byte is zero
        (a non-minimal encoding, which the XZ format forbids).
    """
    ans = 0
    for i in range(9):
        # Slicing (not indexing) yields a length 0/1 byte string, so running
        # off the end is detectable and ord() always gets a byte string.
        byte = raw[pos + i:pos + i + 1]
        if not byte:
            raise InvalidXZ('Truncated variable length integer')
        ch = ord(byte)
        if ch == 0 and i:
            raise InvalidXZ('Invalid variable length integer')
        ans |= (ch & 0x7f) << (i * 7)
        if ch < 0x80:
            return ans, pos + i + 1
    raise InvalidXZ('Variable length integer is too long')
def encode_var_int(num):
    """Encode the integer *num* as an XZ variable-length integer.

    The value is emitted seven bits per byte, least significant group first,
    with the high bit set on every byte except the last. Zero is encoded as
    a single null byte.
    """
    if num == 0:
        return b'\0'
    out = bytearray()
    while num != 0:
        low_bits = num & 0x7F
        num >>= 7
        out.append(low_bits | 0x80)
    # The final group must not carry the continuation bit
    out[-1] &= 0x7F
    return bytes(out)
def read_stream_header(f):
    """Read and validate the 12 byte XZ stream header from *f*.

    :param f: binary file-like object positioned at the start of a stream.
    :return: the check type, i.e. the low four bits of the second stream
        flags byte.
    :raises NotXZ: if fewer than 12 bytes are available or the magic bytes
        do not match HEADER_MAGIC.
    :raises InvalidXZ: if the stream flags are invalid or their CRC32 does
        not match.
    """
    try:
        magic, stream_flags1, stream_flags2, crc = unpack(b'<6s2BI', f.read(12))
    except struct_error as e:
        # The message needs a %s placeholder; without it the formatting
        # itself raises TypeError and masks the real problem.
        raise NotXZ('Not an XZ file. Invalid stream header: %s' % e)
    if magic != HEADER_MAGIC:
        raise NotXZ('Not an XZ file. Header Magic is: %r' % magic)
    if stream_flags1 != 0:
        raise InvalidXZ('Stream flags first byte is not null')
    check_type, reserved = 0x0f & stream_flags2, 0xf0 & stream_flags2
    if reserved != 0:
        raise InvalidXZ('Stream flags reserved bits not null')
    if crc32(bytes(bytearray([stream_flags1, stream_flags2]))) != crc:
        raise InvalidXZ('Stream flags header CRC incorrect')
    return check_type
class CRCChecker(object):
    """Incrementally computed CRC32 or CRC64 integrity check.

    A *check_type* of 0x1 selects CRC32 (4 byte little-endian check field);
    any other value selects ``lzma.crc64`` (8 byte little-endian field).
    Call the instance with each chunk of data, then finish(), then check().
    """

    def __init__(self, check_type):
        self.code = 0
        if check_type == 0x1:
            self.func, self.size, self.fmt = crc32, 4, b'<I'
        else:
            self.func, self.size, self.fmt = lzma.crc64, 8, b'<Q'

    def __call__(self, raw):
        """Fold the bytes *raw* into the running checksum."""
        self.code = self.func(raw, self.code)

    def finish(self):
        """Finalise the checksum; the CRC64 value is masked to 64 bits."""
        if self.func is not crc32:
            self.code = 0xFFFFFFFFFFFFFFFF & self.code

    def check(self, raw):
        """Return True if the stored check field *raw* matches the checksum.

        A field of the wrong length (for example from a truncated file) is
        reported as a mismatch instead of raising ``struct.error``.
        """
        if len(raw) != self.size:
            return False
        return self.code == unpack(self.fmt, raw)[0]
class Sha256Checker(object):
    """Incrementally computed SHA-256 integrity check (32 byte digest).

    Call the instance with each chunk of data, then finish(), then check().
    """

    def __init__(self):
        hasher = sha256()
        self.h = hasher
        self.func = hasher.update
        self.code = None
        self.size = 32

    def __call__(self, raw):
        """Feed the bytes *raw* into the hash."""
        self.func(raw)

    def finish(self):
        """Store the digest in ``code`` and drop the hash object."""
        digest = self.h.digest()
        self.code = digest
        self.h = None
        self.func = None

    def check(self, raw):
        """Return True if *raw* equals the computed digest."""
        return self.code == raw
class DummyChecker(object):
    """Checker used when the stream has no integrity check: does nothing."""

    size = 0

    def __call__(self, raw):
        """Ignore the data."""

    def finish(self):
        """Nothing to finalise."""
class LZMA2Filter(object):
    """Decoder for the LZMA2 filter of a block.

    :param props: the filter properties, exactly one byte. Its low six bits
        are stored as ``dictionary_size``; the two high bits must be zero.
    :param check_type: stream check type; selects the checker stored in
        ``self.crc`` (0x1/0x4 -> CRCChecker, 0x0A -> Sha256Checker,
        0 -> DummyChecker, anything else is rejected).
    :param bufsize: buffer size in MB passed on to the C decoder; defaults
        to BUFSIZE.
    """

    BUFSIZE = 10  # MB

    def __init__(self, props, check_type, bufsize=None):
        if len(props) != 1:
            raise InvalidXZ('Invalid properties length for LZMA2 filter')
        props = ord(props)
        self.dictionary_size = props & 0x3F
        if props & 0xC0 != 0:
            raise InvalidXZ('Invalid high bytes for LZMA2 filter properties')
        self.props = props
        if check_type == 0x0A:
            self.crc = Sha256Checker()
        elif check_type in (0x1, 0x4):
            self.crc = CRCChecker(check_type)
        elif check_type:
            raise InvalidXZ('Unsupported CRC check type: %s' % check_type)
        else:
            self.crc = DummyChecker()
        megabytes = self.BUFSIZE if bufsize is None else bufsize
        self.bufsize = int(megabytes * 1024 * 1024)

    def __call__(self, f, outfile, filters):
        """Decompress one LZMA2 block from *f* into *outfile*.

        Every decompressed chunk is first passed through each callable in
        *filters*, in order, and the result is written to *outfile* and fed
        to the integrity checker, which is finalised at the end.
        """
        sink = outfile.write
        checker = self.crc

        def write(raw):
            if filters:
                # The extra filters operate on a mutable bytearray
                buf = bytearray(raw)
                for flt in filters:
                    buf = flt(buf)
                raw = bytes(buf)
            sink(raw)
            checker(raw)

        try:
            lzma.decompress2(f.read, f.seek, write, self.props, self.bufsize)
        except lzma.error as e:
            raise InvalidXZ('Failed to decode LZMA2 block with error code: %s' % e.message)
        self.crc.finish()
class DeltaFilter(object):
    """Decoder for the Delta filter.

    :param props: the filter properties, exactly one byte; the delta
        distance is that byte's value plus one.
    :param args: ignored; accepted so that the constructor can be called
        with the same arguments as LZMA2Filter.
    """

    def __init__(self, props, *args):
        if len(props) != 1:
            raise InvalidXZ('Invalid properties length for Delta filter')
        self.history = bytearray(256)
        self.pos = 0
        self.distance = 1 + ord(props)

    def __call__(self, raw):
        """Run ``lzma.delta_decode`` over the bytearray *raw* and return it.

        The position returned by the C function is remembered together with
        the history buffer, so consecutive chunks continue where the
        previous one stopped.
        """
        new_pos = lzma.delta_decode(raw, self.history, self.pos, self.distance)
        self.pos = new_pos
        return raw
def test_delta_filter():
    """Self-check for DeltaFilter with a distance of two.

    Decodes a known sample in one call and then again one byte at a time,
    raising ValueError on the first mismatch.
    """
    encoded = b'\xA1\xB1\x01\x02\x01\x02\x01\x02'
    expected = b'\xA1\xB1\xA2\xB3\xA3\xB5\xA4\xB7'

    def eq(s, d):
        if s == d:
            return
        raise ValueError('%r != %r' % (s, d))

    one_shot = DeltaFilter(b'\x01')
    eq(expected, bytes(one_shot(bytearray(encoded))))
    # A single filter instance must carry its state across calls
    f = DeltaFilter(b'\x01')
    for ch, dch in zip(encoded, expected):
        eq(dch, bytes(f(bytearray(ch))))
# Size record for one block. read_block() builds these from the data it
# actually read and read_index() from the Index records; read_stream()
# compares the two sequences.
Block = namedtuple('Block', 'unpadded_size uncompressed_size')
def read_block_header(f, block_header_size_, check_type):
    """Parse the header of a block.

    :param f: binary file-like object positioned just after the block
        header size byte.
    :param block_header_size_: the already read, encoded header size byte.
    :param check_type: stream check type, passed to every filter constructor.
    :return: ``(filters, compressed_size, uncompressed_size)``. *filters* is
        the list of filter objects in header order; each size is ``None``
        when the header does not store it.
    :raises InvalidXZ: if the header is truncated, fails its CRC32, or
        contains invalid or unsupported values.
    """
    block_header_size = 4 * (ord(block_header_size_) + 1)
    if block_header_size < 8:
        raise InvalidXZ('Invalid block header size: %d' % block_header_size)
    try:
        header, crc = unpack(b'<%dsI' % (block_header_size - 5), f.read(block_header_size - 1))
    except struct_error:
        # A short read must be reported as bad data, not as a struct error
        raise InvalidXZ('Truncated block header')
    if crc != crc32(block_header_size_ + header):
        raise InvalidXZ('Block header CRC mismatch')
    block_flags = ord(header[0:1])
    # The two low bits store "number of filters - 1", so the count is always
    # in the range 1..4 and needs no further validation.
    number_of_filters = (0x03 & block_flags) + 1
    if block_flags & 0x3c != 0:
        raise InvalidXZ('Non-zero reserved bits in block flags')
    has_compressed_size = block_flags & 0x40
    has_uncompressed_size = block_flags & 0x80
    compressed_size = uncompressed_size = None
    pos = 1
    if has_compressed_size:
        compressed_size, pos = decode_var_int2(header, pos)
    if has_uncompressed_size:
        uncompressed_size, pos = decode_var_int2(header, pos)
    filters = []
    while number_of_filters:
        number_of_filters -= 1
        filter_id, pos = decode_var_int2(header, pos)
        size_of_properties, pos = decode_var_int2(header, pos)
        if filter_id >= 0x4000000000000000:
            raise InvalidXZ('Invalid filter id: %d' % filter_id)
        if filter_id not in (LZMA2_FILTER_ID, DELTA_FILTER_ID):
            raise InvalidXZ('Unsupported filter ID: 0x%x' % filter_id)
        props = header[pos:pos+size_of_properties]
        pos += size_of_properties
        if len(props) != size_of_properties:
            raise InvalidXZ('Incomplete filter properties')
        # number_of_filters now counts the filters still to come
        if filter_id == LZMA2_FILTER_ID and number_of_filters:
            raise InvalidXZ('LZMA2 filter must be the last filter')
        elif filter_id == DELTA_FILTER_ID and not number_of_filters:
            raise InvalidXZ('Delta filter cannot be the last filter')
        filters.append((LZMA2Filter if filter_id == LZMA2_FILTER_ID else DeltaFilter)(props, check_type))
    padding = header[pos:]
    if padding.lstrip(b'\0'):
        raise InvalidXZ('Non-null block header padding: %r' % padding)
    return filters, compressed_size, uncompressed_size
def read_block(f, block_header_size_, check_type, outfile):
    """Decode one complete block from *f*, writing the data to *outfile*.

    Parses the block header, runs the filter chain, then verifies the sizes
    stored in the header (when present), the block padding and, if
    *check_type* is non-zero, the integrity check that follows the padding.

    :param block_header_size_: the already read block header size byte.
    :return: a Block holding the unpadded size of the block and the number
        of bytes written to *outfile*.
    :raises InvalidXZ: on any mismatch.
    """
    block_start = f.tell() - 1  # the size byte has already been consumed
    filters, compressed_size, uncompressed_size = read_block_header(f, block_header_size_, check_type)
    data_start = f.tell()
    out_start = outfile.tell()
    # The last filter in the header drives decoding; the remaining filters
    # are handed to it in reverse header order.
    filters.reverse()
    decoder = filters[0]
    decoder(f, outfile, filters[1:])
    actual_compressed_size = f.tell() - data_start
    uncompressed_actual_size = outfile.tell() - out_start
    if not (uncompressed_size is None or uncompressed_size == uncompressed_actual_size):
        raise InvalidXZ('Uncompressed size for block does not match')
    if not (compressed_size is None or compressed_size == actual_compressed_size):
        raise InvalidXZ('Compressed size for block does not match')
    padding_count = (4 - f.tell() % 4) % 4
    if padding_count:
        padding = f.read(padding_count)
        if len(padding) != padding_count:
            raise InvalidXZ('Block is not aligned')
        if padding.lstrip(b'\0'):
            raise InvalidXZ('Block padding has non null bytes')
    if check_type:
        stored_check = f.read(decoder.crc.size)
        if not decoder.crc.check(stored_check):
            raise InvalidXZ('CRC for data does not match')
    return Block(f.tell() - padding_count - block_start, uncompressed_actual_size)
def read_index(f):
    """Generate the Block records stored in the Index, then validate it.

    *f* must be positioned just after the Index indicator byte. After the
    last record has been yielded, the Index padding and CRC32 are checked,
    so the generator has to be exhausted for the validation to run.

    :raises InvalidXZ: if a record has an unpadded size below one, the
        padding is wrong, or the CRC32 is missing or does not match.
    """
    pos = f.tell() - 1  # include the indicator byte in the CRC
    number_of_records = decode_var_int(f)
    while number_of_records:
        number_of_records -= 1
        unpadded_size = decode_var_int(f)
        if unpadded_size < 1:
            raise InvalidXZ('Invalid unpadded size in index: %d' % unpadded_size)
        yield Block(unpadded_size, decode_var_int(f))
    misalignment = f.tell() % 4
    if misalignment:
        padding_count = 4 - misalignment
        padding = f.read(padding_count)
        if len(padding) != padding_count or padding.lstrip(b'\0'):
            raise InvalidXZ('Incorrect Index padding')
    epos = f.tell()
    f.seek(pos)
    raw = f.read(epos - pos)
    try:
        crc, = unpack(b'<I', f.read(4))
    except struct_error:
        # A short read must be reported as bad data, not as a struct error
        raise InvalidXZ('Truncated Index CRC')
    if crc != crc32(raw):
        raise InvalidXZ('Index field CRC mismatch')
def read_stream_footer(f, check_type, index_size):
    """Read and validate the 12 byte stream footer from *f*.

    :param check_type: check type taken from the stream header; the footer
        stream flags must agree with it.
    :param index_size: actual size in bytes of the Index that was read; the
        footer's backward size must agree with it.
    :raises InvalidXZ: if the footer is truncated or any field is wrong.
    """
    try:
        crc, = unpack(b'<I', f.read(4))
        raw = f.read(6)
        backward_size, stream_flags1, stream_flags2 = unpack(b'<I2B', raw)
    except struct_error:
        # A short read must be reported as bad data, not as a struct error
        raise InvalidXZ('Truncated stream footer')
    if stream_flags1 != 0 or stream_flags2 & 0xf0 != 0 or stream_flags2 & 0xf != check_type:
        raise InvalidXZ('Footer stream flags != header stream flags')
    # Stored as (size / 4) - 1
    backward_size = 4 * (1 + backward_size)
    if backward_size != index_size:
        raise InvalidXZ('Footer backward size != actual index size')
    if f.read(2) != b'YZ':
        raise InvalidXZ('Stream footer has incorrect magic bytes')
    if crc != crc32(raw):
        raise InvalidXZ('Stream footer CRC mismatch')
def read_stream(f, outfile):
    """Decode one complete XZ stream from *f*, writing the data to *outfile*.

    Reads the stream header, every block up to the Index, the Index itself
    and the stream footer, and checks that the Index records agree with the
    blocks that were actually decoded.

    :raises NotXZ: if *f* does not start with an XZ stream header.
    :raises InvalidXZ: if the stream is truncated or inconsistent.
    """
    check_type = read_stream_header(f)
    blocks = []
    while True:
        sz = f.read(1)
        if not sz:
            # Without this an empty read would be parsed as a block header
            # size and fail with an unrelated TypeError.
            raise InvalidXZ('Unexpected end of file while looking for the Index')
        if sz == b'\0':
            # A null byte where a block header size is expected marks the
            # start of the Index.
            break
        blocks.append(read_block(f, sz, check_type, outfile))
    pos = f.tell() - 1
    index = tuple(read_index(f))
    index_size = f.tell() - pos
    if index != tuple(blocks):
        raise InvalidXZ('Index does not match actual blocks in file')
    read_stream_footer(f, check_type, index_size)
def decompress(raw, outfile=None):
    """Decompress XZ data, which may consist of several concatenated streams.

    :param raw: a byte string or a seekable binary file-like object.
    :param outfile: file-like object to write to; it is rewound to position
        0 first. When not given, a SpooledTemporaryFile is created.
    :return: the output file object.
    :raises XZError: if the data is not valid XZ.
    """
    if isinstance(raw, bytes):
        raw = BytesIO(raw)
    if not outfile:
        outfile = SpooledTemporaryFile(50 * 1024 * 1024, '_xz_decompress')
    outfile.seek(0)
    while True:
        read_stream(raw, outfile)
        pos = raw.tell()
        trail = raw.read(1024)
        # Look for a further stream in what follows the one just decoded
        idx = trail.find(HEADER_MAGIC) if len(trail) >= 20 else -1
        if idx < 0:
            break
        # Found another stream
        raw.seek(pos)
        if idx:
            padding = raw.read(idx)
            if padding.lstrip(b'\0') or len(padding) % 4:
                raise InvalidXZ('Found trailing garbage between streams')
    return outfile
if __name__ == '__main__':
    import sys
    # Decompress the file named by the last command line argument; the
    # context manager makes sure the input file is closed again.
    with open(sys.argv[-1], 'rb') as f:
        decompress(f)