mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Speedup processing of RAR/CBR files by avoiding an extra file copy
This commit is contained in:
parent
e12164eb81
commit
b96ec77417
@ -11,6 +11,7 @@ let g:syntastic_cpp_include_dirs = [
|
||||
\'/usr/include/freetype2',
|
||||
\'/usr/include/fontconfig',
|
||||
\'src/qtcurve/common', 'src/qtcurve',
|
||||
\'src/unrar',
|
||||
\'/usr/include/ImageMagick',
|
||||
\]
|
||||
let g:syntastic_c_include_dirs = g:syntastic_cpp_include_dirs
|
||||
|
@ -47,6 +47,13 @@ class Extension(object):
|
||||
self.ldflags = kwargs.get('ldflags', [])
|
||||
self.optional = kwargs.get('optional', False)
|
||||
self.needs_ddk = kwargs.get('needs_ddk', False)
|
||||
of = kwargs.get('optimize_level', None)
|
||||
if of is None:
|
||||
of = '/Ox' if iswindows else '-O3'
|
||||
else:
|
||||
flag = '/O%d' if iswindows else '-O%d'
|
||||
of = flag % of
|
||||
self.cflags.insert(0, of)
|
||||
|
||||
def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags):
|
||||
pass
|
||||
@ -176,6 +183,24 @@ extensions = [
|
||||
sip_files = ['calibre/gui2/progress_indicator/QProgressIndicator.sip']
|
||||
),
|
||||
|
||||
Extension('unrar',
|
||||
['unrar/%s.cpp'%(x.partition('.')[0]) for x in '''
|
||||
rar.o strlist.o strfn.o pathfn.o savepos.o smallfn.o global.o file.o
|
||||
filefn.o filcreat.o archive.o arcread.o unicode.o system.o
|
||||
isnt.o crypt.o crc.o rawread.o encname.o resource.o match.o
|
||||
timefn.o rdwrfn.o consio.o options.o ulinks.o errhnd.o rarvm.o
|
||||
secpassword.o rijndael.o getbits.o sha1.o extinfo.o extract.o
|
||||
volume.o list.o find.o unpack.o cmddata.o filestr.o scantree.o
|
||||
'''.split()] + ['calibre/utils/unrar.cpp'],
|
||||
inc_dirs=['unrar'],
|
||||
cflags = [('/' if iswindows else '-') + x for x in (
|
||||
'DSILENT', 'DRARDLL', 'DUNRAR')] + (
|
||||
[] if iswindows else ['-D_FILE_OFFSET_BITS=64',
|
||||
'-D_LARGEFILE_SOURCE']),
|
||||
optimize_level=2,
|
||||
libraries=['User32', 'Advapi32', 'kernel32', 'Shell32'] if iswindows else []
|
||||
),
|
||||
|
||||
]
|
||||
|
||||
|
||||
@ -239,7 +264,7 @@ if isunix:
|
||||
cxx = os.environ.get('CXX', 'g++')
|
||||
cflags = os.environ.get('OVERRIDE_CFLAGS',
|
||||
# '-Wall -DNDEBUG -ggdb -fno-strict-aliasing -pipe')
|
||||
'-O3 -Wall -DNDEBUG -fno-strict-aliasing -pipe')
|
||||
'-Wall -DNDEBUG -fno-strict-aliasing -pipe')
|
||||
cflags = shlex.split(cflags) + ['-fPIC']
|
||||
ldflags = os.environ.get('OVERRIDE_LDFLAGS', '-Wall')
|
||||
ldflags = shlex.split(ldflags)
|
||||
@ -274,7 +299,7 @@ if isosx:
|
||||
|
||||
if iswindows:
|
||||
cc = cxx = msvc.cc
|
||||
cflags = '/c /nologo /Ox /MD /W3 /EHsc /DNDEBUG'.split()
|
||||
cflags = '/c /nologo /MD /W3 /EHsc /DNDEBUG'.split()
|
||||
ldflags = '/DLL /nologo /INCREMENTAL:NO /NODEFAULTLIB:libcmt.lib'.split()
|
||||
#cflags = '/c /nologo /Ox /MD /W3 /EHsc /Zi'.split()
|
||||
#ldflags = '/DLL /nologo /INCREMENTAL:NO /DEBUG'.split()
|
||||
|
@ -43,7 +43,6 @@ class LinuxFreeze(Command):
|
||||
'/usr/lib/liblcms2.so.2',
|
||||
'/usr/lib/libstlport.so.5.1',
|
||||
'/tmp/calibre-mount-helper',
|
||||
'/usr/lib/libunrar.so',
|
||||
'/usr/lib/libchm.so.0',
|
||||
'/usr/lib/libsqlite3.so.0',
|
||||
'/usr/lib/libmng.so.1',
|
||||
|
@ -32,7 +32,6 @@ binary_includes = [
|
||||
'/usr/lib/liblcms.so.1',
|
||||
'/usr/lib/liblzma.so.0',
|
||||
'/usr/lib/libexpat.so.1',
|
||||
'/usr/lib/libunrar.so',
|
||||
'/usr/lib/libsqlite3.so.0',
|
||||
'/usr/lib/libmng.so.1',
|
||||
'/usr/lib/libpodofo.so.0.9.1',
|
||||
|
@ -437,8 +437,8 @@ class Py2App(object):
|
||||
|
||||
@flush
|
||||
def add_misc_libraries(self):
|
||||
for x in ('usb-1.0.0', 'mtp.9', 'unrar', 'readline.6.1',
|
||||
'wmflite-0.2.7', 'chm.0', 'sqlite3.0'):
|
||||
for x in ('usb-1.0.0', 'mtp.9', 'readline.6.1', 'wmflite-0.2.7',
|
||||
'chm.0', 'sqlite3.0'):
|
||||
info('\nAdding', x)
|
||||
x = 'lib%s.dylib'%x
|
||||
shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
|
||||
|
@ -388,7 +388,7 @@ def main():
|
||||
'dist_dir' : 'build/py2app',
|
||||
'argv_emulation' : True,
|
||||
'iconfile' : icon,
|
||||
'frameworks': ['libusb.dylib', 'libunrar.dylib'],
|
||||
'frameworks': ['libusb.dylib'],
|
||||
'includes' : ['sip', 'pkg_resources', 'PyQt4.QtXml',
|
||||
'PyQt4.QtSvg', 'PyQt4.QtWebKit', 'commands',
|
||||
'mechanize', 'ClientForm', 'usbobserver',
|
||||
|
@ -17,7 +17,6 @@ ICU_DIR = os.environ.get('ICU_DIR', r'Q:\icu')
|
||||
OPENSSL_DIR = os.environ.get('OPENSSL_DIR', r'Q:\openssl')
|
||||
QT_DIR = os.environ.get('QT_DIR', 'Q:\\Qt\\4.8.2')
|
||||
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
||||
LIBUNRAR = os.environ.get('UNRARDLL', 'C:\\Program Files\\UnrarDLL\\unrar.dll')
|
||||
SW = r'C:\cygwin\home\kovid\sw'
|
||||
IMAGEMAGICK = os.path.join(SW, 'build',
|
||||
'ImageMagick-*\\VisualMagick\\bin')
|
||||
@ -261,9 +260,6 @@ class Win32Freeze(Command, WixMixIn):
|
||||
|
||||
print
|
||||
print 'Adding third party dependencies'
|
||||
print '\tAdding unrar'
|
||||
shutil.copyfile(LIBUNRAR, os.path.join(self.dll_dir,
|
||||
os.path.basename(LIBUNRAR).replace('64', '')))
|
||||
|
||||
print '\tAdding misc binary deps'
|
||||
bindir = os.path.join(SW, 'bin')
|
||||
|
@ -240,23 +240,6 @@ Run make (note that you must have GNU make installed in cygwin)
|
||||
|
||||
Optionally run make check
|
||||
|
||||
Libunrar
|
||||
----------
|
||||
|
||||
Get the source from http://www.rarlab.com/rar_add.htm
|
||||
|
||||
Open UnrarDll.vcproj, change build type to release.
|
||||
If building 64 bit change Win32 to x64.
|
||||
|
||||
Build the Solution, find the dll in the build subdir. As best as I can tell,
|
||||
the vcproj already defines the SILENT preprocessor directive, but you should
|
||||
test this.
|
||||
|
||||
.. http://www.rarlab.com/rar/UnRARDLL.exe install and add C:\Program Files\UnrarDLL to PATH
|
||||
|
||||
TODO: 64-bit check that SILENT is defined and that the ctypes bindings actually
|
||||
work
|
||||
|
||||
zlib
|
||||
------
|
||||
|
||||
|
@ -264,7 +264,7 @@ def extract(path, dir):
|
||||
with open(path, 'rb') as f:
|
||||
id_ = f.read(3)
|
||||
if id_ == b'Rar':
|
||||
from calibre.libunrar import extract as rarextract
|
||||
from calibre.utils.unrar import extract as rarextract
|
||||
extractor = rarextract
|
||||
elif id_.startswith(b'PK'):
|
||||
from calibre.libunzip import extract as zipextract
|
||||
@ -276,7 +276,7 @@ def extract(path, dir):
|
||||
from calibre.libunzip import extract as zipextract
|
||||
extractor = zipextract
|
||||
elif ext in ['cbr', 'rar']:
|
||||
from calibre.libunrar import extract as rarextract
|
||||
from calibre.utils.unrar import extract as rarextract
|
||||
extractor = rarextract
|
||||
if extractor is None:
|
||||
raise Exception('Unknown archive type')
|
||||
|
@ -85,6 +85,7 @@ class Plugins(collections.Mapping):
|
||||
'speedup',
|
||||
'freetype',
|
||||
'woff',
|
||||
'unrar',
|
||||
]
|
||||
if iswindows:
|
||||
plugins.extend(['winutil', 'wpd', 'winfonts'])
|
||||
|
@ -140,7 +140,7 @@ class ComicMetadataReader(MetadataReaderPlugin):
|
||||
elif id_.startswith(b'PK'):
|
||||
ftype = 'cbz'
|
||||
if ftype == 'cbr':
|
||||
from calibre.libunrar import extract_first_alphabetically as extract_first
|
||||
from calibre.utils.unrar import extract_first_alphabetically as extract_first
|
||||
extract_first
|
||||
else:
|
||||
from calibre.libunzip import extract_member
|
||||
|
@ -48,12 +48,13 @@ class ArchiveExtract(FileTypePlugin):
|
||||
def run(self, archive):
|
||||
is_rar = archive.lower().endswith('.rar')
|
||||
if is_rar:
|
||||
from calibre.libunrar import extract_member, names
|
||||
from calibre.utils.unrar import extract_member, names
|
||||
else:
|
||||
zf = ZipFile(archive, 'r')
|
||||
|
||||
if is_rar:
|
||||
fnames = names(archive)
|
||||
with open(archive, 'rb') as rf:
|
||||
fnames = list(names(rf))
|
||||
else:
|
||||
fnames = zf.namelist()
|
||||
|
||||
@ -76,7 +77,8 @@ class ArchiveExtract(FileTypePlugin):
|
||||
of = self.temporary_file('_archive_extract.'+ext)
|
||||
with closing(of):
|
||||
if is_rar:
|
||||
data = extract_member(archive, match=None, name=fname)[1]
|
||||
with open(archive, 'rb') as f:
|
||||
data = extract_member(f, match=None, name=fname)[1]
|
||||
of.write(data)
|
||||
else:
|
||||
of.write(zf.read(fname))
|
||||
|
@ -8,35 +8,27 @@ Read metadata from RAR archives
|
||||
'''
|
||||
|
||||
import os
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
|
||||
from calibre.libunrar import extract_member, names
|
||||
from calibre import CurrentDir
|
||||
from calibre.utils.unrar import extract_member, names
|
||||
|
||||
def get_metadata(stream):
|
||||
from calibre.ebooks.metadata.archive import is_comic
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
|
||||
path = getattr(stream, 'name', False)
|
||||
if not path:
|
||||
pt = PersistentTemporaryFile('_rar-meta.rar')
|
||||
pt.write(stream.read())
|
||||
pt.close()
|
||||
path = pt.name
|
||||
path = os.path.abspath(path)
|
||||
file_names = list(names(path))
|
||||
file_names = list(names(stream))
|
||||
if is_comic(file_names):
|
||||
return get_metadata(stream, 'cbr')
|
||||
for f in file_names:
|
||||
stream_type = os.path.splitext(f)[1].lower()
|
||||
if stream_type:
|
||||
stream_type = stream_type[1:]
|
||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||
'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'):
|
||||
with TemporaryDirectory() as tdir:
|
||||
with CurrentDir(tdir):
|
||||
stream = extract_member(path, match=None, name=f,
|
||||
as_file=True)[1]
|
||||
if stream_type in {'lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||
'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1',
|
||||
'azw3'}:
|
||||
name, data = extract_member(stream, match=None, name=f)
|
||||
stream = BytesIO(data)
|
||||
stream.name = os.path.basename(name)
|
||||
return get_metadata(stream, stream_type)
|
||||
raise ValueError('No ebook found in RAR archive')
|
||||
|
||||
|
@ -1,292 +0,0 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
"""
|
||||
This module provides a thin ctypes based wrapper around libunrar.
|
||||
|
||||
See ftp://ftp.rarlabs.com/rar/unrarsrc-3.7.5.tar.gz
|
||||
"""
|
||||
import os, ctypes, sys, re
|
||||
from ctypes import Structure as _Structure, c_char_p, c_uint, c_void_p, POINTER, \
|
||||
byref, c_wchar_p, c_int, c_char, c_wchar
|
||||
from tempfile import NamedTemporaryFile
|
||||
from StringIO import StringIO
|
||||
|
||||
from calibre import iswindows, load_library, CurrentDir
|
||||
from calibre.ptempfile import TemporaryDirectory, PersistentTemporaryFile
|
||||
|
||||
_librar_name = 'libunrar'
|
||||
cdll = ctypes.cdll
|
||||
if iswindows:
|
||||
class Structure(_Structure):
|
||||
_pack_ = 1
|
||||
_librar_name = 'unrar'
|
||||
cdll = ctypes.windll
|
||||
else:
|
||||
Structure = _Structure
|
||||
if hasattr(sys, 'frozen') and iswindows:
|
||||
lp = os.path.join(os.path.dirname(sys.executable), 'DLLs', 'unrar.dll')
|
||||
_libunrar = cdll.LoadLibrary(lp)
|
||||
elif hasattr(sys, 'frozen_path'):
|
||||
lp = os.path.join(sys.frozen_path, 'lib', 'libunrar.so')
|
||||
_libunrar = cdll.LoadLibrary(lp)
|
||||
else:
|
||||
_libunrar = load_library(_librar_name, cdll)
|
||||
|
||||
RAR_OM_LIST = 0
|
||||
RAR_OM_EXTRACT = 1
|
||||
|
||||
ERAR_END_ARCHIVE = 10
|
||||
ERAR_NO_MEMORY = 11
|
||||
ERAR_BAD_DATA = 12
|
||||
ERAR_BAD_ARCHIVE = 13
|
||||
ERAR_UNKNOWN_FORMAT = 14
|
||||
ERAR_EOPEN = 15
|
||||
ERAR_ECREATE = 16
|
||||
ERAR_ECLOSE = 17
|
||||
ERAR_EREAD = 18
|
||||
ERAR_EWRITE = 19
|
||||
ERAR_SMALL_BUF = 20
|
||||
ERAR_UNKNOWN = 21
|
||||
ERAR_MISSING_PASSWORD = 22
|
||||
|
||||
RAR_VOL_ASK = 0
|
||||
RAR_VOL_NOTIFY = 1
|
||||
|
||||
RAR_SKIP = 0
|
||||
RAR_TEST = 1
|
||||
RAR_EXTRACT = 2
|
||||
|
||||
class UnRARException(Exception):
|
||||
pass
|
||||
|
||||
class RAROpenArchiveDataEx(Structure):
|
||||
_fields_ = [
|
||||
('ArcName', c_char_p),
|
||||
('ArcNameW', c_wchar_p),
|
||||
('OpenMode', c_uint),
|
||||
('OpenResult', c_uint),
|
||||
('CmtBuf', c_char_p),
|
||||
('CmtBufSize', c_uint),
|
||||
('CmtSize', c_uint),
|
||||
('CmtState', c_uint),
|
||||
('Flags', c_uint),
|
||||
('Reserved', c_uint * 32)
|
||||
]
|
||||
|
||||
class RARHeaderDataEx(Structure):
|
||||
_fields_ = [
|
||||
('ArcName', c_char*1024),
|
||||
('ArcNameW', c_wchar*1024),
|
||||
('FileName', c_char*1024),
|
||||
('FileNameW', c_wchar*1024),
|
||||
('Flags', c_uint),
|
||||
('PackSize', c_uint),
|
||||
('PackSizeHigh', c_uint),
|
||||
('UnpSize', c_uint),
|
||||
('UnpSizeHigh', c_uint),
|
||||
('HostOS', c_uint),
|
||||
('FileCRC', c_uint),
|
||||
('FileTime', c_uint),
|
||||
('UnpVer', c_uint),
|
||||
('Method', c_uint),
|
||||
('FileAttr', c_uint),
|
||||
('CmtBuf', c_char_p),
|
||||
('CmtBufSize', c_uint),
|
||||
('CmtSize', c_uint),
|
||||
('CmtState', c_uint),
|
||||
('Reserved', c_uint*1024)
|
||||
]
|
||||
|
||||
# Define a callback function
|
||||
#CALLBACK_FUNC = CFUNCTYPE(c_int, c_uint, c_long, c_char_p, c_long)
|
||||
#def py_callback_func(msg, user_data, p1, p2):
|
||||
# return 0
|
||||
|
||||
#callback_func = CALLBACK_FUNC(py_callback_func)
|
||||
|
||||
_libunrar.RAROpenArchiveEx.argtypes = [POINTER(RAROpenArchiveDataEx)]
|
||||
_libunrar.RAROpenArchiveEx.restype = c_void_p
|
||||
_libunrar.RARReadHeaderEx.argtypes = [c_void_p, POINTER(RARHeaderDataEx)]
|
||||
_libunrar.RARReadHeaderEx.restype = c_int
|
||||
_libunrar.RARProcessFileW.argtypes = [c_void_p, c_int, c_wchar_p, c_wchar_p]
|
||||
_libunrar.RARProcessFileW.restype = c_int
|
||||
_libunrar.RARCloseArchive.argtypes = [c_void_p]
|
||||
_libunrar.RARCloseArchive.restype = c_int
|
||||
_libunrar.RARSetPassword.argtypes = [c_void_p, c_char_p]
|
||||
#_libunrar.RARSetCallback.argtypes = [c_void_p, CALLBACK_FUNC, c_long]
|
||||
|
||||
|
||||
def _interpret_open_error(code, path):
    """
    Translate the OpenResult code of a failed archive open into a message.

    C{code} is compared against the ERAR_* constants; C{path} is embedded in
    the message. Any other code yields 'Unknown error.'.
    """
    if code == ERAR_NO_MEMORY:
        return "Not enough memory to process " + path
    if code == ERAR_BAD_DATA:
        return "Archive header broken: " + path
    if code == ERAR_BAD_ARCHIVE:
        return path + ' is not a RAR archive.'
    if code == ERAR_EOPEN:
        return 'Cannot open ' + path
    return 'Unknown error.'
|
||||
|
||||
def _interpret_process_file_error(code):
    """
    Translate a non-zero RARProcessFileW return code into a message.

    Codes that are not one of the ERAR_* constants listed below yield
    'Unknown Error'.
    """
    descriptions = {
        ERAR_UNKNOWN_FORMAT: 'Unknown archive format',
        ERAR_BAD_ARCHIVE: 'Bad volume',
        ERAR_ECREATE: 'File create error',
        ERAR_EOPEN: 'Volume open error',
        ERAR_ECLOSE: 'File close error',
        ERAR_EREAD: 'Read error',
        ERAR_EWRITE: 'Write error',
        ERAR_BAD_DATA: 'CRC error',
        ERAR_MISSING_PASSWORD: 'Password is required.',
    }
    return descriptions.get(code, 'Unknown Error')
|
||||
|
||||
def get_archive_info(flags):
    """
    Render the archive flag bits as a multi-line, human readable report.

    Each row is a label followed by 'yes' when the corresponding bit of
    C{flags} is set, and by the row's "unset" text otherwise.
    """
    # (label, bit mask, text printed when the bit is not set)
    rows = (
        ('Volume:\t\t', 1, 'no'),
        ('Comment:\t', 2, 'no'),
        ('Locked:\t\t', 4, 'no'),
        ('Solid:\t\t', 8, 'no'),
        ('New naming:\t', 16, 'no'),
        ('Authenticity:\t', 32, 'no'),
        ('Recovery:\t', 64, 'no'),
        ('Encr.headers:\t', 128, 'no'),
        ('First Volume:\t', 256, 'no or older than 3.0'),
    )
    report = StringIO()
    for label, bit, unset in rows:
        # The print statement is kept on purpose: its spacing rules decide
        # what is emitted between the label and the value.
        print >>report, label, 'yes' if (flags & bit) else unset
    return report.getvalue()
|
||||
|
||||
def extract(path, dir):
    """
    Extract the RAR archive at C{path} into the directory C{dir}.

    C{dir} is created if it does not exist. The working directory of the
    process is changed to C{dir} for the duration of the extraction and
    restored afterwards.

    Raises UnRARException if the archive cannot be opened, if a member cannot
    be processed, or if a file header is broken.
    """
    open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None)
    # The archive is opened before the chdir below, exactly as before.
    arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data))
    try:
        # Everything after the open is inside this try block so that the
        # archive handle is released even when mkdir/chdir fail.
        cwd = os.getcwdu()
        if not os.path.isdir(dir):
            os.mkdir(dir)
        os.chdir(dir)
        try:
            if open_archive_data.OpenResult != 0:
                raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path))
            header_data = RARHeaderDataEx(CmtBuf=None)
            while True:
                RHCode = _libunrar.RARReadHeaderEx(arc_data, byref(header_data))
                if RHCode != 0:
                    # Non-zero means no further header could be read; the
                    # code is inspected after the loop.
                    break
                PFCode = _libunrar.RARProcessFileW(arc_data, RAR_EXTRACT, None, None)
                if PFCode != 0:
                    raise UnRARException(_interpret_process_file_error(PFCode))
            if RHCode == ERAR_BAD_DATA:
                raise UnRARException('File header broken')
        finally:
            # Restored in its own finally so that a failure here cannot
            # prevent the archive from being closed.
            os.chdir(cwd)
    finally:
        _libunrar.RARCloseArchive(arc_data)
|
||||
|
||||
def names(path):
    """
    Generator yielding the name (FileNameW) of every entry in a RAR archive.

    C{path} is either a filesystem path or an object with a read() method.
    In the latter case its contents are first copied to a temporary file,
    which is closed once the generator finishes or is closed.

    Raises UnRARException if the archive cannot be opened or an entry cannot
    be skipped.
    """
    tmp = None
    if hasattr(path, 'read'):
        data = path.read()
        tmp = NamedTemporaryFile(suffix='.rar')
        tmp.write(data)
        tmp.flush()
        path = tmp.name
    try:
        open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_LIST, CmtBuf=None)
        arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data))
        try:
            if open_archive_data.OpenResult != 0:
                raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path))
            header_data = RARHeaderDataEx(CmtBuf=None)
            while True:
                if _libunrar.RARReadHeaderEx(arc_data, byref(header_data)) != 0:
                    break
                PFCode = _libunrar.RARProcessFileW(arc_data, RAR_SKIP, None, None)
                if PFCode != 0:
                    raise UnRARException(_interpret_process_file_error(PFCode))
                yield header_data.FileNameW
        finally:
            _libunrar.RARCloseArchive(arc_data)
    finally:
        # Close the temporary copy explicitly, after the archive handle,
        # instead of leaving it to garbage collection.
        if tmp is not None:
            tmp.close()
|
||||
|
||||
def _extract_member(path, match, name):
    """
    Extract the first archive entry selected by C{name} or C{match}.

    An entry is selected when C{name} is not None and equals the entry name,
    or when C{match} is not None and its search() finds something in the
    entry name. Returns the absolute path built from the entry name relative
    to the current working directory.

    Raises UnRARException if the archive cannot be opened, an entry cannot be
    processed, the archive has no entries, or no entry is selected.
    """
    request = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None)
    handle = _libunrar.RAROpenArchiveEx(byref(request))
    try:
        if request.OpenResult != 0:
            raise UnRARException(_interpret_open_error(request.OpenResult, path))
        header = RARHeaderDataEx(CmtBuf=None)
        seen = 0
        while _libunrar.RARReadHeaderEx(handle, byref(header)) == 0:
            entry = header.FileNameW
            wanted = (name is not None and entry == name) or \
                (match is not None and match.search(entry) is not None)
            # Selected entries are extracted, everything else is skipped.
            action = RAR_EXTRACT if wanted else RAR_SKIP
            status = _libunrar.RARProcessFileW(handle, action, None, None)
            if status != 0:
                raise UnRARException(_interpret_process_file_error(status))
            if wanted:
                return os.path.abspath(os.path.join(*entry.split('/')))
            seen += 1
        # No further header could be read without anything being selected.
        if seen == 0:
            raise UnRARException('%s has no files'%path)
        raise UnRARException('No match found in %s'%path)
    finally:
        _libunrar.RARCloseArchive(handle)
|
||||
|
||||
def extract_member(path, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I),
        name=None, as_file=False):
    """
    Extract a single member from a RAR archive.

    C{path} is a filesystem path or an object with a read() method, whose
    contents are copied to a temporary file first. The member is selected by
    C{name} and/or the compiled regular expression C{match} (see
    _extract_member).

    If C{as_file} is true the member is extracted relative to the current
    working directory and C{(path, open file object)} is returned; the caller
    must close the file. Otherwise the member is extracted into a temporary
    directory and C{(path, contents)} is returned.
    """
    tmp = None
    if hasattr(path, 'read'):
        data = path.read()
        tmp = NamedTemporaryFile(suffix='.rar')
        tmp.write(data)
        tmp.flush()
        path = tmp.name

    try:
        path = os.path.abspath(path)
        if as_file:
            member = _extract_member(path, match, name)
            return member, open(member, 'rb')
        with TemporaryDirectory('_libunrar') as tdir:
            with CurrentDir(tdir):
                member = _extract_member(path, match, name)
                # Close the handle before the temporary directory is left.
                with open(member, 'rb') as src:
                    return member, src.read()
    finally:
        # The temporary archive copy is no longer needed once extraction
        # has finished (or failed).
        if tmp is not None:
            tmp.close()
|
||||
|
||||
def extract_first_alphabetically(path):
    """
    Extract the image whose name sorts first in a RAR archive.

    C{path} is a filesystem path or an object with a read() method, whose
    contents are copied to a temporary file that is removed again before
    returning. Only entries with a png, jpg, jpeg or gif extension are
    considered.

    Returns whatever extract_member() returns for the chosen entry.
    Raises IndexError if the archive contains no such entry.
    """
    remove_path = False
    if hasattr(path, 'read'):
        data = path.read()
        with PersistentTemporaryFile('.rar') as f:
            f.write(data)
        path = f.name
        remove_path = True

    try:
        names_ = [x for x in names(path) if os.path.splitext(x)[1][1:].lower() in
                ('png', 'jpg', 'jpeg', 'gif')]
        names_.sort()
        if not names_:
            # Same exception type as the bare names_[0] lookup used to
            # raise, but with a message that explains the cause.
            raise IndexError('No image files found in %s' % path)
        return extract_member(path, name=names_[0], match=None)
    finally:
        # Runs on failure as well, so the temporary copy never lingers.
        if remove_path:
            try:
                os.remove(path)
            except OSError:
                # Best effort: a leftover temporary file is not fatal.
                pass
|
||||
|
||||
|
@ -87,9 +87,8 @@ def test_imaging():
|
||||
print ('PIL OK!')
|
||||
|
||||
def test_unrar():
|
||||
from calibre.libunrar import _libunrar
|
||||
if not _libunrar:
|
||||
raise RuntimeError('Failed to load libunrar')
|
||||
from calibre.utils.unrar import test_basic
|
||||
test_basic()
|
||||
print ('Unrar OK!')
|
||||
|
||||
def test_icu():
|
||||
|
506
src/calibre/utils/unrar.cpp
Normal file
506
src/calibre/utils/unrar.cpp
Normal file
@ -0,0 +1,506 @@
|
||||
/*
|
||||
* unrar.cpp
|
||||
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#define _UNICODE
|
||||
#define UNICODE
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#ifndef RARDLL // Needed for syntastic
|
||||
#define RARDLL
|
||||
#endif
|
||||
|
||||
#include <rar.hpp>
|
||||
#include <dll.hpp>
|
||||
#include <errno.h>
|
||||
#include <new>
|
||||
|
||||
static PyObject *UNRARError = NULL;
|
||||
|
||||
#ifndef _MSC_VER
|
||||
// Stand-in with the wcscpy_s signature for compilers other than MSVC.
// Copies src into dest when it fits into sz wide characters including the
// terminator. Returns 0 on success, EINVAL for a NULL argument and ERANGE
// when src does not fit (dest is left untouched in both error cases).
static int wcscpy_s(wchar_t *dest, size_t sz, const wchar_t *src) {
    if (dest != NULL && src != NULL) {
        if (wcslen(src) < sz) {
            wcscpy(dest, src);
            return 0;
        }
        return ERANGE;
    }
    return EINVAL;
}
|
||||
#endif
|
||||
|
||||
// Convert a Python unicode object into a newly allocated, NUL terminated
// wchar_t string. The caller owns the result and must free() it.
// Returns NULL if o is NULL; returns NULL with a Python exception set if o
// is not a unicode object, memory runs out or the conversion fails.
static wchar_t *unicode_to_wchar(PyObject *o) {
    if (o == NULL) return NULL;
    if (!PyUnicode_Check(o)) {
        PyErr_Format(PyExc_TypeError, "The python object must be a unicode object");
        return NULL;
    }

    Py_ssize_t count = PyUnicode_GET_SIZE(o);
    // calloc() zero fills, so the two spare slots keep the string terminated.
    wchar_t *out = (wchar_t *)calloc(count+2, sizeof(wchar_t));
    if (out == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    count = PyUnicode_AsWideChar((PyUnicodeObject*)o, out, count);
    if (count == -1) {
        free(out);
        PyErr_Format(PyExc_TypeError, "Invalid python unicode object.");
        return NULL;
    }
    return out;
}
|
||||
|
||||
// Build a Python unicode object from a NUL terminated wchar_t string.
// Returns a new reference, or NULL if o is NULL. When the conversion itself
// fails a MemoryError is set and NULL is returned.
static PyObject *wchar_to_unicode(const wchar_t *o) {
    if (o == NULL) return NULL;

    PyObject *result = PyUnicode_FromWideChar(o, wcslen(o));
    if (result == NULL) PyErr_NoMemory();
    return result;
}
|
||||
|
||||
class PyArchive : public Archive { // {{{
|
||||
public:
|
||||
PyArchive(PyObject *f, wchar_t *name, RAROptions *Cmd) : Archive(Cmd), file(f) {
|
||||
Py_XINCREF(f);
|
||||
wcscpy_s(FileNameW, NM, (wcslen(name) < NM-1) ? name : L"<stream>");
|
||||
if (wcstombs(FileName, FileNameW, NM-1) == (size_t)-1)
|
||||
memcpy(FileName, "<stream>", strlen("<stream>\0"));
|
||||
}
|
||||
|
||||
~PyArchive() { Py_XDECREF(file); }
|
||||
|
||||
virtual bool is_archive() {
|
||||
return IsArchive(false);
|
||||
}
|
||||
|
||||
virtual bool IsOpened() { return true; }
|
||||
|
||||
virtual int DirectRead(void *data, size_t size) {
|
||||
// printf("direct read()\n");
|
||||
char *buf;
|
||||
Py_ssize_t sz = 0;
|
||||
int ret = 0;
|
||||
|
||||
PyObject *res = PyObject_CallMethod(file, (char*)"read", (char*)"(k)", size);
|
||||
if (res == NULL) return -1;
|
||||
|
||||
ret = PyBytes_AsStringAndSize(res, &buf, &sz);
|
||||
if (ret != -1) {
|
||||
memcpy(data, buf, (size_t)sz);
|
||||
ret = (int)sz;
|
||||
}
|
||||
Py_XDECREF(res);
|
||||
return ret;
|
||||
}
|
||||
|
||||
virtual int Read(void *data, size_t size) {
|
||||
int ret = DirectRead(data, size);
|
||||
if (ret == -1) {
|
||||
ErrHandler.ReadError(FileName, FileNameW);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
virtual bool RawSeek(int64 offset, int method) {
|
||||
// printf("raw seek(%lld, %d)\n", offset, method);
|
||||
PyObject *res = PyObject_CallMethod(file, (char*)"seek", (char*)"Li", offset, method);
|
||||
if (res == NULL) return false;
|
||||
Py_XDECREF(res);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void Seek(int64 offset, int method) {
|
||||
if (!RawSeek(offset, method))
|
||||
ErrHandler.SeekError(FileName, FileNameW);
|
||||
}
|
||||
|
||||
virtual bool Close() { return true; }
|
||||
|
||||
virtual int64 Tell() {
|
||||
// printf("tell()\n");
|
||||
PyObject *res = PyObject_CallMethod(file, (char*)"tell", NULL);
|
||||
if (res == NULL) {
|
||||
ErrHandler.SeekError(FileName, FileNameW);
|
||||
}
|
||||
Py_ssize_t pos = PyInt_AsSsize_t(res);
|
||||
Py_XDECREF(res);
|
||||
return (int64)pos;
|
||||
}
|
||||
|
||||
virtual byte GetByte() {
|
||||
// printf("get byte()\n");
|
||||
byte b = 0;
|
||||
DirectRead(&b, 1);
|
||||
return b;
|
||||
}
|
||||
|
||||
virtual int64 FileLength() {
|
||||
// printf("file length()\n");
|
||||
int64 pos = Tell();
|
||||
Seek(0, SEEK_END);
|
||||
int64 ans = Tell();
|
||||
Seek(pos, SEEK_SET);
|
||||
return ans;
|
||||
}
|
||||
|
||||
virtual bool IsDevice() { return false; }
|
||||
|
||||
private:
|
||||
PyObject *file;
|
||||
}; // }}}
|
||||
|
||||
static
|
||||
PyMethodDef methods[] = {
|
||||
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
// RARArchive object definition {{{
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
// Type-specific fields go here.
|
||||
PyArchive *archive;
|
||||
PyObject *comment;
|
||||
int header_size;
|
||||
RAROptions Cmd;
|
||||
ComprDataIO DataIO;
|
||||
Unpack *Unp;
|
||||
size_t file_count;
|
||||
|
||||
} RARArchive;
|
||||
|
||||
static void
|
||||
RAR_dealloc(RARArchive* self) {
|
||||
Py_XDECREF(self->comment); self->comment = NULL;
|
||||
|
||||
if (self->Unp != NULL) { delete self->Unp; self->Unp = NULL; }
|
||||
|
||||
if (self->archive != NULL) {
|
||||
self->archive->Close();
|
||||
delete self->archive;
|
||||
self->archive = NULL;
|
||||
}
|
||||
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
// Turn a RAR_EXIT code thrown by the unrar code into a Python UNRARError,
// unless a Python exception is already pending (which is then kept).
static void handle_rar_error(RAR_EXIT errcode) {
    if (PyErr_Occurred()) return;
    PyErr_Format(UNRARError, "RAR error code: %d", errcode);
}
|
||||
|
||||
// Callback installed in RAROptions: forwards unpacked data to the Python
// object stored as user data by calling its handle_data(bytes) method.
// Returns 0 when the data was handled, and -1 for any other message or when
// the Python call raised.
static int CALLBACK callback(UINT msg, LPARAM data, LPARAM p1, LPARAM p2) {
    if (msg != UCM_PROCESSDATA) return -1;

    PyObject *handler = (PyObject*)data;
    // For this message p1 is the data pointer and p2 its length.
    PyObject *result = PyObject_CallMethod(handler, (char*)"handle_data", (char*)"(s#)", (char*)p1, (size_t)p2);
    if (result == NULL) return -1;
    Py_DECREF(result);
    return 0;
}
|
||||
|
||||
static int
|
||||
RAR_init(RARArchive *self, PyObject *args, PyObject *kwds) {
|
||||
PyObject *file, *name, *get_comment = Py_False, *pycallback;
|
||||
wchar_t *cname;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OOO|O", &file, &name, &pycallback, &get_comment)) return -1;
|
||||
if (!PyObject_HasAttrString(file, "read") || !PyObject_HasAttrString(file, "seek") || !PyObject_HasAttrString(file, "tell")) {
|
||||
PyErr_SetString(PyExc_TypeError, "file must be a file like object");
|
||||
return -1;
|
||||
}
|
||||
cname = unicode_to_wchar(name);
|
||||
if (cname == NULL) return -1;
|
||||
|
||||
self->Cmd.Callback = (UNRARCALLBACK)callback;
|
||||
self->Cmd.UserData = (LPARAM)pycallback;
|
||||
|
||||
self->archive = new (std::nothrow) PyArchive(file, cname, &self->Cmd);
|
||||
if (self->archive == NULL) { PyErr_NoMemory(); return -1; }
|
||||
free(cname);
|
||||
|
||||
self->DataIO.UnpArcSize=self->archive->FileLength();
|
||||
self->DataIO.UnpVolume=false;
|
||||
|
||||
self->Unp = new (std::nothrow) Unpack(&self->DataIO);
|
||||
if (self->Unp == NULL) { PyErr_NoMemory(); return -1; }
|
||||
self->file_count = 0;
|
||||
|
||||
try {
|
||||
self->Unp->Init();
|
||||
if (!self->archive->is_archive()) {
|
||||
if (!PyErr_Occurred())
|
||||
PyErr_SetString(UNRARError, "Not a RAR archive");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (PyObject_IsTrue(get_comment)) {
|
||||
Array<byte> cdata;
|
||||
if (self->archive->GetComment(&cdata, NULL)) {
|
||||
self->comment = PyBytes_FromStringAndSize((const char*)&cdata[0], cdata.Size());
|
||||
if (self->comment == NULL) { PyErr_NoMemory(); return -1; }
|
||||
} else {
|
||||
self->comment = Py_None;
|
||||
Py_INCREF(self->comment);
|
||||
}
|
||||
|
||||
} else {
|
||||
self->comment = Py_None;
|
||||
Py_INCREF(self->comment);
|
||||
}
|
||||
|
||||
} catch (RAR_EXIT errcode) {
|
||||
handle_rar_error(errcode);
|
||||
return -1;
|
||||
} catch (std::bad_alloc) {
|
||||
if (!PyErr_Occurred())
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Properties {{{
|
||||
|
||||
// RARArchive.friendly_name {{{
|
||||
static PyObject *
|
||||
RAR_comment(RARArchive *self, void *closure) {
|
||||
Py_INCREF(self->comment); return self->comment;
|
||||
} // }}}
|
||||
|
||||
static PyGetSetDef RAR_getsetters[] = {
|
||||
{(char *)"comment",
|
||||
(getter)RAR_comment, NULL,
|
||||
(char *)"The RAR archive comment or None",
|
||||
NULL},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
// }}}
|
||||
|
||||
// Locate the next file header in the archive and describe it.
// Returns None when there are no more file headers, otherwise a dict with
// the header fields (see the Py_BuildValue call below).
// Returns NULL with UNRARError set for multivolume or split archives, broken
// file headers and errors thrown by the unrar code.
static PyObject *
RAR_current_item(RARArchive *self, PyObject *args) {
    PyObject *filename = Py_None;
    try {
        self->header_size = (int) self->archive->SearchBlock(FILE_HEAD);

        if (self->header_size <= 0) {
            if (self->archive->Volume && self->archive->GetHeaderType() == ENDARC_HEAD &&
                self->archive->EndArcHead.Flags & EARC_NEXT_VOLUME) {
                PyErr_SetString(UNRARError, "This is a multivolume RAR archive. Not supported.");
                return NULL;
            }
            if (self->archive->BrokenFileHeader) {
                PyErr_SetString(UNRARError, "This archive has a broken file header.");
                return NULL;
            }
            Py_RETURN_NONE;
        }

        if (self->archive->NewLhd.Flags & LHD_SPLIT_BEFORE) {
            PyErr_SetString(UNRARError, "This is a split RAR archive. Not supported.");
            return NULL;
        }
    } catch (RAR_EXIT errcode) {
        handle_rar_error(errcode);
        return NULL;
    } catch (const std::bad_alloc&) {
        if (!PyErr_Occurred())
            PyErr_NoMemory();
        return NULL;
    }

    FileHeader fh = self->archive->NewLhd;

    if (*(fh.FileNameW)) {
        filename = wchar_to_unicode(fh.FileNameW);
        // wchar_to_unicode() has already set the exception on failure.
        if (filename == NULL) return NULL;
    } else {
        Py_INCREF(filename);
    }

    // "filenamew" uses N, which hands our reference to filename over to the
    // dict. The length for s# is cast explicitly: PY_SSIZE_T_CLEAN is defined
    // in this file, so Py_BuildValue reads a Py_ssize_t from the argument list
    // and any other integer type there is undefined behaviour.
    return Py_BuildValue("{s:s, s:s#, s:N, s:H, s:I, s:I, s:I, s:I, s:b, s:I, s:I, s:b, s:b, s:I, s:O, s:O, s:O, s:O}",
            "arcname", self->archive->FileName
            ,"filename", fh.FileName, (Py_ssize_t)fh.NameSize
            ,"filenamew", filename
            ,"flags", fh.Flags
            ,"pack_size", fh.PackSize
            ,"pack_size_high", fh.HighPackSize
            ,"unpack_size", fh.UnpSize
            ,"unpack_size_high", fh.HighUnpSize
            ,"host_os", fh.HostOS
            ,"file_crc", fh.FileCRC
            ,"file_time", fh.FileTime
            ,"unpack_ver", fh.UnpVer
            ,"method", fh.Method
            ,"file_attr", fh.FileAttr
            ,"is_directory", (self->archive->IsArcDir()) ? Py_True : Py_False
            ,"is_symlink", (IsLink(fh.FileAttr)) ? Py_True : Py_False
            ,"has_password", ((fh.Flags & LHD_PASSWORD) != 0) ? Py_True : Py_False
            ,"is_label", (self->archive->IsArcLabel()) ? Py_True : Py_False
            );

}
|
||||
|
||||
static File unrar_dummy_output = File();
|
||||
|
||||
// Process the archive's current item and move on to the next header.
//
// Python signature: process_item(extract=False)
//   extract - when true the item's data is read and unpacked through
//             self->DataIO; when false the item is skipped.
//
// Returns None on success. Returns NULL with a Python exception set when the
// arguments cannot be parsed, when the truth value of `extract` cannot be
// determined, when a skipped item continues in another volume, when the CRC
// of the unpacked data does not match the header, when the unrar code throws
// RAR_EXIT, or when memory allocation fails.
static PyObject *
RAR_process_item(RARArchive *self, PyObject *args) {
    PyObject *extract = Py_False;

    if (!PyArg_ParseTuple(args, "|O", &extract)) return NULL;

    // PyObject_IsTrue() returns -1 with an exception set on failure; that
    // must not be mistaken for "true".
    int do_extract = PyObject_IsTrue(extract);
    if (do_extract < 0) return NULL;

    self->file_count++;
    try {
        if (do_extract) {
            // Reset the per-item I/O state before unpacking
            self->DataIO.UnpVolume = false;
            self->DataIO.NextVolumeMissing=false;
            self->DataIO.CurUnpRead=0;
            self->DataIO.CurUnpWrite=0;
            self->DataIO.UnpFileCRC=self->archive->OldFormat ? 0 : 0xffffffff;
            self->DataIO.PackedCRC=0xffffffff;
            // self->DataIO.SetEncryption(0, NULL, NULL, false, self->archive->NewLhd.UnpVer>=36);
            self->DataIO.SetPackedSizeToRead(self->archive->NewLhd.FullPackSize);
            self->DataIO.SetFiles(self->archive, &unrar_dummy_output);
            self->DataIO.SetTestMode(false);
            self->DataIO.SetSkipUnpCRC(false);
            self->DataIO.SetTestMode(true); // We set this so that the Write method is not called on the output file by UnpWrite()
            self->Cmd.DllOpMode = RAR_EXTRACT;

            if (IsLink(self->archive->NewLhd.FileAttr)) {
                // The packed data of a link is its target; pass it through
                char LinkTarget[NM];
                int datasz = Min(self->archive->NewLhd.PackSize, NM-1);
                self->DataIO.UnpRead((byte *)LinkTarget, datasz);
                LinkTarget[datasz]=0;
                self->DataIO.UnpWrite((byte*)LinkTarget, datasz);
                self->archive->SeekToNext();
            } else if (self->archive->IsArcDir() || self->archive->NewLhd.FullUnpSize < 1) {
                // Directories and empty items carry no data to unpack
                self->archive->SeekToNext();
            } else {
                // Implementation from the ExtractCurrentFile() method in the unrar source code
                if (self->archive->NewLhd.Method == 0x30) {
                    // Method 0x30: data is copied from UnpRead() to UnpWrite()
                    // unchanged, one 64KB buffer at a time
                    Array<byte> Buffer(0x10000);
                    int64 DestUnpSize = self->archive->NewLhd.FullUnpSize;
                    uint Code = 0;
                    while (true)
                    {
                        Code = self->DataIO.UnpRead(&Buffer[0], Buffer.Size());
                        if (Code==0 || (int)Code==-1) break;
                        Code = (Code < DestUnpSize) ? Code:(uint)DestUnpSize;
                        self->DataIO.UnpWrite(&Buffer[0], Code);
                        if (DestUnpSize >= 0) DestUnpSize -= Code;
                    }
                } else {
                    self->Unp->SetDestSize(self->archive->NewLhd.FullUnpSize);
                    if (self->archive->NewLhd.UnpVer<=15)
                        self->Unp->DoUnpack(15,self->file_count>1 && self->archive->Solid);
                    else
                        self->Unp->DoUnpack(self->archive->NewLhd.UnpVer,(self->archive->NewLhd.Flags & LHD_SOLID)!=0);
                }
                self->archive->SeekToNext();
                bool ValidCRC = (self->archive->OldFormat && GET_UINT32(self->DataIO.UnpFileCRC)==GET_UINT32(self->archive->NewLhd.FileCRC)) ||
                    (!self->archive->OldFormat && GET_UINT32(self->DataIO.UnpFileCRC)==GET_UINT32(self->archive->NewLhd.FileCRC^0xffffffff));
                if (!ValidCRC) {
                    PyErr_SetString(UNRARError, "Invalid CRC while extracting item");
                    return NULL;
                }
                // Comes from ProcessFile in dll.cpp
                while(self->archive->IsOpened() && self->archive->ReadHeader() != 0 && self->archive->GetHeaderType() == NEWSUB_HEAD) {
                    // Skip extra file information
                    self->archive->SeekToNext();
                }
                self->archive->Seek(self->archive->CurBlockPos, SEEK_SET);
            }
        } else {
            if (self->archive->Volume && self->archive->GetHeaderType() == FILE_HEAD && self->archive->NewLhd.Flags & LHD_SPLIT_AFTER) {
                PyErr_SetString(UNRARError, "This is a split RAR archive. Not supported.");
                return NULL;
            }
            self->archive->SeekToNext();
        }
    } catch(RAR_EXIT errcode) {
        handle_rar_error(errcode);
        return NULL;
    } catch (const std::bad_alloc &) {
        if (!PyErr_Occurred())
            PyErr_NoMemory();
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||
|
||||
// Method table of the RARArchive type. Both methods use METH_VARARGS.
static PyMethodDef RAR_methods[] = {
    {
        "current_item",
        (PyCFunction)RAR_current_item,
        METH_VARARGS,
        "current_item() -> Return the current item in this RAR file."
    },
    {
        "process_item",
        (PyCFunction)RAR_process_item,
        METH_VARARGS,
        "process_item(extract=False) -> Process the current item."
    },
    {NULL, NULL, 0, NULL}  /* Sentinel */
};
|
||||
|
||||
// Type object for unrar.RARArchive. The initializer is positional, so the
// order of the entries below must match the PyTypeObject field order named in
// the trailing comments. tp_new is left as 0 here and is assigned
// PyType_GenericNew in initunrar() before PyType_Ready() is called.
static PyTypeObject RARArchiveType = { // {{{
    PyObject_HEAD_INIT(NULL)
    0,                         /*ob_size*/
    "unrar.RARArchive",            /*tp_name*/
    sizeof(RARArchive),      /*tp_basicsize*/
    0,                         /*tp_itemsize*/
    (destructor)RAR_dealloc, /*tp_dealloc*/
    0,                         /*tp_print*/
    0,                         /*tp_getattr*/
    0,                         /*tp_setattr*/
    0,                         /*tp_compare*/
    0,                         /*tp_repr*/
    0,                         /*tp_as_number*/
    0,                         /*tp_as_sequence*/
    0,                         /*tp_as_mapping*/
    0,                         /*tp_hash */
    0,                         /*tp_call*/
    0,                         /*tp_str*/
    0,                         /*tp_getattro*/
    0,                         /*tp_setattro*/
    0,                         /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,        /*tp_flags*/
    "RARArchive",                  /* tp_doc */
    0,		               /* tp_traverse */
    0,		               /* tp_clear */
    0,		               /* tp_richcompare */
    0,		               /* tp_weaklistoffset */
    0,		               /* tp_iter */
    0,		               /* tp_iternext */
    RAR_methods,             /* tp_methods */
    0,             /* tp_members */
    RAR_getsetters,                         /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
    0,                         /* tp_descr_set */
    0,                         /* tp_dictoffset */
    (initproc)RAR_init,      /* tp_init */
    0,                         /* tp_alloc */
    0,                 /* tp_new */
}; // }}}
|
||||
|
||||
// }}} End RARArchive
|
||||
|
||||
|
||||
// Module initialisation for the Python 2 extension module "unrar".
//
// Readies the RARArchive type, creates the module, and publishes two
// attributes on it: the UNRARError exception class and the RARArchive type.
// Returns early (leaving the Python error indicator as set by the failing
// call) if the type cannot be readied or the module/exception cannot be
// created.
PyMODINIT_FUNC
initunrar(void) {
    PyObject *m;

    RARArchiveType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&RARArchiveType) < 0)
        return;

    m = Py_InitModule3(
            "unrar", methods,
            "Support for reading RAR archives"
    );
    if (m == NULL) return;

    UNRARError = PyErr_NewException((char*)"unrar.UNRARError", NULL, NULL);
    if (UNRARError == NULL) return;
    // PyModule_AddObject() steals a reference. Keep an owned one for the
    // global UNRARError pointer so it cannot dangle if the attribute is
    // later removed from the module.
    Py_INCREF(UNRARError);
    PyModule_AddObject(m, "UNRARError", UNRARError);

    Py_INCREF(&RARArchiveType);
    PyModule_AddObject(m, "RARArchive", (PyObject *)&RARArchiveType);
}
|
||||
|
||||
|
||||
|
271
src/calibre/utils/unrar.py
Normal file
271
src/calibre/utils/unrar.py
Normal file
@ -0,0 +1,271 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys, re
|
||||
from io import BytesIO
|
||||
|
||||
try:
    from calibre import force_unicode
    from calibre.constants import filesystem_encoding
    # Bare reference to both names; presumably keeps linters from reporting
    # the imports as unused.
    filesystem_encoding, force_unicode
except ImportError:
    # calibre is not importable: fall back to minimal local equivalents
    filesystem_encoding = sys.getfilesystemencoding()

    def force_unicode(x, enc=filesystem_encoding):
        """Return x decoded with enc (undecodable bytes replaced) when it is
        a bytes object; any other value is returned unchanged."""
        return x.decode(enc, 'replace') if isinstance(x, bytes) else x
|
||||
|
||||
class UNRARError(Exception):
    """Exception type raised by this module for RAR processing errors."""
|
||||
|
||||
class DevNull:
    """Minimal write-only sink: whatever is written to it is discarded."""

    def write(self, x):
        return None
|
||||
|
||||
class RARStream(object):
    """Sequential reader for the items of a RAR archive held in a stream.

    Wraps the ``RARArchive`` object of the ``unrar`` extension module. Use
    the ``current_item`` property to look at the header of the current item
    and ``process_current_item()`` to extract or skip it and advance.

    :param stream: File-like object containing the archive. Its ``name``
        attribute, if present, is passed to the extension as the archive
        name, otherwise ``'<stream>'`` is used.
    :param unrar: The ``unrar`` extension module.
    :param get_comment: Passed through to ``unrar.RARArchive``; the result is
        available as ``self.comment``.
    :raises UNRARError: If the extension fails to open the archive.
    """

    # Destination of the item being extracted, None when there is none.
    # Defined on the class so that handle_data() is safe to call at any
    # time, even before the first call to process_current_item().
    _current_dest = None

    def __init__(self, stream, unrar, get_comment=False):
        self.stream = stream
        self.unrar = unrar
        self._current_cache = None
        self._current_dest = None
        try:
            # self is handed to the extension, presumably so that it can
            # call back into handle_data() with extracted data
            self.r = unrar.RARArchive(stream, force_unicode(
                getattr(stream, 'name', '<stream>'), filesystem_encoding),
                self, get_comment)
        except unrar.UNRARError as e:
            raise UNRARError(type(u'')(e))
        self.comment = self.r.comment

    def handle_data(self, raw):
        """Write raw to the current extraction destination, if there is one."""
        dest = self._current_dest
        if dest is not None:
            dest.write(raw)

    def populate_header(self):
        """Normalise the cached header in place.

        The ``filenamew`` entry is removed and becomes ``filename`` (falling
        back to a decoded ``filename`` when ``filenamew`` is None), with
        backslashes converted to forward slashes.
        """
        c = self._current_cache
        if c['filenamew'] is None:
            c['filenamew'] = self._decode(c['filename'])
        c['filename'] = c.pop('filenamew').replace('\\', '/')

    def _decode(self, raw):
        """Decode raw as UTF-8, then UTF-16LE, then windows-1252 with
        replacement, returning the first result that succeeds."""
        for enc in ('utf-8', 'utf-16le'):
            try:
                return raw.decode(enc)
            except UnicodeDecodeError:
                pass
        return raw.decode('windows-1252', 'replace')

    @property
    def current_item(self):
        """Header dictionary of the current item.

        The header is fetched once and cached until the item is processed.

        :raises EOFError: When there are no more items.
        :raises UNRARError: If the extension reports an error.
        """
        if self._current_cache is None:
            try:
                self._current_cache = self.r.current_item()
            except self.unrar.UNRARError as e:
                raise UNRARError(type(u'')(e))
            if self._current_cache is None:
                raise EOFError('End of RAR file')
            self.populate_header()
        return self._current_cache

    def process_current_item(self, extract_to=None):
        """Extract or skip the current item and advance to the next one.

        :param extract_to: Object with a ``write()`` method that receives the
            item's data, or None to skip the item without extracting it.
        :return: Whatever the extension's ``process_item()`` returns.
        :raises UNRARError: If the extension reports an error.
        """
        self._current_cache = None
        self._current_dest = extract_to
        try:
            return self.r.process_item(extract_to is not None)
        except self.unrar.UNRARError as e:
            raise UNRARError(type(u'')(e))
        finally:
            # Do not keep the destination referenced once the item is done
            self._current_dest = None

    def test(self, print_names=False):
        """Extract every remaining item into a throwaway sink.

        :param print_names: If True, print the name of each item first.
        """
        null = DevNull()
        # sys.stdout.encoding can be None (e.g. Python 2 with redirected
        # output); fall back to UTF-8 and never fail on unencodable names
        enc = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        while True:
            try:
                h = self.current_item
            except EOFError:
                break
            if print_names:
                print (h['filename'].encode(enc, 'replace'))
            self.process_current_item(null)
|
||||
|
||||
|
||||
def RARFile(stream, get_comment=False):
    """Create a RARStream for stream using the unrar extension module.

    The extension is looked up in calibre's plugin table. When calibre cannot
    be imported, the ``unrar`` entry of ``sys.modules`` is used instead.

    :raises RuntimeError: If calibre reports an error loading the plugin.
    """
    try:
        from calibre.constants import plugins
    except ImportError:
        module, failure = sys.modules['unrar'], None
    else:
        module, failure = plugins['unrar']
    if failure:
        raise RuntimeError('Failed to load unrar module with error: %s'
                %failure)
    return RARStream(stream, module, get_comment=get_comment)
|
||||
|
||||
class SaveStream(object):
    """Context manager that seeks a stream to position 0 on entry and again
    on exit. Nothing is bound by ``with ... as``."""

    def __init__(self, stream):
        self.stream = stream

    def _rewind(self):
        # Shared by __enter__ and __exit__
        self.stream.seek(0)

    def __enter__(self):
        self._rewind()

    def __exit__(self, *args):
        self._rewind()
|
||||
|
||||
def safe_path(base, relpath):
    """Resolve relpath against base, refusing anything outside of base.

    :param base: Directory that the result must be located inside.
    :param relpath: Path (normally relative) to resolve against base.
    :return: The absolute path, or None if it is base itself or is not
        located inside base.
    """
    base = os.path.abspath(base)
    path = os.path.abspath(os.path.join(base, relpath))
    nbase, npath = os.path.normcase(base), os.path.normcase(path)
    # Compare against base plus a trailing separator: a bare prefix test
    # would accept siblings such as /x/foobar when base is /x/foo. The
    # rstrip keeps this correct when base is a filesystem root.
    prefix = nbase.rstrip(os.sep) + os.sep
    if npath == nbase or not npath.startswith(prefix):
        return None
    return path
|
||||
|
||||
def is_useful(h):
    """Return True if header h is not a label, a symlink, password
    protected or a directory."""
    skip_flags = ('is_label', 'is_symlink', 'has_password', 'is_directory')
    return not any(h[flag] for flag in skip_flags)
|
||||
|
||||
def stream_extract(stream, location):
    """Extract the RAR archive in stream into the directory location.

    location is created if it does not exist. Labels, symlinks and password
    protected items are skipped, as is any item whose name would place it
    outside of location. Directories are created on a best-effort basis.

    :param stream: Seekable file-like object containing the archive.
    :param location: Destination directory.
    """
    location = os.path.abspath(location)
    if not os.path.exists(location):
        os.makedirs(location)

    with SaveStream(stream):
        f = RARFile(stream)
        while True:
            try:
                h = f.current_item
            except EOFError:
                break
            path = safe_path(location, h['filename'])
            if path is None or not is_useful(h):
                # Always advance past the item. Without this an item with an
                # unsafe path would stay current forever and the loop would
                # never terminate.
                f.process_current_item()
                if h['is_directory'] and path is not None:
                    try:
                        os.makedirs(path)
                    except (EnvironmentError, UnicodeError):
                        # We ignore create directory errors since we dont
                        # care about missing empty dirs
                        pass
                continue
            base = os.path.dirname(path)
            if not os.path.exists(base):
                os.makedirs(base)
            with open(path, 'wb') as dest:
                f.process_current_item(dest)
|
||||
|
||||
def extract(path, location):
    """Extract the RAR file at path into the directory location."""
    with open(path, 'rb') as rar_file:
        stream_extract(rar_file, location)
|
||||
|
||||
def names(stream):
    """Generate the filenames of all useful items (see is_useful) in the RAR
    archive in stream, in archive order. No data is extracted."""
    with SaveStream(stream):
        archive = RARFile(stream)
        while True:
            try:
                header = archive.current_item
            except EOFError:
                return
            archive.process_current_item()
            if not is_useful(header):
                continue
            yield header['filename']
|
||||
|
||||
def extract_member(stream, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I),
        name=None):
    """Extract the first useful item whose filename equals name or is
    matched by the regular expression match.

    :param stream: Seekable file-like object containing the archive.
    :param match: Compiled pattern searched for in filenames, or None.
    :param name: Exact filename to look for, or None.
    :return: A (filename, data) tuple, or None if no item qualifies.
    """

    def is_match(fname):
        if name is not None and fname == name:
            return True
        return match is not None and match.search(fname) is not None

    with SaveStream(stream):
        archive = RARFile(stream)
        while True:
            try:
                header = archive.current_item
            except EOFError:
                break
            if is_useful(header) and is_match(header['filename']):
                buf = BytesIO()
                archive.process_current_item(buf)
                return header['filename'], buf.getvalue()
            # Not the item we want: skip it
            archive.process_current_item()
|
||||
|
||||
def extract_first_alphabetically(stream):
    """Extract the image (png, jpg, jpeg or gif extension) whose filename
    sorts first among the useful items of the archive in stream.

    :return: Result of extract_member() for that filename.
    :raises IndexError: If the archive contains no such image.
    """
    image_exts = {'png', 'jpg', 'jpeg', 'gif'}
    candidates = sorted(
        x for x in names(stream)
        if os.path.splitext(x)[1][1:].lower() in image_exts)
    return extract_member(stream, name=candidates[0], match=None)
|
||||
|
||||
# Test normal RAR file {{{
|
||||
def test_basic():
    """Self-test against a small RAR archive embedded below.

    Checks the archive comment, the identification of directories and
    symlinks, the set of item names and the extracted data of every item.
    Then runs RARStream.test() repeatedly and fails if the value reported by
    calibre's memory() grew by more than 1 (MB, going by the error message).

    :raises ValueError: If any check fails.
    """

    # The archive used as the fixture
    stream = BytesIO(b"Rar!\x1a\x07\x00\xcf\x90s\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x14\xe7z\x00\x80#\x00\x17\x00\x00\x00\r\x00\x00\x00\x03\xc2\xb3\x96o\x00\x00\x00\x00\x1d3\x03\x00\x00\x00\x00\x00CMT\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe6h\x04\x17\xff\xcd\x0f\xffk9b\x11]^\x80\xd3dt \x90+\x00\x14\x00\x00\x00\x08\x00\x00\x00\x03\xf1\x84\x93\\\xb9]yA\x1d3\t\x00\xa4\x81\x00\x001\\sub-one\x00\xc0\x0c\x00\x8f\xec\x89\xfe.JM\x86\x82\x0c_\xfd\xfd\xd7\x11\x1a\xef@\x9eHt \x80'\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x03\x9f\xa8\x17\xf8\xaf]yA\x1d3\x07\x00\xa4\x81\x00\x00one.txt\x00\x08\xbf\x08\xae\xf3\xca\x87\xfeo\xfe\xd2n\x80-Ht \x82:\x00\x18\x00\x00\x00\x10\x00\x00\x00\x03\xa86\x81\xdf\xf9fyA\x1d3\x1a\x00\xa4\x81\x00\x00\xe8\xaf\xb6\xe6\xaf\x94\xe5\xb1\x81.txt\x00\x8bh\xf6\xd4kA\\.\x00txt\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe2l\x91\x189\xff\xdf\xfe\xc2\xd3:g\x9a\x19F=cYt \x928\x00\x11\x00\x00\x00\x08\x00\x00\x00\x03\x7f\xd6\xb6\x7f\xeafyA\x1d3\x16\x00\xa4\x81\x00\x00F\xc3\xbc\xc3\x9fe.txt\x00\x01\x00F\xfc\xdfe\x00.txt\x00\xc0<D\xfe\xc8\xef\xbc\xd1\x04I?\xfd\xff\xdbF)]\xe8\xb9\xe1t \x90/\x00\x13\x00\x00\x00\x08\x00\x00\x00\x03\x1a$\x932\xc2]yA\x1d3\r\x00\xa4\x81\x00\x002\\sub-two.txt\x00\xc0\x10\x00S\xec\xcb\x7f\x8b\xa5(\x0b\x01\xcb\xef\xdf\xf6t\x89\x97z\x0eft \x90)\x00\r\x00\x00\x00\r\x00\x00\x00\x03c\x89K\xd3\xc8fyA\x140\x07\x00\xff\xa1\x00\x00symlink\x00\xc02/sub-two.txt\xeb\x86t\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xb9]yA\x140\x01\x00\xedA\x00\x001\x00\xc0\xe0Dt\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xc2]yA\x140\x01\x00\xedA\x00\x002\x00\xc0u\xa1t \x80,\x00\r\x00\x00\x00\r\x00\x00\x00\x03T\xea\x04\xca\xe6\x84yA\x140\x0c\x00\xa4\x81\x00\x00uncompresseduncompressed\n\xda\x10t \x900\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x035K.\xa6\x18\x85yA\x1d5\x0e\x00\xa4\x81\x00\x00max-compressed\x00\xc0\x00\x08\xbf\x08\xae\xf2\xcc\x01s\xf8\xff\xec\x96\xe8\xc4={\x00@\x07\x00")
    # Expected extracted data, keyed by item name
    tdata = {u'1': b'',
            u'1/sub-one': b'sub-one\n',
            u'2': b'',
            u'2/sub-two.txt': b'sub-two\n',
            u'F\xfc\xdfe.txt': b'unicode\n',
            u'max-compressed': b'max\n',
            u'one.txt': b'one\n',
            u'symlink': b'2/sub-two.txt',
            u'uncompressed': b'uncompressed\n',
            u'\u8bf6\u6bd4\u5c41.txt': b'chinese unicode\n'}
    f = RARFile(stream, True)
    names = set()
    data = {}
    if f.comment != b'some comment\n':
        raise ValueError('Comment not read: %r != %r'%(
            f.comment, b'some comment\n'))
    # Walk every item, extracting each one into memory
    while True:
        try:
            h = f.current_item
        except EOFError:
            break
        isdir = h['is_directory']
        if isdir and h['filename'] not in {'1', '2'}:
            raise ValueError('Incorrect identification of a directory')
        if h['is_symlink'] and h['filename'] != 'symlink':
            raise ValueError('Incorrect identification of a symlink')
        names.add(h['filename'])
        et = BytesIO()
        f.process_current_item(et)
        data[h['filename']] = et.getvalue()

    if names != {'1/sub-one', 'one.txt', '2/sub-two.txt',
            '1', '2', '诶比屁.txt', 'Füße.txt', 'symlink',
            'uncompressed', 'max-compressed'}:
        raise ValueError('Name list does not match')
    if data != tdata:
        raise ValueError('Some data was not read correctly')

    # Leak check: process the same archive num times and compare the value
    # of memory() before and after
    from calibre.utils.mem import memory
    import gc
    del f
    for i in xrange(3): gc.collect()
    num = 300
    start = memory()
    s = SaveStream(stream)
    for i in xrange(num):
        with s:
            f = RARFile(stream)
            f.test()
    del f
    del s
    for i in xrange(3): gc.collect()
    used = memory() - start
    if used > 1:
        raise ValueError('Leaked %s MB for %d calls'%(used, num))
|
||||
# }}}
|
||||
|
||||
def test_rar(path):
    """Open the RAR file at path and run RARStream.test() on it with
    print_names=True."""
    with open(path, 'rb') as rar_file:
        RARFile(rar_file).test(print_names=True)
|
||||
|
||||
if __name__ == '__main__':
    # Executing this module as a script runs the embedded-archive self-test
    test_basic()
|
Loading…
x
Reference in New Issue
Block a user