mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
LIT fixes from llasram
This commit is contained in:
commit
06df1da2ec
@ -107,11 +107,12 @@ class UnBinary(object):
|
|||||||
AMPERSAND_RE = re.compile(
|
AMPERSAND_RE = re.compile(
|
||||||
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
|
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
|
||||||
|
|
||||||
def __init__(self, bin, manifest, map=OPF_MAP):
|
def __init__(self, bin, path, manifest, map=OPF_MAP):
|
||||||
self.manifest = manifest
|
self.manifest = manifest
|
||||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||||
self.opf = map is OPF_MAP
|
self.opf = map is OPF_MAP
|
||||||
self.bin = bin
|
self.bin = bin
|
||||||
|
self.dir = os.path.dirname(path)
|
||||||
self.buf = cStringIO.StringIO()
|
self.buf = cStringIO.StringIO()
|
||||||
self.binary_to_text()
|
self.binary_to_text()
|
||||||
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
|
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
|
||||||
@ -122,9 +123,19 @@ class UnBinary(object):
|
|||||||
|
|
||||||
def item_path(self, internal_id):
|
def item_path(self, internal_id):
|
||||||
try:
|
try:
|
||||||
return self.manifest[internal_id].path
|
target = self.manifest[internal_id].path
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return internal_id
|
return internal_id
|
||||||
|
if not self.dir:
|
||||||
|
return target
|
||||||
|
target = target.split('/')
|
||||||
|
base = self.dir.split('/')
|
||||||
|
for index in xrange(min(len(base), len(target))):
|
||||||
|
if base[index] != target[index]: break
|
||||||
|
else:
|
||||||
|
index += 1
|
||||||
|
relpath = (['..'] * (len(base) - index)) + target[index:]
|
||||||
|
return '/'.join(relpath)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.raw
|
return self.raw
|
||||||
@ -147,7 +158,7 @@ class UnBinary(object):
|
|||||||
continue
|
continue
|
||||||
elif c == '\v':
|
elif c == '\v':
|
||||||
c = '\n'
|
c = '\n'
|
||||||
self.buf.write(c.encode('utf-8'))
|
self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
|
||||||
|
|
||||||
elif state == 'get flags':
|
elif state == 'get flags':
|
||||||
if oc == 0:
|
if oc == 0:
|
||||||
@ -206,7 +217,7 @@ class UnBinary(object):
|
|||||||
state = 'get attr length'
|
state = 'get attr length'
|
||||||
continue
|
continue
|
||||||
attr = None
|
attr = None
|
||||||
if oc in current_map and current_map[oc]:
|
if current_map and oc in current_map and current_map[oc]:
|
||||||
attr = current_map[oc]
|
attr = current_map[oc]
|
||||||
elif oc in self.attr_map:
|
elif oc in self.attr_map:
|
||||||
attr = self.attr_map[oc]
|
attr = self.attr_map[oc]
|
||||||
@ -247,7 +258,8 @@ class UnBinary(object):
|
|||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
elif count > 0:
|
elif count > 0:
|
||||||
if not in_censorship:
|
if not in_censorship:
|
||||||
self.buf.write(unicode(c).encode('utf-8'))
|
self.buf.write(c.encode(
|
||||||
|
'ascii', 'xmlcharrefreplace'))
|
||||||
count -= 1
|
count -= 1
|
||||||
if count == 0:
|
if count == 0:
|
||||||
if not in_censorship:
|
if not in_censorship:
|
||||||
@ -299,7 +311,8 @@ class UnBinary(object):
|
|||||||
path = self.item_path(doc)
|
path = self.item_path(doc)
|
||||||
if m and frag:
|
if m and frag:
|
||||||
path += m + frag
|
path += m + frag
|
||||||
self.buf.write((u'"%s"' % path).encode('utf-8'))
|
self.buf.write((u'"%s"' % path).encode(
|
||||||
|
'ascii', 'xmlcharrefreplace'))
|
||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
return index
|
return index
|
||||||
|
|
||||||
@ -597,15 +610,16 @@ class LitReader(object):
|
|||||||
item.path = os.path.basename(item.path)
|
item.path = os.path.basename(item.path)
|
||||||
|
|
||||||
def _read_meta(self):
|
def _read_meta(self):
|
||||||
|
path = 'content.opf'
|
||||||
raw = self.get_file('/meta')
|
raw = self.get_file('/meta')
|
||||||
try:
|
try:
|
||||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
|
xml = OPF_DECL + unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
|
||||||
except LitError:
|
except LitError:
|
||||||
if 'PENGUIN group' not in raw: raise
|
if 'PENGUIN group' not in raw: raise
|
||||||
print "WARNING: attempting PENGUIN malformed OPF fix"
|
print "WARNING: attempting PENGUIN malformed OPF fix"
|
||||||
raw = raw.replace(
|
raw = raw.replace(
|
||||||
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
|
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
|
||||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
|
xml = OPF_DECL + unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
|
||||||
self.meta = xml
|
self.meta = xml
|
||||||
|
|
||||||
def _read_drm(self):
|
def _read_drm(self):
|
||||||
@ -645,13 +659,6 @@ class LitReader(object):
|
|||||||
key[i % 8] ^= ord(digest[i])
|
key[i % 8] ^= ord(digest[i])
|
||||||
return ''.join(chr(x) for x in key)
|
return ''.join(chr(x) for x in key)
|
||||||
|
|
||||||
def get_markup_file(self, name):
|
|
||||||
raw = self.get_file(name)
|
|
||||||
decl, map = (OPF_DECL, OPF_MAP) \
|
|
||||||
if name == '/meta' else (HTML_DECL, HTML_MAP)
|
|
||||||
xml = decl + unicode(UnBinary(raw, self.manifest, map))
|
|
||||||
return xml
|
|
||||||
|
|
||||||
def get_file(self, name):
|
def get_file(self, name):
|
||||||
entry = self.entries[name]
|
entry = self.entries[name]
|
||||||
if entry.section == 0:
|
if entry.section == 0:
|
||||||
@ -748,6 +755,20 @@ class LitReader(object):
|
|||||||
raise LitError("Failed to completely decompress section")
|
raise LitError("Failed to completely decompress section")
|
||||||
return ''.join(result)
|
return ''.join(result)
|
||||||
|
|
||||||
|
def get_entry_content(self, entry):
|
||||||
|
if 'spine' in entry.state:
|
||||||
|
name = '/'.join(('/data', entry.internal, 'content'))
|
||||||
|
path = entry.path
|
||||||
|
raw = self.get_file(name)
|
||||||
|
decl, map = (OPF_DECL, OPF_MAP) \
|
||||||
|
if name == '/meta' else (HTML_DECL, HTML_MAP)
|
||||||
|
content = decl + unicode(UnBinary(raw, path, self.manifest, map))
|
||||||
|
content = content.encode('utf-8')
|
||||||
|
else:
|
||||||
|
name = '/'.join(('/data', entry.internal))
|
||||||
|
content = self.get_file(name)
|
||||||
|
return content
|
||||||
|
|
||||||
def extract_content(self, output_dir=os.getcwdu()):
|
def extract_content(self, output_dir=os.getcwdu()):
|
||||||
output_dir = os.path.abspath(output_dir)
|
output_dir = os.path.abspath(output_dir)
|
||||||
try:
|
try:
|
||||||
@ -763,12 +784,7 @@ class LitReader(object):
|
|||||||
path = os.path.join(output_dir, entry.path)
|
path = os.path.join(output_dir, entry.path)
|
||||||
self._ensure_dir(path)
|
self._ensure_dir(path)
|
||||||
with open(path, 'wb') as f:
|
with open(path, 'wb') as f:
|
||||||
if 'spine' in entry.state:
|
f.write(self.get_entry_content(entry))
|
||||||
name = '/'.join(('/data', entry.internal, 'content'))
|
|
||||||
f.write(self.get_markup_file(name).encode('utf-8'))
|
|
||||||
else:
|
|
||||||
name = '/'.join(('/data', entry.internal))
|
|
||||||
f.write(self.get_file(name))
|
|
||||||
|
|
||||||
def _ensure_dir(self, path):
|
def _ensure_dir(self, path):
|
||||||
dir = os.path.dirname(path)
|
dir = os.path.dirname(path)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user