EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now. Fixes #1008810 (Private bug)

This commit is contained in:
Kovid Goyal 2012-06-05 10:41:11 +05:30
parent 7e4efc5e41
commit 61a3c2aae4
2 changed files with 37 additions and 12 deletions

View File

@ -8,6 +8,8 @@ from itertools import cycle
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
# EncryptionMethod "Algorithm" value that selects the Adobe font obfuscation
# scheme; any other accepted algorithm is handled as IDPF obfuscation.
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
class EPUBInput(InputFormatPlugin): class EPUBInput(InputFormatPlugin):
name = 'EPUB Input' name = 'EPUB Input'
@ -18,18 +20,24 @@ class EPUBInput(InputFormatPlugin):
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
def decrypt_font(self, key, path, algorithm):
    '''
    De-obfuscate the font file at ``path`` in place.

    The leading bytes of the file are XORed with ``key`` (repeated as
    often as needed) and the rest of the file is written back unchanged.

    :param key: Byte string used as the repeating XOR key. An empty key
        leaves the file untouched.
    :param path: Path of the font file; it is read fully and rewritten.
    :param algorithm: Algorithm URI. ADOBE_OBFUSCATION selects a 1024
        byte obfuscated prefix, any other value selects 1040 bytes.
    '''
    if not key:
        # Nothing to XOR with. Cycling over an empty key would exhaust
        # immediately and drop the file's prefix instead of decoding it.
        return
    # The two schemes differ only in how much of the file start is obfuscated.
    crypt_len = 1024 if algorithm == ADOBE_OBFUSCATION else 1040
    with open(path, 'rb') as f:
        raw = f.read()
    keystream = cycle(bytearray(key))
    # bytearray yields integers, so the XOR needs no ord()/chr() round trip.
    # The next() builtin is used rather than the Python 2 only .next() method.
    decrypted = bytes(bytearray(
        b ^ next(keystream) for b in bytearray(raw[:crypt_len])))
    with open(path, 'wb') as f:
        f.write(decrypted)
        f.write(raw[crypt_len:])
def process_encryption(self, encfile, opf, log): def process_encryption(self, encfile, opf, log):
from lxml import etree from lxml import etree
import uuid import uuid, hashlib
idpf_key = opf.unique_identifier
if idpf_key:
idpf_key = hashlib.sha1(idpf_key).digest()
key = None key = None
for item in opf.identifier_iter(): for item in opf.identifier_iter():
scheme = None scheme = None
@ -39,8 +47,8 @@ class EPUBInput(InputFormatPlugin):
if (scheme and scheme.lower() == 'uuid') or \ if (scheme and scheme.lower() == 'uuid') or \
(item.text and item.text.startswith('urn:uuid:')): (item.text and item.text.startswith('urn:uuid:')):
try: try:
key = str(item.text).rpartition(':')[-1] key = bytes(item.text).rpartition(':')[-1]
key = list(map(ord, uuid.UUID(key).bytes)) key = uuid.UUID(key).bytes
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -50,14 +58,16 @@ class EPUBInput(InputFormatPlugin):
root = etree.parse(encfile) root = etree.parse(encfile)
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'): for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
algorithm = em.get('Algorithm', '') algorithm = em.get('Algorithm', '')
if algorithm != 'http://ns.adobe.com/pdf/enc#RC': if algorithm not in {ADOBE_OBFUSCATION,
'http://www.idpf.org/2008/embedding'}:
return False return False
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0] cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
uri = cr.get('URI') uri = cr.get('URI')
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/'))) path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
if key is not None and os.path.exists(path): tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
if (tkey and os.path.exists(path)):
self._encrypted_font_uris.append(uri) self._encrypted_font_uris.append(uri)
self.decrypt_font(key, path) self.decrypt_font(tkey, path, algorithm)
return True return True
except: except:
import traceback import traceback

View File

@ -991,6 +991,21 @@ class OPF(object): # {{{
for item in self.identifier_path(self.metadata): for item in self.identifier_path(self.metadata):
yield item yield item
@property
def unique_identifier(self):
    '''
    Text of the element referenced by the root element's
    unique-identifier attribute, with everything up to and including the
    last ':' removed.

    Returns None when the attribute is missing or empty, or when no
    element carrying that id has any text.
    '''
    ident_ref = None
    for attr in self.root.attrib:
        # Matches any attribute whose name ends in 'unique-identifier'
        if attr.endswith('unique-identifier'):
            ident_ref = self.root.attrib[attr]
            break
    if not ident_ref:
        return None
    # Pass the id as an XPath variable instead of interpolating its repr()
    # into the expression: repr() of a unicode value (u'...') or of a value
    # containing quotes does not produce a valid XPath string literal.
    # NOTE(review): assumes self.root is an lxml element, whose xpath()
    # accepts $variables as keyword arguments -- confirm.
    for elem in self.root.xpath('//*[@id=$ident_ref]', ident_ref=ident_ref):
        if elem.text:
            return elem.text.rpartition(':')[-1]
    return None
def guess_cover(self): def guess_cover(self):
''' '''
Try to guess a cover. Needed for some old/badly formed OPF files. Try to guess a cover. Needed for some old/badly formed OPF files.