EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now. Fixes #1008810 (Private bug)

2025-07-09 03:04:10 -04:00 · 2012-06-05 10:41:11 +05:30 · 2012-06-05 10:41:11 +05:30 · 61a3c2aae4
commit 61a3c2aae4
parent 7e4efc5e41
2 changed files with 37 additions and 12 deletions
--- a/src/calibre/ebooks/conversion/plugins/epub_input.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_input.py
@@ -8,6 +8,8 @@ from itertools import cycle

 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation

+ADOBE_OBFUSCATION =  'http://ns.adobe.com/pdf/enc#RC'
+
 class EPUBInput(InputFormatPlugin):

    name        = 'EPUB Input'
@@ -18,18 +20,24 @@ class EPUBInput(InputFormatPlugin):

    recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])

-    def decrypt_font(self, key, path):
-        raw = open(path, 'rb').read()
-        crypt = raw[:1024]
-        key = cycle(iter(key))
-        decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
+    def decrypt_font(self, key, path, algorithm):
+        is_adobe = algorithm == ADOBE_OBFUSCATION
+        crypt_len = 1024 if is_adobe else 1040
+        with open(path, 'rb') as f:
+            raw = f.read()
+        crypt = bytearray(raw[:crypt_len])
+        key = cycle(iter(bytearray(key)))
+        decrypt = bytes(bytearray(x^key.next() for x in crypt))
        with open(path, 'wb') as f:
            f.write(decrypt)
-            f.write(raw[1024:])
+            f.write(raw[crypt_len:])

    def process_encryption(self, encfile, opf, log):
        from lxml import etree
-        import uuid
+        import uuid, hashlib
+        idpf_key = opf.unique_identifier
+        if idpf_key:
+            idpf_key = hashlib.sha1(idpf_key).digest()
        key = None
        for item in opf.identifier_iter():
            scheme = None
@@ -39,8 +47,8 @@ class EPUBInput(InputFormatPlugin):
            if (scheme and scheme.lower() == 'uuid') or \
                    (item.text and item.text.startswith('urn:uuid:')):
                try:
-                    key = str(item.text).rpartition(':')[-1]
-                    key = list(map(ord, uuid.UUID(key).bytes))
+                    key = bytes(item.text).rpartition(':')[-1]
+                    key = uuid.UUID(key).bytes
                except:
                    import traceback
                    traceback.print_exc()
@ -50,14 +58,16 @@ class EPUBInput(InputFormatPlugin):
            root = etree.parse(encfile)
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
                algorithm = em.get('Algorithm', '')
-                if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
+                if algorithm not in {ADOBE_OBFUSCATION,
+                        'http://www.idpf.org/2008/embedding'}:
                    return False
                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
                uri = cr.get('URI')
                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
-                if key is not None and os.path.exists(path):
+                tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
+                if (tkey and os.path.exists(path)):
                    self._encrypted_font_uris.append(uri)
-                    self.decrypt_font(key, path)
+                    self.decrypt_font(tkey, path, algorithm)
            return True
        except:
            import traceback
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -991,6 +991,21 @@ class OPF(object): # {{{
        for item in self.identifier_path(self.metadata):
            yield item

+    @property
+    def unique_identifier(self):
+        uuid_elem = None
+        for attr in self.root.attrib:
+            if attr.endswith('unique-identifier'):
+                uuid_elem = self.root.attrib[attr]
+                break
+        if uuid_elem:
+            matches = self.root.xpath('//*[@id=%r]'%uuid_elem)
+            if matches:
+                for m in matches:
+                    raw = m.text
+                    if raw:
+                        return raw.rpartition(':')[-1]
+
    def guess_cover(self):
        '''
        Try to guess a cover. Needed for some old/badly formed OPF files.