From a37d1ecb82f5e70e840bea2f85cd0c8aa38c3154 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 5 Oct 2008 17:18:21 -0700
Subject: [PATCH] Handle attempts to convert DRMed files more elegantly

---
 src/calibre/ebooks/__init__.py              | 2 ++
 src/calibre/ebooks/epub/from_any.py         | 4 ++++
 src/calibre/ebooks/html.py                  | 5 +++--
 src/calibre/ebooks/lit/reader.py            | 3 ++-
 src/calibre/ebooks/lrf/epub/convert_from.py | 4 +++-
 src/calibre/ebooks/lrf/pdf/convert_from.py  | 4 ++--
 src/calibre/ebooks/metadata/epub.py         | 6 ++++--
 src/calibre/ebooks/metadata/toc.py          | 5 ++++-
 src/calibre/ebooks/mobi/reader.py           | 3 ++-
 src/calibre/gui2/main.py                    | 8 +++++++-
 src/calibre/gui2/tags.py                    | 2 --
 11 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index d804ccc58e..58dff6490c 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -15,6 +15,8 @@ class ConversionError(Exception):
 class UnknownFormatError(Exception):
     pass
 
+class DRMError(ValueError):
+    """Raised when an ebook cannot be converted because it is DRM protected."""
 
 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
                    'html', 'xhtml', 'epub', 'pdf', 'prc', 'mobi', 'azw',
diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py
index 950e73ba1a..6ebf86e81a 100644
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@@ -11,6 +11,7 @@ import sys, os, re
 from contextlib import nested
 
 from calibre import extract, walk
+from calibre.ebooks import DRMError
 from calibre.ebooks.epub import config as common_config
 from calibre.ebooks.epub.from_html import convert as html2epub
 from calibre.ptempfile import TemporaryDirectory
@@ -71,6 +72,9 @@ def pdf2opf(path, tdir, opts):
 def epub2opf(path, tdir, opts):
     zf = ZipFile(path)
     zf.extractall(tdir)
+    # An OCF encryption manifest is treated as a sign of DRM; refuse such books.
+    if os.path.exists(os.path.join(tdir, 'META-INF', 'encryption.xml')):
+        raise DRMError(os.path.basename(path))
     for f in walk(tdir):
         if f.lower().endswith('.opf'):
             return f
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index e5e6f3f37a..350c535d05 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -712,7 +712,6 @@ class Processor(Parser):
                     for rule in sheet:
                         self.stylesheet.add(rule)
                 style.getparent().remove(style)
-        
         cache = {}
         class_counter = 0
         for font in self.root.xpath('//font'):
@@ -753,7 +752,9 @@ class Processor(Parser):
         
         css = '\n'.join(['.%s {%s;}'%(cn, setting) for \
                          setting, cn in cache.items()])
-        self.stylesheet = self.css_parser.parseString(self.preprocess_css(css))
+        sheet = self.css_parser.parseString(self.preprocess_css(css))
+        for rule in sheet:
+            self.stylesheet.add(rule)
         css = ''
         if self.opts.override_css:
             css += '\n\n' + self.opts.override_css
diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index febaf280c6..1a08803126 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -14,6 +14,7 @@ from lxml import etree
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
+from calibre.ebooks import DRMError
 from calibre import plugins
 lzx, lxzerror = plugins['lzx']
 msdes, msdeserror = plugins['msdes']
@@ -653,7 +654,7 @@ class LitReader(object):
                 raise LitError('Unable to decrypt title key!')
             self.bookkey = bookkey[1:9]
         else:
-            raise LitError('Cannot extract content from a DRM protected ebook')
+            raise DRMError()
 
     def _calculate_deskey(self):
         hashfiles = ['/meta', '/DRMStorage/DRMSource']
diff --git a/src/calibre/ebooks/lrf/epub/convert_from.py b/src/calibre/ebooks/lrf/epub/convert_from.py
index 110946824b..071a2cb497 100644
--- a/src/calibre/ebooks/lrf/epub/convert_from.py
+++ b/src/calibre/ebooks/lrf/epub/convert_from.py
@@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, sys, shutil, logging
 from tempfile import mkdtemp
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks import ConversionError
+from calibre.ebooks import ConversionError, DRMError
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
 from calibre.ebooks.metadata.opf import OPF
 from calibre.ebooks.metadata.epub import OCFDirReader
@@ -27,6 +27,8 @@ def generate_html(pathtoepub, logger):
     os.rmdir(tdir)
     try:
         ZipFile(pathtoepub).extractall(tdir)
+        if os.path.exists(os.path.join(tdir, 'META-INF', 'encryption.xml')):
+            raise DRMError(os.path.basename(pathtoepub))
     except:
         if os.path.exists(tdir) and os.path.isdir(tdir):
             shutil.rmtree(tdir)
diff --git a/src/calibre/ebooks/lrf/pdf/convert_from.py b/src/calibre/ebooks/lrf/pdf/convert_from.py
index b365f68d17..ac9c4b3ade 100644
--- a/src/calibre/ebooks/lrf/pdf/convert_from.py
+++ b/src/calibre/ebooks/lrf/pdf/convert_from.py
@@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import sys, os, subprocess, logging
 from functools import partial
 from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux
-from calibre.ebooks import ConversionError
+from calibre.ebooks import ConversionError, DRMError
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
@@ -53,7 +53,7 @@ def generate_html(pathtopdf, tdir):
             err = p.stderr.read()
             raise ConversionError, err
         if not os.path.exists(index) or os.stat(index).st_size < 100:
-            raise ConversionError(os.path.basename(pathtopdf) + _(' does not allow copying of text.'), True)
+            raise DRMError(os.path.basename(pathtopdf))
         
         raw = open(index, 'rb').read()
         open(index, 'wb').write('<!-- created by calibre\'s pdftohtml -->\n'+raw)
diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index 4346000289..d08d40080d 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -43,8 +43,9 @@ class Container(dict):
                 raise EPubException("<rootfile/> element malformed")
 
 class OCF(object):
-    MIMETYPE = 'application/epub+zip'
-    CONTAINER_PATH = 'META-INF/container.xml'
+    MIMETYPE        = 'application/epub+zip'
+    CONTAINER_PATH  = 'META-INF/container.xml'
+    ENCRYPTION_PATH = 'META-INF/encryption.xml'
     
     def __init__(self):
         raise NotImplementedError('Abstract base class')
@@ -70,6 +71,7 @@ class OCFReader(OCF):
                 self.opf = OPF(f, self.root)
         except KeyError:
             raise EPubException("missing OPF package file")
+                
 
 class OCFZipReader(OCFReader):
     def __init__(self, stream, mode='r', root=None):
diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py
index 0583b88242..f50a525264 100644
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@@ -123,7 +123,10 @@ class TOC(list):
                 path = opfreader.manifest.item(toc.lower())
                 path = getattr(path, 'path', path)
                 if path and os.access(path, os.R_OK):
-                    self.read_ncx_toc(path)
+                    try:
+                        self.read_ncx_toc(path)
+                    except Exception, err:
+                        print 'WARNING: Invalid NCX file:', err
                     return
                 cwd = os.path.abspath(self.base_path)
                 m = glob.glob(os.path.join(cwd, '*.ncx'))
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 5a4a729bb7..15324dfbbc 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -13,6 +13,7 @@ except ImportError:
     import Image as PILImage
 
 from calibre import __appname__, entity_to_unicode
+from calibre.ebooks import DRMError
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
@@ -165,7 +166,7 @@ class MobiReader(object):
     def extract_content(self, output_dir=os.getcwdu()):
         output_dir = os.path.abspath(output_dir)
         if self.book_header.encryption_type != 0:
-            raise MobiError('Cannot extract content from a DRM protected ebook')
+            raise DRMError(self.name)
         
         processed_records = self.extract_text()
         self.add_anchors()
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index 31f6b891a7..90fe20f0a3 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -1117,7 +1117,13 @@ class Main(MainWindow, Ui_MainWindow):
             self.device_error_dialog.show()
             
     def job_exception(self, job):
-        
+        try:
+            if job.exception[0] == 'DRMError':
+                error_dialog(self, _('Conversion Error'), 
+                    _('<p>Could not convert: %s<p>It is a <a href="http://wiki.mobileread.com/wiki/DRM">DRM</a>ed book. You must first remove the DRM using 3rd party tools.')%job.description.split(':')[-1]).exec_()
+                return
+        except:
+            pass
         only_msg = getattr(job.exception, 'only_msg', False)
         try:
             print job.console_text()
diff --git a/src/calibre/gui2/tags.py b/src/calibre/gui2/tags.py
index dd8b3e076b..d7d6dc62d9 100644
--- a/src/calibre/gui2/tags.py
+++ b/src/calibre/gui2/tags.py
@@ -63,8 +63,6 @@ class TagsModel(QAbstractItemModel):
     def refresh(self):
         old_data = self._data
         self._data = self.db.get_categories(config['sort_by_popularity'])
-        for key in self._data:
-            self._data[key] = self._data[key]
         for key in old_data.keys():
             for tag in old_data[key]:
                 try: