Pull from driver-dev

2025-07-08 02:34:06 -04:00 · 2009-06-20 19:46:33 -07:00 · 2009-06-20 19:46:33 -07:00 · 2d2fadcbd6
commit 2d2fadcbd6
parent fc1f350675 25911a8598
7 changed files with 86 additions and 56 deletions
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@ -403,9 +403,8 @@ def fix_ids(main, carda, cardb):
        for child in db.root_element.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.hasAttribute('id'):
                id_map[child.getAttribute('id')] = str(cid)
-                child.setAttribute('sourceid', '1')
-                #child.setAttribute("sourceid",
-                #    '0' if getattr(child, 'tagName', '').endswith('playlist') else '1')
+                child.setAttribute("sourceid",
+                    '0' if getattr(child, 'tagName', '').endswith('playlist') else '1')
                child.setAttribute('id', str(cid))
                cid += 1

--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -180,16 +180,25 @@ class PRS505(CLI, Device):

        return zip(paths, sizes, ctimes, cycle([on_card]))

-    @classmethod
-    def add_books_to_metadata(cls, locations, metadata, booklists):
+    def add_books_to_metadata(self, locations, metadata, booklists):
        metadata = iter(metadata)
        for location in locations:
            info = metadata.next()
            path = location[0]
            blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0
-            name = path.rpartition(os.sep)[2]
-            name = (cls.CARD_PATH_PREFIX+'/' if blist else 'database/media/books/') + name
+            
+            if path.startswith(self._main_prefix):
+                name = path.replace(self._main_prefix, '')
+            elif path.startswith(self._card_a_prefix):
+                name = path.replace(self._card_a_prefix, '')
+            elif path.startswith(self._card_b_prefix):
+                name = path.replace(self._card_b_prefix, '')
+
+            name = name.replace('\\', '/')
            name = name.replace('//', '/')
+            if name.startswith('/'):
+                name = name[1:]
+
            booklists[blist].add_book(info, name, *location[1:-1])
        fix_ids(*booklists)

--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -1,12 +1,12 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import with_statement

 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, functools
+import functools
+import re

 from calibre import entity_to_unicode

@ -52,7 +52,12 @@ def line_length(raw, percent):
    for line in lines:
        if len(line) > 0:
            lengths.append(len(line))
+
+    if not lengths:
+        return 0
+
    total = sum(lengths)
+    print total
    avg = total / len(lengths)
    max_line = avg * 2

@ -178,10 +183,13 @@ class HTMLPreProcessor(object):
        elif self.is_book_designer(html):
            rules = self.BOOK_DESIGNER
        elif self.is_pdftohtml(html):
-            line_length_rules = [
-                # Un wrap using punctuation
-                (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines),
-            ]
+            length = line_length(html, .3)
+            line_length_rules = []
+            if length:
+                line_length_rules = [
+                    # Un wrap using punctuation
+                    (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
+                ]

            rules = self.PDFTOHTML + line_length_rules
        else:
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -82,10 +82,6 @@ class FB2MLizer(object):
        return images

    def clean_text(self, text):
-        for entity in set(re.findall('&.+?;', text)):
-            mo = re.search('(%s)' % entity[1:-1], text)
-            text = text.replace(entity, entity_to_unicode(mo))
-
        text = text.replace('&', '')

        return text
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'

 import os

-from calibre.customize.conversion import InputFormatPlugin
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.pdf.pdftohtml import pdftohtml
 from calibre.ebooks.metadata.opf2 import OPFCreator

@ -17,17 +17,33 @@ class PDFInput(InputFormatPlugin):
    description = 'Convert PDF files to HTML'
    file_types  = set(['pdf'])

+    options = set([
+        OptionRecommendation(name='no_images', recommended_value=False,
+            help=_('Do not extract images from the document')),
+    ])
+
    def convert(self, stream, options, file_ext, log,
                accelerators):
-        html = pdftohtml(stream.name)
-
-        with open('index.html', 'wb') as index:
-            index.write(html)
+        # The main html file will be named index.html
+        pdftohtml(os.getcwd(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwd(), mi)
-        opf.create_manifest([('index.html', None)])
+
+        manifest = [('index.html', None)]
+
+        images = os.listdir(os.getcwd())
+        images.remove('index.html')
+        for i in images:
+            # Remove the - from the file name because it causes problems.
+            # The reference to the image with the - will be changed to not
+            # include it later in the conversion process.
+            new_i = i.replace('-', '')
+            os.rename(i, new_i)
+            manifest.append((new_i, None))
+        opf.create_manifest(manifest)
+
        opf.create_spine(['index.html'])
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@ -14,7 +14,6 @@ from functools import partial
 from calibre.ebooks import ConversionError, DRMError
 from calibre import isosx, iswindows, islinux
 from calibre import CurrentDir
-from calibre.ptempfile import TemporaryDirectory

 PDFTOHTML = 'pdftohtml'
 popen = subprocess.Popen
@ -26,10 +25,11 @@ if iswindows and hasattr(sys, 'frozen'):
 if islinux and getattr(sys, 'frozen_path', False):
    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')

-def pdftohtml(pdf_path):
+def pdftohtml(output_dir, pdf_path, no_images):
    '''
    Convert the pdf into html using the pdftohtml app.
-    @return: The HTML as a unicode string.
+    This will write the html as index.html into output_dir.
+    It will also write all extracted images to the output_dir.
    '''

    if isinstance(pdf_path, unicode):
@ -37,41 +37,41 @@ def pdftohtml(pdf_path):
    if not os.access(pdf_path, os.R_OK):
        raise ConversionError('Cannot read from ' + pdf_path)

-    with TemporaryDirectory('_pdftohtml') as tdir:
-        index = os.path.join(tdir, 'index.html')
+    with CurrentDir(output_dir):
+        index = os.path.join(os.getcwd(), 'index.html')
        # This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
        pdf_path = os.path.abspath(pdf_path)
-        cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-i', '-q', pdf_path, os.path.basename(index))
-        cwd = os.getcwd()
+        cmd = [PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', pdf_path, os.path.basename(index)]
+        if no_images:
+            cmd.append('-i')

-        with CurrentDir(tdir):
+        try:
+            p = popen(cmd, stderr=subprocess.PIPE)
+        except OSError, err:
+            if err.errno == 2:
+                raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
+            else:
+                raise
+
+        while True:
            try:
-                p = popen(cmd, stderr=subprocess.PIPE)
-            except OSError, err:
-                if err.errno == 2:
-                    raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
+                ret = p.wait()
+                break
+            except OSError, e:
+                if e.errno == errno.EINTR:
+                    continue
                else:
                    raise

-            while True:
-                try:
-                    ret = p.wait()
-                    break
-                except OSError, e:
-                    if e.errno == errno.EINTR:
-                        continue
-                    else:
-                        raise
+        if ret != 0:
+            err = p.stderr.read()
+            raise ConversionError(err)
+        if not os.path.exists(index) or os.stat(index).st_size < 100:
+            raise DRMError()

-            if ret != 0:
-                err = p.stderr.read()
-                raise ConversionError(err)
-            if not os.path.exists(index) or os.stat(index).st_size < 100:
-                raise DRMError()
-
-            with open(index, 'rb') as i:
-                raw = i.read()
-            if not '<br' in raw[:4000]:
-                raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'))
-
-            return '<!-- created by calibre\'s pdftohtml -->\n' + raw
+        with open(index, 'rb+wb') as i:
+            raw = i.read()
+            raw = '<!-- created by calibre\'s pdftohtml -->\n' + raw
+            i.seek(0)
+            i.truncate()
+            i.write(raw)
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@ -227,6 +227,7 @@ class LocationModel(QAbstractListModel):
        self.free[1] = fs[1] if fs[1] is not None and cpa is not None else -1
        self.free[2] = fs[2] if fs[2] is not None and cpb is not None else -1
        self.reset()
+        self.emit(SIGNAL('devicesChanged()'))

    def location_changed(self, row):
        self.highlight_row = row
@ -253,6 +254,7 @@ class LocationView(QListView):
        self.connect(self, SIGNAL('entered(QModelIndex)'), self.item_entered)
        self.connect(self, SIGNAL('viewportEntered()'), self.viewport_entered)
        self.connect(self.eject_button, SIGNAL('clicked()'), lambda: self.emit(SIGNAL('umount_device()')))
+        self.connect(self.model(), SIGNAL('devicesChanged()'), self.eject_button.hide)

    def count_changed(self, new_count):
        self.model().count = new_count