diff --git a/src/calibre/devices/prs505/books.py b/src/calibre/devices/prs505/books.py index c10263c6e0..6e268e734a 100644 --- a/src/calibre/devices/prs505/books.py +++ b/src/calibre/devices/prs505/books.py @@ -403,9 +403,8 @@ def fix_ids(main, carda, cardb): for child in db.root_element.childNodes: if child.nodeType == child.ELEMENT_NODE and child.hasAttribute('id'): id_map[child.getAttribute('id')] = str(cid) - child.setAttribute('sourceid', '1') - #child.setAttribute("sourceid", - # '0' if getattr(child, 'tagName', '').endswith('playlist') else '1') + child.setAttribute("sourceid", + '0' if getattr(child, 'tagName', '').endswith('playlist') else '1') child.setAttribute('id', str(cid)) cid += 1 diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index d88aba00d0..1dca0586f9 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -180,16 +180,25 @@ class PRS505(CLI, Device): return zip(paths, sizes, ctimes, cycle([on_card])) - @classmethod - def add_books_to_metadata(cls, locations, metadata, booklists): + def add_books_to_metadata(self, locations, metadata, booklists): metadata = iter(metadata) for location in locations: info = metadata.next() path = location[0] blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0 - name = path.rpartition(os.sep)[2] - name = (cls.CARD_PATH_PREFIX+'/' if blist else 'database/media/books/') + name + + if path.startswith(self._main_prefix): + name = path.replace(self._main_prefix, '') + elif path.startswith(self._card_a_prefix): + name = path.replace(self._card_a_prefix, '') + elif path.startswith(self._card_b_prefix): + name = path.replace(self._card_b_prefix, '') + + name = name.replace('\\', '/') name = name.replace('//', '/') + if name.startswith('/'): + name = name[1:] + booklists[blist].add_book(info, name, *location[1:-1]) fix_ids(*booklists) diff --git a/src/calibre/ebooks/conversion/preprocess.py 
b/src/calibre/ebooks/conversion/preprocess.py index 816dd54ade..15b927115f 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -1,12 +1,12 @@ #!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, functools +import functools +import re from calibre import entity_to_unicode @@ -52,7 +52,12 @@ def line_length(raw, percent): for line in lines: if len(line) > 0: lengths.append(len(line)) + + if not lengths: + return 0 + total = sum(lengths) + print total avg = total / len(lengths) max_line = avg * 2 @@ -178,10 +183,13 @@ class HTMLPreProcessor(object): elif self.is_book_designer(html): rules = self.BOOK_DESIGNER elif self.is_pdftohtml(html): - line_length_rules = [ - # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P)?\s*()\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines), - ] + length = line_length(html, .3) + line_length_rules = [] + if length: + line_length_rules = [ + # Un wrap using punctuation + (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P)?\s*()\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines), + ] rules = self.PDFTOHTML + line_length_rules else: diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 429e36eba2..76b9b9a758 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -82,10 +82,6 @@ class FB2MLizer(object): return images def clean_text(self, text): - for entity in set(re.findall('&.+?;', text)): - mo = re.search('(%s)' % entity[1:-1], text) - text = text.replace(entity, entity_to_unicode(mo)) - text = text.replace('&', '') return text diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 6aa695c912..97024ea908 100644 --- a/src/calibre/ebooks/pdf/input.py +++ 
b/src/calibre/ebooks/pdf/input.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' import os -from calibre.customize.conversion import InputFormatPlugin +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.pdf.pdftohtml import pdftohtml from calibre.ebooks.metadata.opf2 import OPFCreator @@ -17,17 +17,33 @@ class PDFInput(InputFormatPlugin): description = 'Convert PDF files to HTML' file_types = set(['pdf']) + options = set([ + OptionRecommendation(name='no_images', recommended_value=False, + help=_('Do not extract images from the document')), + ]) + def convert(self, stream, options, file_ext, log, accelerators): - html = pdftohtml(stream.name) - - with open('index.html', 'wb') as index: - index.write(html) + # The main html file will be named index.html + pdftohtml(os.getcwd(), stream.name, options.no_images) from calibre.ebooks.metadata.meta import get_metadata mi = get_metadata(stream, 'pdf') opf = OPFCreator(os.getcwd(), mi) - opf.create_manifest([('index.html', None)]) + + manifest = [('index.html', None)] + + images = os.listdir(os.getcwd()) + images.remove('index.html') + for i in images: + # Remove the - from the file name because it causes problems. + # The reference to the image with the - will be changed to not + # include it later in the conversion process. 
+ new_i = i.replace('-', '') + os.rename(i, new_i) + manifest.append((new_i, None)) + opf.create_manifest(manifest) + opf.create_spine(['index.html']) with open('metadata.opf', 'wb') as opffile: opf.render(opffile) diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py index c88b50e82e..11631c7fe0 100644 --- a/src/calibre/ebooks/pdf/pdftohtml.py +++ b/src/calibre/ebooks/pdf/pdftohtml.py @@ -14,7 +14,6 @@ from functools import partial from calibre.ebooks import ConversionError, DRMError from calibre import isosx, iswindows, islinux from calibre import CurrentDir -from calibre.ptempfile import TemporaryDirectory PDFTOHTML = 'pdftohtml' popen = subprocess.Popen @@ -26,10 +25,11 @@ if iswindows and hasattr(sys, 'frozen'): if islinux and getattr(sys, 'frozen_path', False): PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml') -def pdftohtml(pdf_path): +def pdftohtml(output_dir, pdf_path, no_images): ''' Convert the pdf into html using the pdftohtml app. - @return: The HTML as a unicode string. + This will write the html as index.html into output_dir. 
+ It will also write all extracted images to the output_dir ''' if isinstance(pdf_path, unicode): @@ -37,41 +37,41 @@ def pdftohtml(pdf_path): if not os.access(pdf_path, os.R_OK): raise ConversionError('Cannot read from ' + pdf_path) - with TemporaryDirectory('_pdftohtml') as tdir: - index = os.path.join(tdir, 'index.html') + with CurrentDir(output_dir): + index = os.path.join(os.getcwd(), 'index.html') # This is neccessary as pdftohtml doesn't always (linux) respect absolute paths pdf_path = os.path.abspath(pdf_path) - cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-i', '-q', pdf_path, os.path.basename(index)) - cwd = os.getcwd() + cmd = [PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', pdf_path, os.path.basename(index)] + if no_images: + cmd.append('-i') - with CurrentDir(tdir): + try: + p = popen(cmd, stderr=subprocess.PIPE) + except OSError, err: + if err.errno == 2: + raise ConversionError(_('Could not find pdftohtml, check it is in your PATH')) + else: + raise + + while True: try: - p = popen(cmd, stderr=subprocess.PIPE) - except OSError, err: - if err.errno == 2: - raise ConversionError(_('Could not find pdftohtml, check it is in your PATH')) + ret = p.wait() + break + except OSError, e: + if e.errno == errno.EINTR: + continue else: raise - while True: - try: - ret = p.wait() - break - except OSError, e: - if e.errno == errno.EINTR: - continue - else: - raise + if ret != 0: + err = p.stderr.read() + raise ConversionError(err) + if not os.path.exists(index) or os.stat(index).st_size < 100: + raise DRMError() - if ret != 0: - err = p.stderr.read() - raise ConversionError(err) - if not os.path.exists(index) or os.stat(index).st_size < 100: - raise DRMError() - - with open(index, 'rb') as i: - raw = i.read() - if not '\n' + raw + with open(index, 'rb+wb') as i: + raw = i.read() + raw = '\n' + raw + i.seek(0) + i.truncate() + i.write(raw) diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index 
c369f16894..f692ae24d7 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -227,6 +227,7 @@ class LocationModel(QAbstractListModel): self.free[1] = fs[1] if fs[1] is not None and cpa is not None else -1 self.free[2] = fs[2] if fs[2] is not None and cpb is not None else -1 self.reset() + self.emit(SIGNAL('devicesChanged()')) def location_changed(self, row): self.highlight_row = row @@ -253,6 +254,7 @@ class LocationView(QListView): self.connect(self, SIGNAL('entered(QModelIndex)'), self.item_entered) self.connect(self, SIGNAL('viewportEntered()'), self.viewport_entered) self.connect(self.eject_button, SIGNAL('clicked()'), lambda: self.emit(SIGNAL('umount_device()'))) + self.connect(self.model(), SIGNAL('devicesChanged()'), self.eject_button.hide) def count_changed(self, new_count): self.model().count = new_count