From 37b820b0468267b21cc65d5470578c09655e9755 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 18 Apr 2009 07:51:00 -0400 Subject: [PATCH 1/4] Switch from file to open because the use of file is not proper. --- src/calibre/ebooks/pdf/writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 0f8cbf50c0..f91dae44fd 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -126,7 +126,7 @@ class PDFWriter(QObject): try: outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author) for item in self.combine_queue: - inputPDF = PdfFileReader(file(item, 'rb')) + inputPDF = PdfFileReader(open(item, 'rb')) for page in inputPDF.pages: outPDF.addPage(page) outPDF.write(self.out_stream) From fe3d1f5bc74171c78c437efa1cdaf0120b68e72b Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 18 Apr 2009 08:08:48 -0400 Subject: [PATCH 2/4] Rename FileWrapper with a more fitting name. Comment pdf get_cover to remove ambiguity of what and why. --- src/calibre/__init__.py | 2 +- src/calibre/ebooks/metadata/pdf.py | 44 ++++++++++++++++++------------ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 6299bb8782..9e18af3cf9 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -246,7 +246,7 @@ class CurrentDir(object): os.chdir(self.cwd) -class FileWrapper(object): +class StreamReadWrapper(object): ''' Used primarily with pyPdf to ensure the stream is properly closed. ''' diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index ec713b5adf..4d8516f6c3 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -6,7 +6,7 @@ __copyright__ = '2008, Kovid Goyal ' import sys, os, cStringIO from threading import Thread -from calibre import FileWrapper +from calibre import StreamReadWrapper from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ptempfile import TemporaryDirectory from pyPdf import PdfFileReader, PdfFileWriter @@ -34,7 +34,7 @@ def get_metadata(stream, extract_cover=True): traceback.print_exc() try: - with FileWrapper(stream) as stream: + with StreamReadWrapper(stream) as stream: info = PdfFileReader(stream).getDocumentInfo() if info.title: mi.title = info.title @@ -98,29 +98,39 @@ def get_cover(stream): data = cStringIO.StringIO() try: - pdf = PdfFileReader(stream) - output = PdfFileWriter() + StreamReadWrapper(stream) as stream: + pdf = PdfFileReader(stream) + output = PdfFileWriter() - if len(pdf.pages) >= 1: - output.addPage(pdf.getPage(0)) + # We only need the first page of the pdf file as that will + # be used as the cover. Saving the first page into a new + # pdf will speed up processing with ImageMagick as it will + # try to create an image for every page in the document. + if len(pdf.pages) >= 1: + output.addPage(pdf.getPage(0)) - with TemporaryDirectory('_pdfmeta') as tdir: - cover_path = os.path.join(tdir, 'cover.pdf') + # ImageMagick will only take a file path and save the + # image to a file path. + with TemporaryDirectory('_pdfmeta') as tdir: + cover_path = os.path.join(tdir, 'cover.pdf') - with open(cover_path, "wb") as outputStream: - output.write(outputStream) + with open(cover_path, "wb") as outputStream: + output.write(outputStream) - with ImageMagick(): - wand = NewMagickWand() - MagickReadImage(wand, cover_path) - MagickSetImageFormat(wand, 'JPEG') - MagickWriteImage(wand, '%s.jpg' % cover_path) - + # Use ImageMagick to turn the pdf into a Jpg image. + with ImageMagick(): + wand = NewMagickWand() + MagickReadImage(wand, cover_path) + MagickSetImageFormat(wand, 'JPEG') + MagickWriteImage(wand, '%s.jpg' % cover_path) + + # We need the image as a stream so we can return the + # image as a string for use in a MetaInformation object. img = Image.open('%s.jpg' % cover_path) img.save(data, 'JPEG') except: import traceback traceback.print_exc() + # Return the string in the cStringIO object. return data.getvalue() - From 3fe2c7a2ed4a82a8bfb99d0864ea50e308add82e Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 18 Apr 2009 13:40:24 -0400 Subject: [PATCH 3/4] Better pdftohtml processing rules based on ldolse from mobileread's work. --- src/calibre/ebooks/conversion/preprocess.py | 26 +++++++++++++++++---- src/calibre/ebooks/metadata/pdf.py | 2 +- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 632a7a3291..b105a6c042 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -23,6 +23,14 @@ def sanitize_head(match): x = _span_pat.sub('', x) return '\n'+x+'\n' +def chap_head(match): + chap = match.group('chap') + title = match.group('title') + if not title: + return '

'+chap+'


' + else: + return '

'+chap+'
'+title+'


' + class CSSPreProcessor(object): @@ -54,8 +62,9 @@ class HTMLPreProcessor(object): (re.compile(r'', re.IGNORECASE), lambda match: '
'), # Remove page numbers (re.compile(r'\d+
', re.IGNORECASE), lambda match: ''), - # Remove
and replace

with

+ # Replace

with

(re.compile(r'\s*', re.IGNORECASE), lambda match: '

'), + # Remove
(re.compile(r'(.*)', re.IGNORECASE), lambda match: match.group() if \ re.match('<', match.group(1).lstrip()) or \ @@ -69,15 +78,22 @@ class HTMLPreProcessor(object): # Remove non breaking spaces (re.compile(ur'\u00a0'), lambda match : ' '), + # Detect Chapters to match default XPATH in GUI + (re.compile(r'(]*>)?(]*>)?s*(?P(Chapter|Epilogue|Prologue|Book|Part)\s*(\d+|\w+)?)(]*>|]*>)\n?((?=()?\s*\w+(\s+\w+)?()?(]*>|]*>))((?P.*)(<br[^>]*>|</?p[^>]*>)))?', re.IGNORECASE), chap_head), + (re.compile(r'(<br[^>]*>)?(</?p[^>]*>)?s*(?P<chap>([A-Z \'"!]{5,})\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), + # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), # Un wrap lines - (re.compile(r'(?<=\w)\s*</(i|b|u)>\s*<p.*?>\s*<(i|b|u)>\s*(?=\w)'), lambda match: ' '), - (re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '), + (re.compile(r'(?<=[^\.^\^?^!^"^”])\s*(</(i|b|u)>)*\s*<p.*?>\s*(<(i|b|u)>)*\s*(?=[a-z0-9I])', re.UNICODE), lambda match: ' '), + # Clean up spaces - (re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '), - ] + (re.compile(u'(?<=[\.,:;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), + # Add space before and after italics + (re.compile(r'(?<!“)<i>'), lambda match: ' <i>'), + (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), + ] # Fix Book Designer markup BOOK_DESIGNER = [ diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 4d8516f6c3..a5ee619937 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -98,7 +98,7 @@ def get_cover(stream): data = cStringIO.StringIO() try: - StreamReadWrapper(stream) as stream: + with StreamReadWrapper(stream) as stream: pdf = PdfFileReader(stream) output = PdfFileWriter() From ac9f766a8d133048fcf46115b60ba3df0e85cc34 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 18 Apr 2009 19:33:59 -0400 Subject: [PATCH 4/4] GUI: Do not enable send to card x when card x is not present. --- src/calibre/gui2/device.py | 19 +++++++++++++++++-- src/calibre/gui2/main.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 8d1b7a1b3a..239fd4d37d 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -346,10 +346,25 @@ class DeviceMenu(QMenu): self.action_triggered(action) break - def enable_device_actions(self, enable): + def enable_device_actions(self, enable, card_prefix=(None, None)): for action in self.actions: if action.dest in ('main:', 'carda:0', 'cardb:0'): - action.setEnabled(enable) + if not enable: + action.setEnabled(False) + else: + if action.dest == 'main:': + action.setEnabled(True) + elif action.dest == 'carda:0': + if card_prefix[0] != None: + action.setEnabled(True) + else: + action.setEnabled(False) + elif action.dest == 'cardb:0': + if card_prefix[1] != None: + action.setEnabled(True) + else: + action.setEnabled(False) + class Emailer(Thread): diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 86d1b013e3..21d873db60 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -608,7 +608,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): self.device_manager.device.__class__.__name__+\ _(' detected.'), 3000) self.device_connected = True - self._sync_menu.enable_device_actions(True) + self._sync_menu.enable_device_actions(True, self.device_manager.device.card_prefix()) else: self.device_connected = False self._sync_menu.enable_device_actions(False)