diff --git a/imgsrc/trim.svg b/imgsrc/trim.svg new file mode 100644 index 0000000000..8c8810fc66 --- /dev/null +++ b/imgsrc/trim.svg @@ -0,0 +1,688 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + Oxygen team + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/resources/images/news/fronda.png b/resources/images/news/fronda.png new file mode 100644 index 0000000000..c332bbda49 Binary files /dev/null and b/resources/images/news/fronda.png differ diff --git a/resources/images/news/gazeta_pomorska.png b/resources/images/news/gazeta_pomorska.png new file mode 100644 index 0000000000..1d7099d7f3 Binary files /dev/null and b/resources/images/news/gazeta_pomorska.png differ diff --git a/resources/images/news/legeartis.png b/resources/images/news/legeartis.png new file mode 100644 index 0000000000..fd9001d9ed Binary files /dev/null and b/resources/images/news/legeartis.png differ diff --git a/resources/images/news/michalkiewicz.png b/resources/images/news/michalkiewicz.png index a87f30f5a3..cfa61c7fd6 100644 Binary files a/resources/images/news/michalkiewicz.png and b/resources/images/news/michalkiewicz.png differ diff --git a/resources/images/news/rmf24_ESKN.png b/resources/images/news/rmf24_ESKN.png new file mode 100644 index 0000000000..53ad00078a Binary files /dev/null and b/resources/images/news/rmf24_ESKN.png differ diff --git a/resources/images/news/rmf24_fakty.png b/resources/images/news/rmf24_fakty.png new file mode 100644 index 0000000000..53ad00078a Binary files /dev/null and b/resources/images/news/rmf24_fakty.png differ diff --git a/resources/images/trim.png b/resources/images/trim.png new file mode 100644 index 0000000000..3cb93adfa6 Binary files /dev/null and b/resources/images/trim.png differ diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 3543acd684..1e0e319334 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -49,7 +49,11 @@ class Danas(BasicNewsRecipe): , 'language' : language } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [ + (re.compile(u'\u0110'), lambda match: u'\u00D0') + ,(re.compile(u'\u201c'), lambda match: '"') + ,(re.compile(u'\u201e'), lambda match: '"') + ] keep_only_tags = [dict(name='div', attrs={'id':'left'})] remove_tags = [ diff --git a/resources/recipes/gazeta_pomorska.recipe b/resources/recipes/gazeta_pomorska.recipe new file mode 100644 index 0000000000..083f5cbeed --- /dev/null +++ b/resources/recipes/gazeta_pomorska.recipe @@ -0,0 +1,104 @@ +#!/usr/bin/env python + +# # Przed uzyciem przeczytaj komentarz w sekcji "feeds" + +__license__ = 'GPL v3' +__copyright__ = u'2010, Richard z forum.eksiazki.org' +'''pomorska.pl''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class GazetaPomorska(BasicNewsRecipe): + title = u'Gazeta Pomorska' + publisher = u'Gazeta Pomorska' + description = u'Kujawy i Pomorze - wiadomo\u015bci' + language = 'pl' + __author__ = u'Richard z forum.eksiazki.org' + # # (dziekuje t3d z forum.eksiazki.org za testy) + oldest_article = 2 + max_articles_per_feed = 20 + no_stylesheets = True + remove_javascript = True + preprocess_regexps = [ + (re.compile(r'' +''' +rmf24.pl +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class RMF24_ESKN(BasicNewsRecipe): + title = u'Rmf24.pl - Ekonomia Sport Kultura Nauka' + description = u'Ekonomia, sport, kultura i nauka ze strony rmf24.pl' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + __author__ = u'Tomasz D\u0142ugosz' + no_stylesheets = True + remove_javascript = True + + feeds = [(u'Ekonomia', u'http://www.rmf24.pl/ekonomia/feed'), + (u'Sport', u'http://www.rmf24.pl/sport/feed'), + (u'Kultura', u'http://www.rmf24.pl/kultura/feed'), + (u'Nauka', u'http://www.rmf24.pl/nauka/feed')] + + keep_only_tags = [dict(name='div', attrs={'class':'box articleSingle print'})] + + remove_tags = [ + dict(name='div', attrs={'class':'toTop'}), + dict(name='div', attrs={'class':'category'}), + dict(name='div', attrs={'class':'REMOVE'}), + dict(name='div', attrs={'class':'embed embedAd'})] + + extra_css = ''' + h1 { font-size: 1.2em; } + ''' + + preprocess_regexps = [ + (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + (r'

Zdj.cie

', lambda match: ''), + (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), + (r'
', lambda match: 'REMOVE">'), + (r' or

tags that contain the words ' diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index e87a8021f9..3b1239814a 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -106,6 +106,52 @@ def line_length(format, raw, percent): return lengths[index] +class Dehyphenator(object): + ''' + Analyzes words to determine whether hyphens should be retained/removed. Uses the document + itself is as a dictionary. This method handles all languages along with uncommon, made-up, and + scientific words. The primary disadvantage is that words appearing only once in the document + retain hyphens. + ''' + + def __init__(self): + # Add common suffixes to the regex below to increase the likelihood of a match - + # don't add suffixes which are also complete words, such as 'able' or 'sex' + self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) + # remove prefixes if the prefix was not already the point of hyphenation + self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE) + self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE) + + def dehyphenate(self, match): + firsthalf = match.group('firstpart') + secondhalf = match.group('secondpart') + hyphenated = str(firsthalf) + "-" + str(secondhalf) + dehyphenated = str(firsthalf) + str(secondhalf) + lookupword = self.removesuffixes.sub('', dehyphenated) + if self.prefixes.match(firsthalf) is None: + lookupword = self.removeprefix.sub('', lookupword) + booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE) + #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) + match = booklookup.search(self.html) + if match: + #print "returned dehyphenated word: " + str(dehyphenated) + return dehyphenated + else: + #print "returned hyphenated word: " + str(hyphenated) + return hyphenated + + def __call__(self, html, format, length=1): + self.html = html + if format == 'html': + intextmatch = re.compile(u'(?<=.{%i})(?P[^“"\s>]+)-\s*(?=<)(\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)' % length) + elif format == 'pdf': + intextmatch = re.compile(u'(?<=.{%i})(?P[^“"\s>]+)-\s*(

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) + elif format == 'individual_words': + intextmatch = re.compile('>[^<]*\b(?P[^"\s>]+)-(?P\s*(?=[[a-z\d])'), lambda match: '')) + # unwrap em/en dashes + end_rules.append((re.compile(u'(?<=[–—])\s*

\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens end_rules.append((re.compile(u'[­](\s*

)+\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens with formatting @@ -350,7 +395,7 @@ class HTMLPreProcessor(object): # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: @@ -380,6 +425,11 @@ class HTMLPreProcessor(object): for rule in rules + end_rules: html = rule[0].sub(rule[1], html) + if is_pdftohtml: + # Dehyphenate + dehyphenator = Dehyphenator() + html = dehyphenator(html,'pdf', length) + #dump(html, 'post-preprocess') # Handle broken XHTML w/ SVG (ugh) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 37fd169cb1..6a5eaa4a34 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' import re -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator from calibre.utils.logging import default_log class PreProcessor(object): @@ -114,7 +114,7 @@ class PreProcessor(object): html = re.sub(ur'\s*\s*', ' ', html) # Get rid of empty span, bold, & italics tags html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) - html = re.sub(r"\s*<[ibu]>\s*(<[ibu]>\s*\s*){0,2}\s*", " ", html) + html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*\s*){0,2}\s*", " ", html) html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing @@ -132,7 +132,6 @@ class PreProcessor(object): # Arrange line feeds and

tags so the line_length and no_markup functions work correctly html = re.sub(r"\s*

", "

\n", html) html = re.sub(r"\s*

\s*", "\n

", html) - #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") # detect chapters/sections to match xpath or splitting logic heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) @@ -140,16 +139,16 @@ class PreProcessor(object): # # Start with most typical chapter headings, get more aggressive until one works if self.html_preprocess_sections < 10: - chapdetect = re.compile(r'(?=]*>)\s*(<[ibu]>){0,2}\s*(]*>)?\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(){0,2})\s*()?s*(){0,2}\s*()?\s*()\s*\s*(\s*]*>\s*

){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE) + chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE) html = chapdetect.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) ###### Unwrap lines ###### @@ -174,10 +173,16 @@ class PreProcessor(object): length = line_length(format, html, getattr(self.extra_opts, 'html_unwrap_factor', 0.4)) self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***") + max_length = length * 1.4 + min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})") # - # Unwrap and/or delete soft-hyphens, hyphens + # Unwrap em/en dashes, delete soft-hyphens + #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) - html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html) + html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html) + # Dehyphenate + dehyphenator = Dehyphenator() + html = dehyphenator(html,'html', length) # Unwrap lines using punctation and line length unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) @@ -186,7 +191,7 @@ class PreProcessor(object): # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < 10: self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections)) - chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) + chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) html = chapdetect3.sub(self.chapter_break, html) # search for places where a first or second level heading is immediately followed by another # top level heading. demote the second heading to h3 to prevent splitting between chapter diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 5d5de7b153..23f92d1fd2 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -1574,14 +1574,15 @@ class MobiWriter(object): id = unicode(oeb.metadata.cover[0]) item = oeb.manifest.ids[id] href = item.href - index = self._images[href] - 1 - exth.write(pack('>III', 0xc9, 0x0c, index)) - exth.write(pack('>III', 0xcb, 0x0c, 0)) - nrecs += 2 - index = self._add_thumbnail(item) - if index is not None: - exth.write(pack('>III', 0xca, 0x0c, index - 1)) - nrecs += 1 + if href in self._images: + index = self._images[href] - 1 + exth.write(pack('>III', 0xc9, 0x0c, index)) + exth.write(pack('>III', 0xcb, 0x0c, 0)) + nrecs += 2 + index = self._add_thumbnail(item) + if index is not None: + exth.write(pack('>III', 0xca, 0x0c, index - 1)) + nrecs += 1 exth = exth.getvalue() trail = len(exth) % 4 diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index e58dce5559..c0c7b0a9ed 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -1,7 +1,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' """ The GUI """ -import os, sys +import os, sys, Queue from threading import RLock from PyQt4.Qt import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, \ @@ -39,7 +39,7 @@ gprefs.defaults['action-layout-context-menu'] = ( 'Edit Metadata', 'Send To Device', 'Save To Disk', 'Connect Share', 'Copy To Library', None, 'Convert Books', 'View', 'Open Folder', 'Show Book Details', - 'Similar Books', None, 'Remove Books', + 'Similar Books', 'Tweak ePub', None, 'Remove Books', ) gprefs.defaults['action-layout-context-menu-device'] = ( @@ -296,6 +296,34 @@ class Dispatcher(QObject): def dispatch(self, args, kwargs): self.func(*args, **kwargs) +class FunctionDispatcher(QObject): + ''' + Convenience class to use Qt signals with arbitrary python functions. + By default, ensures that a function call always happens in the + thread this Dispatcher was created in. + ''' + dispatch_signal = pyqtSignal(object, object, object) + + def __init__(self, func, queued=True, parent=None): + QObject.__init__(self, parent) + self.func = func + typ = Qt.QueuedConnection + if not queued: + typ = Qt.AutoConnection if queued is None else Qt.DirectConnection + self.dispatch_signal.connect(self.dispatch, type=typ) + + def __call__(self, *args, **kwargs): + q = Queue.Queue() + self.dispatch_signal.emit(q, args, kwargs) + return q.get() + + def dispatch(self, q, args, kwargs): + try: + res = self.func(*args, **kwargs) + except: + res = None + q.put(res) + class GetMetadata(QObject): ''' Convenience class to ensure that metadata readers are used only in the @@ -575,18 +603,6 @@ class Application(QApplication): self._file_open_paths = [] self._file_open_lock = RLock() - if islinux: - self.setStyleSheet(''' - QToolTip { - border: 2px solid black; - padding: 5px; - border-radius: 10px; - opacity: 200; - background-color: #e1e1ff; - color: black; - } - ''') - def _send_file_open_events(self): with self._file_open_lock: if self._file_open_paths: diff --git a/src/calibre/gui2/actions/tweak_epub.py b/src/calibre/gui2/actions/tweak_epub.py new file mode 100755 index 0000000000..212aff8019 --- /dev/null +++ b/src/calibre/gui2/actions/tweak_epub.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +from calibre.gui2 import error_dialog +from calibre.gui2.actions import InterfaceAction +from calibre.gui2.dialogs.tweak_epub import TweakEpub + +class TweakEpubAction(InterfaceAction): + + name = 'Tweak ePub' + action_spec = (_('Tweak ePub'), 'trim.png', + _('Make small changes to ePub format books'), + _('T')) + dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' + + def genesis(self): + self.qaction.triggered.connect(self.edit_epub_in_situ) + + def edit_epub_in_situ(self, *args): + row = self.gui.library_view.currentIndex() + if not row.isValid(): + return error_dialog(self.gui, _('Cannot tweak ePub'), + _('No book selected'), show=True) + + # Confirm 'EPUB' in formats + book_id = self.gui.library_view.model().id(row) + try: + path_to_epub = self.gui.library_view.model().db.format_abspath( + book_id, 'EPUB', index_is_id=True) + except: + path_to_epub = None + + if not path_to_epub: + return error_dialog(self.gui, _('Cannot tweak ePub'), + _('No ePub available. First convert the book to ePub.'), + show=True) + + # Launch modal dialog waiting for user to tweak or cancel + dlg = TweakEpub(self.gui, path_to_epub) + if dlg.exec_() == dlg.Accepted: + self.update_db(book_id, dlg._output) + dlg.cleanup() + + def update_db(self, book_id, rebuilt): + ''' + Update the calibre db with the tweaked epub + ''' + self.gui.library_view.model().db.add_format(book_id, 'EPUB', + open(rebuilt, 'rb'), index_is_id=True) + diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 26dbda6ca4..53788809b6 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -300,6 +300,24 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.cpixmap = pix self.cover_data = cdata + def trim_cover(self, *args): + from calibre.utils.magick import Image + cdata = self.cover_data + if not cdata: + return + im = Image() + im.load(cdata) + im.trim(10) + cdata = im.export('jpg') + pix = QPixmap() + pix.loadFromData(cdata) + self.cover.setPixmap(pix) + self.cover_changed = True + self.cpixmap = pix + self.cover_data = cdata + + + def sync_formats(self): old_extensions, new_extensions, paths = set(), set(), {} for row in range(self.formats.count()): @@ -380,6 +398,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.remove_unused_series) QObject.connect(self.auto_author_sort, SIGNAL('clicked()'), self.deduce_author_sort) + self.trim_cover_button.clicked.connect(self.trim_cover) self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'), self.author_sort_box_changed) self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'), diff --git a/src/calibre/gui2/dialogs/metadata_single.ui b/src/calibre/gui2/dialogs/metadata_single.ui index 74febf9c29..dbf825e706 100644 --- a/src/calibre/gui2/dialogs/metadata_single.ui +++ b/src/calibre/gui2/dialogs/metadata_single.ui @@ -625,6 +625,17 @@ Using this button to create author sort will change author sort from red to gree </property> </widget> </item> + <item> + <widget class="QToolButton" name="trim_cover_button"> + <property name="toolTip"> + <string>Remove border (if any) from cover</string> + </property> + <property name="icon"> + <iconset resource="../../../../resources/images.qrc"> + <normaloff>:/images/trim.png</normaloff>:/images/trim.png</iconset> + </property> + </widget> + </item> <item> <widget class="QToolButton" name="reset_cover"> <property name="toolTip"> diff --git a/src/calibre/gui2/dialogs/tweak_epub.py b/src/calibre/gui2/dialogs/tweak_epub.py new file mode 100755 index 0000000000..db6e93fd7a --- /dev/null +++ b/src/calibre/gui2/dialogs/tweak_epub.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +import os, shutil +from contextlib import closing +from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED + +from PyQt4.Qt import QDialog + +from calibre.gui2 import open_local_file +from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog +from calibre.libunzip import extract as zipextract +from calibre.ptempfile import PersistentTemporaryDirectory + +class TweakEpub(QDialog, Ui_Dialog): + ''' + Display controls for tweaking ePubs + + ''' + + def __init__(self, parent, epub): + QDialog.__init__(self, parent) + + self._epub = epub + self._exploded = None + self._output = None + + # Run the dialog setup generated from tweak_epub.ui + self.setupUi(self) + + self.cancel_button.clicked.connect(self.reject) + self.explode_button.clicked.connect(self.explode) + self.rebuild_button.clicked.connect(self.rebuild) + + # Position update dialog overlaying top left of app window + parent_loc = parent.pos() + self.move(parent_loc.x(),parent_loc.y()) + + def cleanup(self): + # Delete directory containing exploded ePub + if self._exploded is not None: + shutil.rmtree(self._exploded, ignore_errors=True) + + + def display_exploded(self): + ''' + Generic subprocess launch of native file browser + User can use right-click to 'Open with ...' + ''' + open_local_file(self._exploded) + + def explode(self, *args): + if self._exploded is None: + self._exploded = PersistentTemporaryDirectory("_exploded", prefix='') + zipextract(self._epub, self._exploded) + self.display_exploded() + self.rebuild_button.setEnabled(True) + self.explode_button.setEnabled(False) + + def rebuild(self, *args): + self._output = os.path.join(self._exploded, 'rebuilt.epub') + with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf: + # Write mimetype + zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED) + # Write everything else + exclude_files = ['.DS_Store','mimetype','iTunesMetadata.plist','rebuilt.epub'] + for root, dirs, files in os.walk(self._exploded): + for fn in files: + if fn in exclude_files: + continue + absfn = os.path.join(root, fn) + zfn = os.path.relpath(absfn, + self._exploded).replace(os.sep, '/') + zf.write(absfn, zfn) + return QDialog.accept(self) + diff --git a/src/calibre/gui2/dialogs/tweak_epub.ui b/src/calibre/gui2/dialogs/tweak_epub.ui new file mode 100644 index 0000000000..ccd33f44ab --- /dev/null +++ b/src/calibre/gui2/dialogs/tweak_epub.ui @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>Dialog</class> + <widget class="QDialog" name="Dialog"> + <property name="windowModality"> + <enum>Qt::NonModal</enum> + </property> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>382</width> + <height>242</height> + </rect> + </property> + <property name="windowTitle"> + <string>Tweak ePub</string> + </property> + <property name="sizeGripEnabled"> + <bool>false</bool> + </property> + <property name="modal"> + <bool>false</bool> + </property> + <layout class="QGridLayout" name="gridLayout"> + <item row="1" column="0"> + <widget class="QPushButton" name="explode_button"> + <property name="statusTip"> + <string>Display contents of exploded ePub</string> + </property> + <property name="text"> + <string>&Explode ePub</string> + </property> + <property name="icon"> + <iconset> + <normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QPushButton" name="rebuild_button"> + <property name="enabled"> + <bool>false</bool> + </property> + <property name="statusTip"> + <string>Rebuild ePub from exploded contents</string> + </property> + <property name="text"> + <string>&Rebuild ePub</string> + </property> + <property name="icon"> + <iconset> + <normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset> + </property> + </widget> + </item> + <item row="3" column="0"> + <widget class="QPushButton" name="cancel_button"> + <property name="statusTip"> + <string>Discard changes</string> + </property> + <property name="text"> + <string>&Cancel</string> + </property> + <property name="icon"> + <iconset> + <normaloff>:/images/window-close.png</normaloff>:/images/window-close.png</iconset> + </property> + </widget> + </item> + <item row="0" column="0"> + <widget class="QLabel" name="label"> + <property name="text"> + <string>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window. Rebuild the ePub, updating your calibre library.</string> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + </widget> + </item> + </layout> + </widget> + <resources> + <include location="../../../../resources/images.qrc"/> + </resources> + <connections/> +</ui> diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 3370fd4b75..53f701386b 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -12,7 +12,7 @@ from operator import attrgetter from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \ QModelIndex, QVariant, QDate -from calibre.gui2 import NONE, config, UNDEFINED_QDATE +from calibre.gui2 import NONE, config, UNDEFINED_QDATE, FunctionDispatcher from calibre.utils.pyparsing import ParseException from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors from calibre.ptempfile import PersistentTemporaryFile @@ -151,7 +151,7 @@ class BooksModel(QAbstractTableModel): # {{{ self.database_changed.emit(db) if self.cover_cache is not None: self.cover_cache.stop() - self.cover_cache = CoverCache(db) + self.cover_cache = CoverCache(db, FunctionDispatcher(self.db.cover)) self.cover_cache.start() def refresh_cover(event, ids): if event == 'cover' and self.cover_cache is not None: diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 7a516bb4ff..2f0452a773 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -217,6 +217,10 @@ def fetch_scheduled_recipe(arg): if 'output_profile' in ps: recs.append(('output_profile', ps['output_profile'], OptionRecommendation.HIGH)) + if ps['output_profile'] == 'kindle': + recs.append(('no_inline_toc', True, + OptionRecommendation.HIGH)) + lf = load_defaults('look_and_feel') if lf.get('base_font_size', 0.0) != 0.0: recs.append(('base_font_size', lf['base_font_size'], diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 211baeb634..58edd89cb2 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re, itertools +import re, itertools, time from itertools import repeat from datetime import timedelta from threading import Thread, RLock @@ -23,10 +23,11 @@ from calibre import fit_image class CoverCache(Thread): - def __init__(self, db): + def __init__(self, db, cover_func): Thread.__init__(self) self.daemon = True self.db = db + self.cover_func = cover_func self.load_queue = Queue() self.keep_running = True self.cache = {} @@ -37,7 +38,8 @@ class CoverCache(Thread): self.keep_running = False def _image_for_id(self, id_): - img = self.db.cover(id_, index_is_id=True, as_image=True) + time.sleep(0.050) # Limit 20/second to not overwhelm the GUI + img = self.cover_func(id_, index_is_id=True, as_image=True) if img is None: img = QImage() if not img.isNull(): diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index eb6e8336f9..627ab6358b 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -402,7 +402,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): path = path.lower() return path - def set_path(self, index, index_is_id=False, commit=True): + def set_path(self, index, index_is_id=False): ''' Set the path to the directory containing this books files based on its current title and author. If there was a previous directory, its contents @@ -432,7 +432,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if current_path and os.path.exists(spath): # Migrate existing files cdata = self.cover(id, index_is_id=True) if cdata is not None: - open(os.path.join(tpath, 'cover.jpg'), 'wb').write(cdata) + with open(os.path.join(tpath, 'cover.jpg'), 'wb') as f: + f.write(cdata) for format in formats: # Get data as string (can't use file as source and target files may be the same) f = self.format(id, format, index_is_id=True, as_file=False) @@ -442,8 +443,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.add_format(id, format, stream, index_is_id=True, path=tpath, notify=False) self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) - if commit: - self.conn.commit() self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) # Delete not needed directories if current_path and os.path.exists(spath): @@ -452,6 +451,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): parent = os.path.dirname(spath) if len(os.listdir(parent)) == 0: self.rmtree(parent, permanent=True) + curpath = self.library_path c1, c2 = current_path.split('/'), path.split('/') if not self.is_case_sensitive and len(c1) == len(c2): @@ -466,13 +466,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): # the directories, so no need to do them here. for oldseg, newseg in zip(c1, c2): if oldseg.lower() == newseg.lower() and oldseg != newseg: - while True: - # need a temp name in the current segment for renames - tempname = os.path.join(curpath, 'TEMP.%f'%time.time()) - if not os.path.exists(tempname): - break - os.rename(os.path.join(curpath, oldseg), tempname) - os.rename(tempname, os.path.join(curpath, newseg)) + try: + os.rename(os.path.join(curpath, oldseg), os.path.join(curpath, newseg)) + except: + break # Fail silently since nothing catastrophic has happened curpath = os.path.join(curpath, newseg) def add_listener(self, listener): @@ -1131,7 +1128,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def set_authors(self, id, authors, notify=True, commit=True): ''' - `authors`: A list of authors. + Note that even if commit is False, the db will still be committed to + because this causes the location of files to change + + :param authors: A list of authors. ''' if not authors: authors = [_('Unknown')] @@ -1163,11 +1163,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ','.join([a.replace(',', '|') for a in authors]), row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) - self.set_path(id, index_is_id=True, commit=commit) + self.set_path(id, index_is_id=True) if notify: self.notify('metadata', [id]) def set_title(self, id, title, notify=True, commit=True): + ''' + Note that even if commit is False, the db will still be committed to + because this causes the location of files to change + ''' if not title: return if not isinstance(title, unicode): @@ -1178,7 +1182,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True) else: self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True) - self.set_path(id, index_is_id=True, commit=commit) + self.set_path(id, index_is_id=True) if commit: self.conn.commit() if notify: diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 95794a8c1d..aeba8a3218 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -184,7 +184,7 @@ class ContentServer(object): if path and os.path.exists(path): updated = fromtimestamp(os.stat(path).st_mtime) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) - return fmt.read() + return fmt # }}} diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py index ed9e3d3d83..dcf9d7b671 100644 --- a/src/calibre/utils/magick/draw.py +++ b/src/calibre/utils/magick/draw.py @@ -60,15 +60,15 @@ def identify(path): data = open(path, 'rb').read() return identify_data(data) -def add_borders_to_image(path_to_image, left=0, top=0, right=0, bottom=0, - border_color='#ffffff'): +def add_borders_to_image(img_data, left=0, top=0, right=0, bottom=0, + border_color='#ffffff', fmt='jpg'): img = Image() - img.open(path_to_image) + img.load(img_data) lwidth, lheight = img.size canvas = create_canvas(lwidth+left+right, lheight+top+bottom, border_color) canvas.compose(img, left, top) - canvas.save(path_to_image) + return canvas.export(fmt) def create_text_wand(font_size, font_path=None): if font_path is None: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index a140dfbf05..d1e7866198 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -7,7 +7,7 @@ Defines various abstract base classes that can be subclassed to create powerful __docformat__ = "restructuredtext en" -import os, time, traceback, re, urlparse, sys +import os, time, traceback, re, urlparse, sys, cStringIO from collections import defaultdict from functools import partial from contextlib import nested, closing @@ -27,6 +27,7 @@ from calibre.web.fetch.simple import RecursiveFetcher from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.date import now as nowf +from calibre.utils.magick.draw import save_cover_data_to, add_borders_to_image class LoginFailed(ValueError): pass @@ -948,38 +949,36 @@ class BasicNewsRecipe(Recipe): try: cu = self.get_cover_url() except Exception, err: - cu = None self.log.error(_('Could not download cover: %s')%str(err)) self.log.debug(traceback.format_exc()) - if cu is not None: - ext = cu.split('/')[-1].rpartition('.')[-1] - if '?' in ext: - ext = '' - ext = ext.lower() if ext and '/' not in ext else 'jpg' - cpath = os.path.join(self.output_dir, 'cover.'+ext) + else: + cdata = None if os.access(cu, os.R_OK): - with open(cpath, 'wb') as cfile: - cfile.write(open(cu, 'rb').read()) + cdata = open(cu, 'rb').read() else: self.report_progress(1, _('Downloading cover from %s')%cu) - with nested(open(cpath, 'wb'), closing(self.browser.open(cu))) as (cfile, r): - cfile.write(r.read()) - if self.cover_margins[0] or self.cover_margins[1]: - from calibre.utils.magick.draw import add_borders_to_image - add_borders_to_image(cpath, - left=self.cover_margins[0],right=self.cover_margins[0], - top=self.cover_margins[1],bottom=self.cover_margins[1], - border_color=self.cover_margins[2]) - if ext.lower() == 'pdf': + with closing(self.browser.open(cu)) as r: + cdata = r.read() + if not cdata: + return + ext = cu.split('/')[-1].rpartition('.')[-1].lower().strip() + if ext == 'pdf': from calibre.ebooks.metadata.pdf import get_metadata - stream = open(cpath, 'rb') + stream = cStringIO.StringIO(cdata) + cdata = None mi = get_metadata(stream) - cpath = None if mi.cover_data and mi.cover_data[1]: - cpath = os.path.join(self.output_dir, - 'cover.'+mi.cover_data[0]) - with open(cpath, 'wb') as f: - f.write(mi.cover_data[1]) + cdata = mi.cover_data[1] + if not cdata: + return + if self.cover_margins[0] or self.cover_margins[1]: + cdata = add_borders_to_image(cdata, + left=self.cover_margins[0],right=self.cover_margins[0], + top=self.cover_margins[1],bottom=self.cover_margins[1], + border_color=self.cover_margins[2]) + + cpath = os.path.join(self.output_dir, 'cover.jpg') + save_cover_data_to(cdata, cpath) self.cover_path = cpath def download_cover(self): @@ -1422,7 +1421,6 @@ class CalibrePeriodical(BasicNewsRecipe): return br def download(self): - import cStringIO self.log('Fetching downloaded recipe') try: raw = self.browser.open_novisit(