From dec27fbaa1e9544675d6d10bda566d83fd7a85f2 Mon Sep 17 00:00:00 2001 From: ldolse Date: Sun, 19 Sep 2010 13:02:02 +0800 Subject: [PATCH 01/13] new dehyphenation algorithm, using the document as a dictionary --- src/calibre/ebooks/conversion/preprocess.py | 58 +++++++++++++++++++-- src/calibre/ebooks/conversion/utils.py | 13 +++-- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 03a0047927..a1e28b2554 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -106,6 +106,50 @@ def line_length(format, raw, percent): return lengths[index] +class Dehyphenator(object): + ''' + Analyzes words to determine whether hyphens should be retained/removed. Uses the document + itself is as a dictionary. This method handles all languages along with uncommon, made-up, and + scientific words. The primary disadvantage is that words appearing only once in the document + retain hyphens. + ''' + + def dehyphenate(self, match): + firsthalf = match.group('firstpart') + secondhalf = match.group('secondpart') + hyphenated = str(firsthalf) + "-" + str(secondhalf) + dehyphenated = str(firsthalf) + str(secondhalf) + # Add common suffixes to the regex below to increase the likelihood of a match - + # don't add suffixes which are also complete words, such as 'able' or 'sex' + removesuffixes = re.compile(r"((ed)?ly|(')?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) + lookupword = removesuffixes.sub('', dehyphenated) + # remove prefixes if the prefix was not already the point of hyphenation + prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE) + removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE) + if prefixes.match(firsthalf) is None: + lookupword = removeprefix.sub('', lookupword) + booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE) + #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) + match = booklookup.search(self.html) + if match: + #print "returned dehyphenated word: " + str(dehyphenated) + return dehyphenated + else: + #print "returned hyphenated word: " + str(hyphenated) + return hyphenated + + def __call__(self, html, format, length=1): + self.html = html + if format == 'html': + intextmatch = re.compile(u'(?<=.{%i})(?P[^“"\s>]+)-\s*(?=<)(\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)' % length) + elif format == 'pdf': + intextmatch = re.compile(u'(?<=.{%i})(?P[^“"\s>]+)-\s*(

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) + elif format == 'individual_words': + intextmatch = re.compile('>[^<]*\b(?P[^"\s>]+)-(?P\s*(?=[[a-z\d])'), lambda match: '')) + # unwrap em/en dashes + end_rules.append((re.compile(u'(?<=[–—])\s*

\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens end_rules.append((re.compile(u'[­](\s*

)+\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens with formatting @@ -350,7 +393,7 @@ class HTMLPreProcessor(object): # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: @@ -380,6 +423,11 @@ class HTMLPreProcessor(object): for rule in rules + end_rules: html = rule[0].sub(rule[1], html) + if is_pdftohtml: + # Dehyphenate + dehyphenator = Dehyphenator() + html = dehyphenator(html,'pdf', length) + #dump(html, 'post-preprocess') # Handle broken XHTML w/ SVG (ugh) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 37fd169cb1..f9178ead0b 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' import re -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator from calibre.utils.logging import default_log class PreProcessor(object): @@ -132,7 +132,6 @@ class PreProcessor(object): # Arrange line feeds and

tags so the line_length and no_markup functions work correctly html = re.sub(r"\s*

", "

\n", html) html = re.sub(r"\s*

\s*", "\n

", html) - #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") # detect chapters/sections to match xpath or splitting logic heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) @@ -174,10 +173,16 @@ class PreProcessor(object): length = line_length(format, html, getattr(self.extra_opts, 'html_unwrap_factor', 0.4)) self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***") + max_length = length * 1.4 + min_max = str("(?<=.{"+str(length)+"})(?\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*', '', html) - html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html) + html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html) + # Dehyphenate + dehyphenator = Dehyphenator() + html = dehyphenator(html,'html', length) # Unwrap lines using punctation and line length unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?\s*()?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*\s*)\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) From 053d60331fcfb9f82e141ebc11a625b1acd3e1a4 Mon Sep 17 00:00:00 2001 From: ldolse Date: Sun, 19 Sep 2010 23:07:07 +0800 Subject: [PATCH 02/13] regex optimizations --- src/calibre/ebooks/conversion/preprocess.py | 2 +- src/calibre/ebooks/conversion/utils.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 16bfb42d1f..7f13cefcaa 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -121,7 +121,7 @@ class Dehyphenator(object): dehyphenated = str(firsthalf) + str(secondhalf) # Add common suffixes to the regex below to increase the likelihood of a match - # don't add suffixes which are also complete words, such as 'able' or 'sex' - removesuffixes = re.compile(r"((ed)?ly|(')?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) + removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) lookupword = removesuffixes.sub('', dehyphenated) # remove prefixes if the prefix was not already the point of hyphenation prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index f9178ead0b..6a5eaa4a34 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -114,7 +114,7 @@ class PreProcessor(object): html = re.sub(ur'\s*\s*', ' ', html) # Get rid of empty span, bold, & italics tags html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) - html = re.sub(r"\s*<[ibu]>\s*(<[ibu]>\s*\s*){0,2}\s*", " ", html) + html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*\s*){0,2}\s*", " ", html) html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing @@ -139,16 +139,16 @@ class PreProcessor(object): # # Start with most typical chapter headings, get more aggressive until one works if self.html_preprocess_sections < 10: - chapdetect = re.compile(r'(?=]*>)\s*(<[ibu]>){0,2}\s*(]*>)?\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(){0,2})\s*()?s*(){0,2}\s*()?\s*()\s*\s*(\s*]*>\s*

){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE) + chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE) html = chapdetect.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) ###### Unwrap lines ###### @@ -191,7 +191,7 @@ class PreProcessor(object): # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < 10: self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections)) - chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) + chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) html = chapdetect3.sub(self.chapter_break, html) # search for places where a first or second level heading is immediately followed by another # top level heading. demote the second heading to h3 to prevent splitting between chapter From 980388f2bde3d4cb4b07673cb9e79c951aabd867 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 19 Sep 2010 09:48:39 -0600 Subject: [PATCH 03/13] Le Journal de Montreal by Luciano Furtado. Fixes #405 (New news feed) --- resources/recipes/le_journal.recipe | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 resources/recipes/le_journal.recipe diff --git a/resources/recipes/le_journal.recipe b/resources/recipes/le_journal.recipe new file mode 100644 index 0000000000..24a7d52164 --- /dev/null +++ b/resources/recipes/le_journal.recipe @@ -0,0 +1,43 @@ +__author__ = ' (lrfurtado@yahoo.com.br)' + +from calibre.web.feeds.news import BasicNewsRecipe + +class LeJournalDeMontrealRecipe(BasicNewsRecipe): + + title = u'Le Journal de Montreal' + description = u'Le Journal de Montreal' + __author__ = 'Luciano Furtado' + language = 'fr' + + oldest_article = 7 + use_embedded_content=0 + max_articles_per_feed = 15 + + remove_tags = [ + dict(name='ul',attrs={'id':'mainNav'}), + dict(name='div',attrs={'id':'boxPolitique'}), + dict(name='div',attrs={'id':'boxScoop'}), + dict(name='div',attrs={'id':'DossierSpec'}), + dict(name='div',attrs={'id':'channelBoxes'}), + dict(name='div',attrs={'id':'sectionBoxes'}), + dict(name='div',attrs={'id':'header'}), + dict(name='div',attrs={'id':'footer'}), + dict(name='div',attrs={'id':'navbarCanoe_container'}), + dict(name='div',attrs={'id':'popularCanoe'}), + dict(name='div',attrs={'id':'textAds'}), + dict(name='div',attrs={'id':'24heures'}), + dict(name='div',attrs={'class':'bottomBox clear'}), + dict(name='div',attrs={'class':'articleControls thin'}), + ] + + + feeds = [ + (u'Actualites', + u'http://www.canoe.com/rss/feed/nouvelles/ljm_actualites.xml'), + (u'Arts et spectacle', + u'http://www.canoe.com/rss/feed/nouvelles/ljm_arts.xml'), + (u'Sports', + u'http://www.canoe.com/rss/feed/nouvelles/ljm_sports.xml'), + (u'Chroniques', + u'http://www.canoe.com/rss/feed/nouvelles/ljm_chroniques.xml'), + ] From 23cd4fd7833180d7036aa77c0c1efcbd09ca6a00 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 19 Sep 2010 10:16:41 -0600 Subject: [PATCH 04/13] Content server: Making serving of large files more efficient. --- src/calibre/library/server/content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 95794a8c1d..aeba8a3218 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -184,7 +184,7 @@ class ContentServer(object): if path and os.path.exists(path): updated = fromtimestamp(os.stat(path).st_mtime) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) - return fmt.read() + return fmt # }}} From 4f7f7214c13da75ff2dfc4ef0d00da56ad43fdcb Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 19 Sep 2010 20:30:08 +0100 Subject: [PATCH 05/13] Fix incorrect book matching. --- src/calibre/gui2/device.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index ae3141db56..a7e55c4619 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -800,7 +800,7 @@ class DeviceMixin(object): # {{{ # if set_books_in_library did not. if not self.set_books_in_library(self.booklists(), reset=True): self.upload_booklists() - self.book_on_device(None, None, reset=True) + self.book_on_device(None, reset=True) # We need to reset the ondevice flags in the library. Use a big hammer, # so we don't need to worry about whether some succeeded or not. self.refresh_ondevice_info(device_connected=True, reset_only=False) @@ -1309,7 +1309,7 @@ class DeviceMixin(object): # {{{ for f in files: getattr(f, 'close', lambda : True)() - def book_on_device(self, id, format=None, reset=False): + def book_on_device(self, id, reset=False): ''' Return an indication of whether the given book represented by its db id is on the currently connected device. It returns a 5 element list. The @@ -1338,8 +1338,6 @@ class DeviceMixin(object): # {{{ self.book_db_id_cache.append(set()) for book in l: db_id = getattr(book, 'application_id', None) - if db_id is None: - db_id = book.db_id if db_id is not None: # increment the count of books on the device with this # db_id. From f4b885568343944d66950935df887f276eaa3b4f Mon Sep 17 00:00:00 2001 From: Timothy Legge <timlegge@gmail.com> Date: Sun, 19 Sep 2010 21:46:13 -0300 Subject: [PATCH 06/13] KOBO: Fix issue where books that are read were getting their status reset to Unread --- src/calibre/devices/kobo/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 762a05d193..1171b74f5c 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -443,9 +443,9 @@ class KOBO(USBMS): # Reset Im_Reading list in the database if oncard == 'carda': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\'' + query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\'' elif oncard != 'carda' and oncard != 'cardb': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\'' + query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\'' try: cursor.execute (query) From 0fa7eef131080297085c0f4224907e351fc8e7fb Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 19 Sep 2010 19:01:53 -0600 Subject: [PATCH 07/13] Tagesanzeiger by noxxx --- resources/recipes/tagesan.recipe | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 resources/recipes/tagesan.recipe diff --git a/resources/recipes/tagesan.recipe b/resources/recipes/tagesan.recipe new file mode 100644 index 0000000000..8514162598 --- /dev/null +++ b/resources/recipes/tagesan.recipe @@ -0,0 +1,45 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1284927619(BasicNewsRecipe): + title = u'Tagesanzeiger' + publisher = u'Tamedia AG' + oldest_article = 2 + __author__ = 'noxxx' + max_articles_per_feed = 100 + description = 'tagesanzeiger.ch: Nichts verpassen' + category = 'News, Politik, Nachrichten, Schweiz, Zürich' + language = 'de' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + remove_tags = [ + dict(name='img') + ,dict(name='div',attrs={'class':['swissquote ad','boxNews','centerAD','contentTabs2','sbsLabel']}) + ,dict(name='div',attrs={'id':['colRightAd','singleRight','singleSmallRight','MailInfo','metaLine','sidebarSky','contentFooter','commentInfo','commentInfo2','commentInfo3','footerBottom','clear','boxExclusiv','singleLogo','navSearch','headerLogin','headerBottomRight','horizontalNavigation','subnavigation','googleAdSense','footerAd','contentbox','articleGalleryNav']}) + ,dict(name='form',attrs={'id':['articleMailForm','commentform']}) + ,dict(name='div',attrs={'style':['position:absolute']}) + ,dict(name='script',attrs={'type':['text/javascript']}) + ,dict(name='p',attrs={'class':['schreiben','smallPrint','charCounter','caption']}) + ] + feeds = [ + (u'Front', u'http://www.tagesanzeiger.ch/rss.html') + ,(u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html') + ,(u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html') + ,(u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html') + ,(u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html') + ,(u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html') + ,(u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html') + ,(u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html') + ,(u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html') + ,(u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html') + ,(u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html') + ,(u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html')] + + def print_version(self, url): + return url + '/print.html' + From 77da36f05c3e09654650133671c1d7f904d8a7d0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 19 Sep 2010 23:51:14 -0600 Subject: [PATCH 08/13] Add prologue and epilogue to default chapter detection regex --- src/calibre/ebooks/conversion/plumber.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 3ea2926461..395447edba 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter', OptionRecommendation(name='chapter', recommended_value="//*[((name()='h1' or name()='h2') and " - r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class " + r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class " "= 'chapter']", level=OptionRecommendation.LOW, help=_('An XPath expression to detect chapter titles. The default ' 'is to consider <h1> or <h2> tags that contain the words ' From 656c88792ddeb760cf7ed562ad54bce81d17f77e Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 20 Sep 2010 08:29:25 -0600 Subject: [PATCH 09/13] The Marker by Marbs --- resources/recipes/the_marker.recipe | 52 +++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 resources/recipes/the_marker.recipe diff --git a/resources/recipes/the_marker.recipe b/resources/recipes/the_marker.recipe new file mode 100644 index 0000000000..e5f1ffc761 --- /dev/null +++ b/resources/recipes/the_marker.recipe @@ -0,0 +1,52 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1283848012(BasicNewsRecipe): + description = 'TheMarker Financial News in Hebrew' + __author__ = 'TonyTheBookworm, Marbs' + cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg' + title = u'TheMarker' + language = 'he' + simultaneous_downloads = 5 + remove_javascript = True + timefmt = '[%a, %d %b, %Y]' + oldest_article = 1 + remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ] + max_articles_per_feed = 10 + extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }' + feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'), + (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'), + (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'), + (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'), + (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'), + (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'), + (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'), + (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'), + (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'), + (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'), + (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')] + + def print_version(self, url): + split1 = url.split("=") + weblinks = url + + if weblinks is not None: + for link in weblinks: + #--------------------------------------------------------- + #here we need some help with some regexpressions + #we are trying to find it.themarker.com in a url + #----------------------------------------------------------- + re1='.*?' # Non-greedy match on filler + re2='(it\\.themarker\\.com)' # Fully Qualified Domain Name 1 + rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL) + m = rg.search(url) + + + if m: + split2 = url.split("article/") + print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1] + + else: + print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml' + + return print_url From bc82ea61032bf7f1d2564674f0a7174df4b3dab4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 20 Sep 2010 09:28:39 -0600 Subject: [PATCH 10/13] Add button to Edit metadata dialog to trim borders from the cover --- imgsrc/trim.svg | 688 ++++++++++++++++++++ resources/images/trim.png | Bin 0 -> 2553 bytes src/calibre/gui2/dialogs/metadata_single.py | 19 + src/calibre/gui2/dialogs/metadata_single.ui | 11 + 4 files changed, 718 insertions(+) create mode 100644 imgsrc/trim.svg create mode 100644 resources/images/trim.png diff --git a/imgsrc/trim.svg b/imgsrc/trim.svg new file mode 100644 index 0000000000..8c8810fc66 --- /dev/null +++ b/imgsrc/trim.svg @@ -0,0 +1,688 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="128" + height="128" + id="svg1307" + sodipodi:version="0.32" + inkscape:version="0.46+devel" + version="1.0" + sodipodi:docname="transform-crop.svgz" + inkscape:export-filename="/home/pinheiro/pics/oxygen-icons/scalable/actions/transform-crop.png" + inkscape:export-xdpi="90" + inkscape:export-ydpi="90" + inkscape:output_extension="org.inkscape.output.svgz.inkscape"> + <defs + id="defs1309"> + <linearGradient + inkscape:collect="always" + id="linearGradient2594"> + <stop + style="stop-color:#fafafa;stop-opacity:1;" + offset="0" + id="stop2596" /> + <stop + style="stop-color:#fafafa;stop-opacity:0;" + offset="1" + id="stop2598" /> + </linearGradient> + <linearGradient + inkscape:collect="always" + id="linearGradient3969"> + <stop + style="stop-color:#000000;stop-opacity:1;" + offset="0" + id="stop3971" /> + <stop + style="stop-color:#000000;stop-opacity:0;" + offset="1" + id="stop3973" /> + </linearGradient> + <linearGradient + id="linearGradient2783"> + <stop + style="stop-color:#323232;stop-opacity:1;" + offset="0" + id="stop2785" /> + <stop + id="stop2787" + offset="0.07692308" + style="stop-color:#dfe1e1;stop-opacity:1;" /> + <stop + style="stop-color:#b6b1b1;stop-opacity:1;" + offset="0.26289096" + id="stop2799" /> + <stop + id="stop2789" + offset="0.5" + style="stop-color:#8d8282;stop-opacity:1;" /> + <stop + style="stop-color:#ffffff;stop-opacity:1;" + offset="0.78201604" + id="stop2791" /> + <stop + style="stop-color:#dfd9df;stop-opacity:1;" + offset="0.9005897" + id="stop2793" /> + <stop + style="stop-color:#3a3a3a;stop-opacity:1;" + offset="1" + id="stop2795" /> + </linearGradient> + <linearGradient + id="linearGradient2222" + inkscape:collect="always"> + <stop + id="stop2224" + offset="0" + style="stop-color:#0066ff;stop-opacity:1" /> + <stop + id="stop2226" + offset="1" + style="stop-color:#80b3ff;stop-opacity:1" /> + </linearGradient> + <linearGradient + id="linearGradient3314" + inkscape:collect="always"> + <stop + id="stop3316" + offset="0" + style="stop-color:#ffffff;stop-opacity:1;" /> + <stop + id="stop3318" + offset="1" + style="stop-color:#ffffff;stop-opacity:0;" /> + </linearGradient> + <linearGradient + id="linearGradient2431"> + <stop + style="stop-color:#ffffff;stop-opacity:1;" + offset="0" + id="stop2433" /> + <stop + id="stop2435" + offset="0.42597079" + style="stop-color:#ffffff;stop-opacity:1;" /> + <stop + id="stop2437" + offset="0.5892781" + style="stop-color:#f1f1f1;stop-opacity:1;" /> + <stop + style="stop-color:#eaeaea;stop-opacity:1;" + offset="0.80219781" + id="stop2439" /> + <stop + style="stop-color:#dfdfdf;stop-opacity:1;" + offset="1" + id="stop2441" /> + </linearGradient> + <linearGradient + id="linearGradient7422"> + <stop + style="stop-color:#b4b4b6;stop-opacity:1;" + offset="0" + id="stop7424" /> + <stop + id="stop5348" + offset="0.5" + style="stop-color:#9c9ca1;stop-opacity:1;" /> + <stop + id="stop7426" + offset="1" + style="stop-color:#cdcdd1;stop-opacity:1;" /> + </linearGradient> + <linearGradient + id="linearGradient3310" + inkscape:collect="always"> + <stop + id="stop3312" + offset="0" + style="stop-color:#ffffff;stop-opacity:1;" /> + <stop + id="stop3314" + offset="1" + style="stop-color:#ffffff;stop-opacity:0;" /> + </linearGradient> + <filter + inkscape:collect="always" + x="-0.21138181" + width="1.4227636" + y="-0.21047288" + height="1.4209458" + id="filter9723"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="1.4336041" + id="feGaussianBlur9725" /> + </filter> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipPath10698"> + <path + style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.80000001;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" + d="M -128.2008,-3.392377 L -104.45558,6.3360672 L -102.43766,6.1757677 L -103.81912,-4.5678172 L -105.75454,-5.8316609 L -124.96922,-4.4459394 L -128.2008,-3.392377 z " + id="path10700" + sodipodi:nodetypes="ccccccc" /> + </clipPath> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient2783" + id="radialGradient3418" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0.9728905,-8.15107,-18.526373,-2.211261,1957.2342,725.31677)" + cx="53.235302" + cy="106.0573" + fx="53.235302" + fy="106.0573" + r="9.1025209" /> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient2594" + id="radialGradient3420" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0.5808473,-2.8009276,-6.4965168,-1.3472267,701.00301,348.75795)" + cx="53.347126" + cy="104.68401" + fx="53.347126" + fy="104.68401" + r="9.1025209" /> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient3314" + id="radialGradient3422" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(-2.9339535,-1.0170467,-1.1904108,3.4340702,323.071,-252.78281)" + cx="49.110855" + cy="105.43803" + fx="49.110855" + fy="105.43803" + r="10.20672" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient2783" + id="linearGradient3425" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(2.2608955,0,0,1.9345479,-550.58555,-317.90247)" + x1="190.03462" + y1="90.22673" + x2="208.7153" + y2="90.22673" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3969" + id="linearGradient3430" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(2.2608955,0,0,1.9345479,-497.11778,-432.24104)" + x1="98.411324" + y1="185.68851" + x2="166.32983" + y2="155.59846" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient7422" + id="linearGradient3525" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(6.0715756e-2,0,0,9.7589526e-2,24.201706,-45.627655)" + x1="399.77466" + y1="1164.6696" + x2="399.77466" + y2="549.06134" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient2431" + id="linearGradient3527" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0.5415355,0,0,0.7222225,23.477667,-8.2222193)" + x1="119.57646" + y1="23.792561" + x2="15.999996" + y2="109.6508" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3310" + id="linearGradient3529" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0,-1.5975038,-2,0,96,199.26848)" + x1="102.31124" + y1="-5.8302126" + x2="74.330322" + y2="32" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient2222" + id="linearGradient3538" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0.7476489,0,0,0.7476489,0,-19.999999)" + x1="8.2386189" + y1="-13.864992" + x2="8.2386189" + y2="-1.4047648" /> + <filter + inkscape:collect="always" + id="filter4420"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="3.0486726" + id="feGaussianBlur4422" /> + </filter> + <mask + maskUnits="userSpaceOnUse" + id="mask3562"> + <rect + ry="1.4444447" + rx="1.1997639" + y="8" + x="-4.0000005" + height="116.00001" + width="124" + id="rect3564" + style="fill:#ffffff;fill-opacity:1;stroke:none;filter:url(#filter4420)" + transform="matrix(1.1453342,0,0,1.1453342,15.087799,-38.432604)" /> + </mask> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="2.2136483" + inkscape:cx="77.317692" + inkscape:cy="55.850409" + inkscape:current-layer="layer1" + showgrid="true" + inkscape:document-units="px" + inkscape:grid-bbox="true" + guidetolerance="4" + showguides="true" + inkscape:guide-bbox="true" + inkscape:window-width="1440" + inkscape:window-height="840" + inkscape:window-x="223" + inkscape:window-y="37" + objecttolerance="4" + gridtolerance="4"> + <sodipodi:guide + orientation="horizontal" + position="-32.073749" + id="guide2204" /> + <inkscape:grid + id="GridFromPre046Settings" + type="xygrid" + originx="0px" + originy="0px" + spacingx="4px" + spacingy="4px" + color="#0000ff" + empcolor="#0000ff" + opacity="0.2" + empopacity="0.4" + empspacing="4" + visible="true" + enabled="true" /> + </sodipodi:namedview> + <metadata + id="metadata1312"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <cc:license + rdf:resource="http://creativecommons.org/licenses/GPL/2.0/" /> + <dc:contributor> + <cc:Agent> + <dc:title>Oxygen team</dc:title> + </cc:Agent> + </dc:contributor> + <dc:title></dc:title> + </cc:Work> + <cc:License + rdf:about="http://creativecommons.org/licenses/LGPL/2.1/"> + <cc:permits + rdf:resource="http://web.resource.org/cc/Reproduction" /> + <cc:permits + rdf:resource="http://web.resource.org/cc/Distribution" /> + <cc:requires + rdf:resource="http://web.resource.org/cc/Notice" /> + <cc:permits + rdf:resource="http://web.resource.org/cc/DerivativeWorks" /> + <cc:requires + rdf:resource="http://web.resource.org/cc/ShareAlike" /> + <cc:requires + rdf:resource="http://web.resource.org/cc/SourceCode" /> + </cc:License> + </rdf:RDF> + </metadata> + <g + id="layer1" + inkscape:label="Layer 1" + inkscape:groupmode="layer"> + <rect + ry="0.1870501" + rx="0.1537565" + y="28.129654" + x="8" + height="92" + width="92" + id="rect3226" + style="fill:#618fd2;fill-opacity:0.09195401;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" /> + <g + id="g3520" + transform="translate(32,-0.1296539)"> + <rect + inkscape:export-ydpi="90" + inkscape:export-xdpi="90" + inkscape:export-filename="/home/pinheiro/Desktop/mock2.png" + style="opacity:0.75;fill:url(#linearGradient3525);fill-opacity:1;fill-rule:nonzero;stroke:none" + id="rect3281" + width="92" + height="92" + x="28.129654" + y="-24" + inkscape:r_cx="true" + inkscape:r_cy="true" + ry="3.9616783" + rx="3.9616783" + transform="matrix(0,1,1,0,0,0)" /> + <rect + ry="1.4444447" + rx="1.1997639" + y="-20" + x="32.129654" + height="84" + width="84" + id="rect3283" + style="fill:url(#linearGradient3527);fill-opacity:1;fill-rule:evenodd;stroke:none" + transform="matrix(0,1,1,0,0,0)" /> + <path + id="path3285" + d="M 64,53.096891 C 45.143834,70.163928 24.748768,86.162699 -2.0000002e-07,96.129654 L -2.0000002e-07,52.647595 C 23.693959,50.212248 45.09831,42.609775 64,32.129654 L 64,53.096891 z" + style="fill:url(#linearGradient3529);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" /> + </g> + <g + transform="translate(-16,20.129654)" + style="fill:#7193c6;fill-opacity:1" + id="g2250"> + <rect + ry="1.3512546" + rx="0.077153668" + y="-116" + x="16" + height="4" + width="4" + id="rect3210" + style="opacity:1;fill:#7193c6;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" + transform="matrix(0,1,-1,0,0,0)" + inkscape:tile-w="8" + inkscape:tile-h="8" + inkscape:tile-cx="124" + inkscape:tile-cy="28" /> + <use + style="fill:#7193c6;fill-opacity:1" + x="0" + y="0" + inkscape:tiled-clone-of="#rect3210" + xlink:href="#rect3210" + transform="translate(0,8)" + id="use2236" + width="128" + height="128" /> + <use + style="fill:#7193c6;fill-opacity:1" + x="0" + y="0" + inkscape:tiled-clone-of="#rect3210" + xlink:href="#rect3210" + transform="translate(0,16)" + id="use2240" + width="128" + height="128" /> + <use + style="fill:#7193c6;fill-opacity:1" + x="0" + y="0" + inkscape:tiled-clone-of="#rect3210" + xlink:href="#rect3210" + transform="translate(0,24)" + id="use2244" + width="128" + height="128" /> + <use + style="fill:#7193c6;fill-opacity:1" + x="0" + y="0" + inkscape:tiled-clone-of="#rect3210" + xlink:href="#rect3210" + transform="translate(0,32)" + id="use2248" + width="128" + height="128" /> + <use + height="88" + width="88" + transform="translate(0,24)" + id="use3220" + xlink:href="#use2240" + y="0" + x="0" /> + <use + height="88" + width="88" + transform="translate(0,24)" + id="use3222" + xlink:href="#use2244" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(0,32)" + id="use2230" + xlink:href="#use2244" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(0,32)" + id="use2232" + xlink:href="#use2248" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(0,32)" + id="use2234" + xlink:href="#use3220" + y="0" + x="0" /> + </g> + <use + height="128" + width="128" + transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129659,128.12964)" + id="use2258" + xlink:href="#g2250" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(-88,0)" + id="use2314" + xlink:href="#g2250" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129651,216.12964)" + id="use2316" + xlink:href="#g2250" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(96,0.1296547)" + id="use3300" + xlink:href="#rect3222" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(7.4990672e-6,96.129662)" + id="use3302" + xlink:href="#rect3222" + y="0" + x="0" /> + <use + height="128" + width="128" + transform="translate(96,96.129652)" + id="use3304" + xlink:href="#rect3222" + y="0" + x="0" /> + <rect + ry="0.18696606" + rx="0.15479258" + y="-32" + x="0" + height="12" + width="12" + id="rect3222" + style="fill:url(#linearGradient3538);fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" + transform="scale(1,-1)" /> + <rect + transform="scale(1,-1)" + style="fill:#bfd9ff;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" + id="rect2225" + width="4" + height="4" + x="4" + y="-28" + rx="0.15479258" + ry="0.18696606" /> + <use + style="fill:#a4c0e4" + height="88" + width="88" + transform="translate(96,0.1296539)" + id="use3226" + xlink:href="#rect2225" + y="0" + x="0" /> + <use + style="fill:#a4c0e4" + height="88" + width="88" + transform="translate(7.5e-6,96.129661)" + id="use3228" + xlink:href="#rect2225" + y="0" + x="0" /> + <use + style="fill:#a4c0e4" + height="88" + width="88" + transform="translate(96,96.129654)" + id="use3230" + xlink:href="#rect2225" + y="0" + x="0" /> + <rect + style="opacity:0.57786889;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.63199997;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" + id="rect1327" + width="1" + height="0" + x="15.057414" + y="-308.20486" /> + <g + id="g3407" + transform="matrix(0.8731076,0,0,0.8731076,-13.173272,33.555799)" + mask="url(#mask3562)"> + <path + sodipodi:nodetypes="ccccccc" + id="path3836" + d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z" + style="fill:url(#radialGradient3418);fill-opacity:1;fill-rule:nonzero;stroke:none" /> + <path + style="fill:#555753;fill-opacity:1;fill-rule:nonzero;stroke:none" + d="m 107.32508,50.938663 -74.427424,35.613119 -3.008197,6.986785 76.368201,-35.710168 3.7845,-5.046004 -2.71708,-1.843732 z" + id="path8241" + sodipodi:nodetypes="cccccc" /> + <path + style="opacity:0.10688836;fill:url(#radialGradient3420);fill-opacity:1;fill-rule:nonzero;stroke:none" + d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z" + id="path11683" + sodipodi:nodetypes="ccccccc" /> + <path + sodipodi:nodetypes="ccccccc" + id="path17921" + d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z" + style="fill:none;stroke:url(#radialGradient3422);stroke-width:0.86455041;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:4" /> + <rect + style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none" + id="rect8239" + width="39.714981" + height="37.454777" + x="27.310663" + y="81.415123" + transform="matrix(0.6571695,-0.7537428,0.7537428,0.6571695,0,0)" + rx="3.8771732" + ry="3.8771732" /> + <rect + transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)" + style="fill:url(#linearGradient3425);fill-opacity:1;fill-rule:nonzero;stroke:none" + id="rect2803" + width="40.499767" + height="122.13765" + x="-120.93575" + y="-157.97318" + rx="0" + ry="0" /> + <rect + transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)" + y="-161.84383" + x="-119.89533" + height="126.00658" + width="39.223213" + id="rect3967" + style="fill:url(#linearGradient3430);fill-opacity:1;fill-rule:nonzero;stroke:none" /> + <rect + transform="matrix(-0.6438304,0.7651682,0.7651682,0.6438304,0,0)" + y="80.243172" + x="-155.77248" + height="40.591759" + width="100.57008" + id="rect1851" + style="opacity:0.52459011;fill:#e0e0e0;fill-opacity:1;fill-rule:nonzero;stroke:none" /> + <rect + ry="1.2485937" + rx="1.2485937" + transform="matrix(2.0406638,-2.3405465,2.3405465,2.0406638,304.62828,-199.57966)" + y="-5.487061" + x="-104.11894" + height="12.061829" + width="12.789698" + id="rect8248" + style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none;filter:url(#filter9723)" + clip-path="url(#clipPath10698)" /> + </g> + </g> +</svg> diff --git a/resources/images/trim.png b/resources/images/trim.png new file mode 100644 index 0000000000000000000000000000000000000000..3cb93adfa670c8353a95b6d2b5d49180db4340e6 GIT binary patch literal 2553 zcmb_eXH=8f7X1>CCSYt7Nr+;oj(|K=q=q(B0Y#*@V1SWcLQeq!L8$@;a44f9#W4hd zpoBUgp$#Y?5HeCknk0Y(5CoDJ-;ek2{d?=2bN5;Mu6x&B=iVPX-Ok2LOyrCR003eZ z<|a3JNcdSo0=%_L-+O=u0w^mp6M*|O%3CY*06>^zVPbeI=66=fa5mKJG_Bp?Bdbob z*2gUC??=YJ<U*8ljk-z4p|g-gp$7>aeuyFH=GB<Jqv5kTGX0(qaRs7NKgbZdqiO(^ zhlGRQ6&9dqbopue3MuZ|;FdMUb~}4zrL?#R2UoU?Zm%H~$JN$&;He8oq*A^AcRCAp zuU&rt@Sh%_Q723$eeXB-tX<az>@e2PJ_URZ&tnNHhM|}EKJukhn?Dch^V{$IP&d3- zWIQ|%hwdCkm9Ii$Wx3Tr%;9O|gSonMvlHtV+w9%tbU3wo(QuR|$D~82EU<Bka|=KW z*=EJ2_!F7CJ79bNkm3<2AtE_Sc>pvubo|9_zz0xPq_uz>tenN)M`fv>1`N2@!%}qO z;Rl`i`1p0MPF*UPfHd@+xkjBRE!nd56#bs2DSEsYGJU8R)sQF$Bq9#GE-1$L_)CF% zrCPWvbXZX8b_Jpp`>j*<xS<T6hCnnNi#3zE)Nu4!nE|^@PYw(Sff5o92D8rn0f3Y< zaAEA}Dm>T-441|En8{QFNQ;pf+@_bU2vP`y{H#{y4*dw9s`bFV5i}P92?@7waB&GC z|4bRZK6;XXyT#0o74~&UrI}AIDIVYUkA%vINO&vCA-H0wOO$|ngq%r`oYC`(R0>P; z^?+=TMJ<#80>EjROnGN#l<f}%p|pnLv8z%$AsQ}enbu9Vh*Z)M<md&aw<ju>-&1^% z3aS|D?x&9u-J))w%4)GQ*@-6zBsZr!xMM(t!ZvIE3UkDp_^4s^O^th6-{t+7@W1mX zAFdJ`*JaGiE8k+5*5E95i2nXyy=3WRi&An9uo2T>AwRIVVy}YXyn$zCrwjST8Gb$3 zMc5Rqr-Ucim@FULnp|wg<JG0@<<2njA{fCl2AvrEzHW60SD(sN%q{wgzj4@2%GbDA zLtE!?*5P9I{Gf9L&zA4~FMH>ulAF4pxy6h5j_;J|;#Di$Lw=WSqmc#W6Vu3H+JlY4 z=R#jdei;{3Kdml|YS3heM;FwsQv1<-{cX`#%9%Jt?yx{cknLIsqauHDauO+(dZL3K z(W)eXLZqLYK2>Imp_rbDe|!GVRt;%>NmEJ<`t?wMgoj6w!dLo2J&hD#sk{}Zm+Y<O z(4&~y{wZMeTU3#A3DMDaIxZ@f$PS`O9twEo3v{e=c6Jx?3XOo}BXZ5n_P9~!^YNv# z8*^KMdJ}igR&-a}S3>wljRPmB5!{1;Txp!oam%}V)%fBOK<n$1%r7zOBFQzHbz=@< z;?%!uCW=YKU$(uajH)Boa$c2<>0~X$B>2?tW(T=&JDvrqg=lCc1RphooE0@TFH-yR z%jk3&nXHyjqmb^<-;j$Ke^r@p1mU-#EiDKRoZ_zlg<}2YqZUV^mwqFsn~*IJDNQM7 zh062$SpgonoI~sk{s)_3tQTCx*K%XkaiUTah)c|P60E6K6mVXvI01Ug(oosq%^-E{ zVzSiCRR-7Yrl?c~hHzwl`}}atP~c6Ss=PTX<)eP_H}H?q)x$o@ZJR+>I=!mYdTn=0 zXGY=xWn$^W197|i3XS)gEl~D0O~V6D0kCp$^uH=s5SRMCJSnW+X??j9!NL%<$1uMZ z7!E7kL0foF>upl<bkU*C&i#OSRShg(Eb_CuPW-Ty2s<)v--VJ<vOzZ_Glv9qmjO)G z{O+G8>xcS1qOXlPsMT7JuQ>SO6?qrllC~xqZ$jtdw0k4`VU><pk>v)Smga27*!t<7 z%WPJ-sam2N*7e~<FKOke6Prv`*XhQmLGLg@AAb+7*VIPB+_Iir7TP3HeX#|&L6KEJ z%x;nSv|DYx_`<co;!dv)ZKVC6#{5GjN!RP~$8wc>oq)?e*D@gpeAfH%VY~l;k}K>h zNz^2{(~GPr_ra1Uwh&{gRJ=#}fk`5yK}=VIN-+e9B1NIsXm(^u4Z~Q2UBB%l=mf;3 zdzIdGe17cCz$y8MeV2ViKWhcc6AOtp{D3;c(cIJun>+8mS$D}-g&x4}L*2gZB3NZy zT9{XHSJsQzS4s;L^%=YsOBG!ON|hL^?v$1Q5?d>)#5KuXc6J@qauZ$K6hzl1d09Nh zIyzH770|)W$PXSN_RpmeSBN|fY^J`GE6nS5jV_yUWrC?{slvX^Hx7E7p;DX4dTlzS zX1|}7h9RIU?#ekyAY%W_IQ0*@-$TaHee>m1!uxBy7dEB0T!+!>#mPnumtHyxI$f&4 z>EEsx)bIyEfy+g-Zv*_RK!kb#cv+=OyW@-<<^?tB9)@rk<sf?)s0Ezyb%vD)t^yb5 z;_lBT@%R@^JpzQ+sH(q=BAaUg61^JN^b>hsF;5IU4$`)Kg!nNIlGvAgP<U#9EFTI) zLdQY6UD`<6w^w`{67R#b0Bv#JFuY;q^JMXkm=wOBI4jLN%A+$J|F7HVMIdH(c>^GD z@e48Af5d<1#LrSsqP{D@fL`o@olyw5tI@*pDcoTD1B}689I7?>h#dhElQ{I9i5*6L z9&S6utIWI$k|;ViPD8oMD1jm7BL;6qX0Ez)L6AMOw%@NbHA;^YA8kde;wZMM<(VMq zd&k-)r^bI3W&O1rQnL45zvu%zHK(IJt2R*Pga{<(uVu+A2C!{gq{wX7v&yE%ToE82 zaX6B~Wm)AOtin&=u^VGmocC=JE1kI4{~Q%QIDgRRV4U}E=B$>=hGRqgbvIS=ur=g2 zHx(6#7)_O!mloF-4v{JDSDvCALuIh0td}%%B~DVK=;)ABGwO&+W?gRUoroy<`Bw_t zFc$4s2?`#1pWYXJ&h^Fhq^|kB;GDhDv<rT=p5b)UbZpzs5{w%wob`kK390O_=kl~5 zDR@*B7Nu<Q<c=^=v))~3MAPMIN+>;3;n9(u&p*(tVI2-zj?`YnsXxs#3wY?(f4oA4 zN=xL%%dygpcW>8A46rH?;61#1t9z~tUf0@uxV9iuU9Kd<ZNk%;U8vB)^bQZNn@i67 zYiq&Hzy|rb7)5g9+123ZLjyhSzgb2AGx1~1^YI5(|LrO%1eqpE&s!DHvf6<drfEZu vV-E_Q^uxsD?#goC!ug8!c$DrZogv3Di@b@v`~o$emjf(JZA?f;UP=E3%Wc+t literal 0 HcmV?d00001 diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 26dbda6ca4..53788809b6 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -300,6 +300,24 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.cpixmap = pix self.cover_data = cdata + def trim_cover(self, *args): + from calibre.utils.magick import Image + cdata = self.cover_data + if not cdata: + return + im = Image() + im.load(cdata) + im.trim(10) + cdata = im.export('jpg') + pix = QPixmap() + pix.loadFromData(cdata) + self.cover.setPixmap(pix) + self.cover_changed = True + self.cpixmap = pix + self.cover_data = cdata + + + def sync_formats(self): old_extensions, new_extensions, paths = set(), set(), {} for row in range(self.formats.count()): @@ -380,6 +398,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.remove_unused_series) QObject.connect(self.auto_author_sort, SIGNAL('clicked()'), self.deduce_author_sort) + self.trim_cover_button.clicked.connect(self.trim_cover) self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'), self.author_sort_box_changed) self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'), diff --git a/src/calibre/gui2/dialogs/metadata_single.ui b/src/calibre/gui2/dialogs/metadata_single.ui index 74febf9c29..dbf825e706 100644 --- a/src/calibre/gui2/dialogs/metadata_single.ui +++ b/src/calibre/gui2/dialogs/metadata_single.ui @@ -625,6 +625,17 @@ Using this button to create author sort will change author sort from red to gree </property> </widget> </item> + <item> + <widget class="QToolButton" name="trim_cover_button"> + <property name="toolTip"> + <string>Remove border (if any) from cover</string> + </property> + <property name="icon"> + <iconset resource="../../../../resources/images.qrc"> + <normaloff>:/images/trim.png</normaloff>:/images/trim.png</iconset> + </property> + </widget> + </item> <item> <widget class="QToolButton" name="reset_cover"> <property name="toolTip"> From 231aab95614acf5ab738ee4a949b52d312a28383 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 20 Sep 2010 09:37:15 -0600 Subject: [PATCH 11/13] WSJ: Don't error out if a single section fails --- resources/recipes/wsj_free.recipe | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe index 7f3664f1c4..df8234e8e2 100644 --- a/resources/recipes/wsj_free.recipe +++ b/resources/recipes/wsj_free.recipe @@ -54,10 +54,13 @@ class WallStreetJournal(BasicNewsRecipe): def wsj_add_feed(self,feeds,title,url): self.log('Found section:', title) - if url.endswith('whatsnews'): - articles = self.wsj_find_wn_articles(url) - else: - articles = self.wsj_find_articles(url) + try: + if url.endswith('whatsnews'): + articles = self.wsj_find_wn_articles(url) + else: + articles = self.wsj_find_articles(url) + except: + articles = [] if articles: feeds.append((title, articles)) return feeds From 8bd686628966bb3f8948377d8be7b4cadd78b433 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 20 Sep 2010 09:40:47 -0600 Subject: [PATCH 12/13] ... --- resources/recipes/wsj.recipe | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe index fd5e977d10..88e07bcea3 100644 --- a/resources/recipes/wsj.recipe +++ b/resources/recipes/wsj.recipe @@ -70,13 +70,16 @@ class WallStreetJournal(BasicNewsRecipe): def wsj_add_feed(self,feeds,title,url): self.log('Found section:', title) - if url.endswith('whatsnews'): - articles = self.wsj_find_wn_articles(url) - else: - articles = self.wsj_find_articles(url) + try: + if url.endswith('whatsnews'): + articles = self.wsj_find_wn_articles(url) + else: + articles = self.wsj_find_articles(url) + except: + articles = [] if articles: feeds.append((title, articles)) - return feeds + return feeds def parse_index(self): soup = self.wsj_get_index() @@ -99,7 +102,7 @@ class WallStreetJournal(BasicNewsRecipe): url = 'http://online.wsj.com' + a['href'] feeds = self.wsj_add_feed(feeds,title,url) title = 'What''s News' - url = url.replace('pageone','whatsnews') + url = url.replace('pageone','whatsnews') feeds = self.wsj_add_feed(feeds,title,url) else: title = self.tag_to_string(a) @@ -141,7 +144,7 @@ class WallStreetJournal(BasicNewsRecipe): articles = [] flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x}) - if flavorarea is not None: + if flavorarea is not None: flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article')) if flavorstory is not None: flavorstory['class'] = 'mjLinkItem' From 88f980ad186859708aceb3907ae59d4052648ff3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 20 Sep 2010 21:33:43 -0600 Subject: [PATCH 13/13] News download: Don't add inline table of contents when downloading news for the Kindle --- src/calibre/gui2/tools.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 7a516bb4ff..2f0452a773 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -217,6 +217,10 @@ def fetch_scheduled_recipe(arg): if 'output_profile' in ps: recs.append(('output_profile', ps['output_profile'], OptionRecommendation.HIGH)) + if ps['output_profile'] == 'kindle': + recs.append(('no_inline_toc', True, + OptionRecommendation.HIGH)) + lf = load_defaults('look_and_feel') if lf.get('base_font_size', 0.0) != 0.0: recs.append(('base_font_size', lf['base_font_size'],