From 0ad1f3c088f2ff0872de49171fd99a91a50a031a Mon Sep 17 00:00:00 2001 From: ldolse Date: Wed, 25 Aug 2010 10:49:42 +1000 Subject: [PATCH 01/43] preprocessing regex tweaks --- src/calibre/ebooks/conversion/preprocess.py | 2 +- src/calibre/ebooks/rtf/input.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index da652c1a38..940c27344b 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -209,7 +209,7 @@ class HTMLPreProcessor(object): (re.compile(ur'\u00a0'), lambda match : ' '), # Detect Chapters to match default XPATH in GUI - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part)\s*([\d\w-]+(\s\w+)?)?(()?)?)]*>\s*(?P(<(i|b)>)?\s*\w+(\s*\w+)?\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), + (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)>(</(i|b)>)?)?)</?(br|p)[^>]*>\s*(?P<title>(<(i|b)>)?\s*\w+(\s*\w+)?\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>([A-Z \'"!]{5,})\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), # Have paragraphs show better diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index dcffbe68ca..eaba28e429 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -231,12 +231,12 @@ class RTFInput(InputFormatPlugin): if self.options.preprocess_html: print "********* Preprocessing HTML *********\n" # Detect Chapters to match the xpath in the GUI - chapdetect = 
re.compile(r'<p[^>]*>\s*<span[^>]*>\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)\s*</span>\s*</p>', re.IGNORECASE) + chapdetect = re.compile(r'<p[^>]*>\s*<span[^>]*>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)>(<(/i|b)>)?)?)\s*</span>\s*</p>', re.IGNORECASE) res = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', res) # Unwrap lines using punctation if the median length of all lines is less than 150 length = line_length('html', res, 0.4) print "*** Median length is " + str(length) + " ***\n" - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</span>\s*(</p>)?\s*(?P<up2threeblanks><p[^>]*>\s*(<span[^>]*>\s*</span>\s*)</p>\s*){0,3}\s*<p[^>]*>\s*(<span[^>]*>)?\s*" % length, re.UNICODE) + unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</span>\s*</p>\s*(?P<up2threeblanks><p[^>]*>\s*(<span[^>]*>\s*</span>\s*)</p>\s*){0,3}\s*<p[^>]*>\s*<span[^>]*>\s*" % length, re.UNICODE) if length < 150: res = unwrap.sub(' ', res) f.write(res) From 5c951fb9628617133f17ead6d1393ea84b7c6412 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sat, 4 Sep 2010 15:12:29 +1000 Subject: [PATCH 02/43] Preprocessing Updates --- src/calibre/ebooks/conversion/preprocess.py | 26 +++-- src/calibre/ebooks/html/input.py | 2 +- src/calibre/ebooks/lit/input.py | 104 ++++++++++++++++++-- src/calibre/ebooks/mobi/input.py | 10 ++ src/calibre/ebooks/pdf/reflow.py | 4 + 5 files changed, 132 insertions(+), 14 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 957418f1fd..2954fd7c26 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -62,6 +62,7 @@ def wrap_lines(match): else: return ital+' ' + def line_length(format, raw, percent): ''' raw is the raw text to find the line length to use for wrapping. 
@@ -191,32 +192,36 @@ class HTMLPreProcessor(object): (re.compile(u'¸\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'ç'), (re.compile(u'¸\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Ç'), + # If pdf printed from a browser then the header/footer has a reliable pattern + (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''), + + # Center separator lines + (re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'), + # Remove page links (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''), # Remove <hr> tags (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'), # Replace <br><br> with <p> - (re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'), + # (re.compile(r'<br>\s*<br>', re.IGNORECASE), lambda match: '\n<p>'), - # Remove hyphenation - (re.compile(r'-<br.*?>\n\r?'), lambda match: ''), + # unwrap hyphenation - don't delete the hyphen (often doesn't split words) + (re.compile(r'(?<=[-–])\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), # Remove gray background (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'), # Detect Chapters to match default XPATH in GUI - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)>(</(i|b)>)?)?)</?(br|p)[^>]*>\s*(?P<title>(<(i|b)>)?\s*\w+(\s*\w+)?\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>([A-Z \'"!]{5,})\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), + 
(re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+(\s\w+)?)?\s*(</(i|b)>(</(i|b)>)?)?)\s*(</?(br|p)[^>]*>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), # Clean up spaces (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), - # Connect paragraphs split by - - (re.compile(u'(?<=[^\s][-–])[\s]*(</p>)*[\s]*(<p>)*\s*(?=[^\s])'), lambda match: ''), # Add space before and after italics (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'), (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), + ] # Fix Book Designer markup @@ -293,6 +298,13 @@ class HTMLPreProcessor(object): import traceback print 'Failed to parse remove_footer regexp' traceback.print_exc() + + # Make the more aggressive chapter marking regex optional with the preprocess option to reduce false positives + if getattr(self.extra_opts, 'preprocess_html', None): + if is_pdftohtml: + end_rules.append( + (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), + ) if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index d57bfddd3e..35a8a1a9bc 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -494,7 +494,7 @@ class HTMLInput(InputFormatPlugin): if not hasattr(self, 'log'): from calibre.utils.logging import default_log self.log = default_log - self.log("********* Preprocessing HTML *********") + self.log("********* 
Preprocessing HTML - HTML Input plugin *********") # Detect Chapters to match the xpath in the GUI chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE) html = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', html) diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index 9bf20fb1d4..f7bb0fbfd9 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -11,12 +11,14 @@ import re from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.conversion.preprocess import line_length + class LITInput(InputFormatPlugin): name = 'LIT Input' author = 'Marshall T. Vandegrift' description = 'Convert LIT files to HTML' file_types = set(['lit']) + html_preprocess_sections = 0 def convert(self, stream, options, file_ext, log, accelerators): @@ -55,14 +57,104 @@ class LITInput(InputFormatPlugin): def preprocess_html(self, html): + + def chapter_head(match): + chap = match.group('chap') + title = match.group('title') + if not title: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) + return '<h2>'+chap+'</h2>\n' + else: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " chapters & titles. 
- " + str(chap) + ", " + str(title)) + return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' + + def chapter_link(match): + chap = match.group('sectionlink') + if not chap: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links") + return '<br style="page-break-before:always">' + else: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap)) + return '<br clear="all" style="page-break-before:always">\n<h2>'+chap+'</h2>' + + + def no_markup(raw, percent): + ''' + Detects total marked up line endings in the file. raw is the text to + inspect. Percent is the minimum percent of line endings which should + be marked up to return true. + ''' + htm_end_ere = re.compile('</p>', re.DOTALL) + line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL) + htm_end = htm_end_ere.findall(raw) + line_end = line_end_ere.findall(raw) + tot_htm_ends = len(htm_end) + tot_ln_fds = len(line_end) + self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***") + + if percent > 1: + percent = 1 + if percent < 0: + percent = 0 + + min_lns = tot_ln_fds * percent + self.log("There must be more than " + str(min_lns) + " unmarked lines to be true") + if min_lns > tot_htm_ends: + return True + self.log("********* Preprocessing HTML *********") - # Detect Chapters to match the xpath in the GUI - chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE) - html = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', html) - # Unwrap lines using punctation if the median length of all lines is less than 150 + # remove non-breaking spaces + html = 
re.sub(ur'\u00a0', ' ', html) + # Get rid of empty <o:p> tags to simplify other processing + html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) + # Get rid of empty span tags + html = re.sub(r"\s*<span[^>]*>\s*</span>", " ", html) + + # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing + linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE) + blankreg = re.compile(r'\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>', re.IGNORECASE) + blanklines = blankreg.findall(html) + lines = linereg.findall(html) + if len(lines) > 1: + self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank") + if float(len(blanklines)) / float(len(lines)) > 0.40: + self.log("deleting blank lines") + html = blankreg.sub('', html) + # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly + html = re.sub(r"\s*</p>", "</p>\n", html) + + # some lit files don't have any <p> tags or equivalent, check and + # mark up line endings if required before proceeding + if no_markup(html, 0.1): + self.log("not enough paragraph markers, adding now") + add_markup = re.compile('(?<!>)(\n)') + html = add_markup.sub('</p>\n<p>', html) + + # detect chapters/sections to match xpath or splitting logic # - # Insert extra line feeds so the line length regex functions properly - html = re.sub(r"</p>", "</p>\n", html) + # Mark split points based on embedded links + chaplink = re.compile(r'<a\sname[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<sectionlink>[^\s<]+(\s*[^\s<]+){0,4})?\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*</a>', re.IGNORECASE) + html = chaplink.sub(chapter_link, html) + # Continue with alternate patterns, start with most typical chapter headings + if self.html_preprocess_sections < 10: + chapdetect = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}.?(\d+\.?|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\s*){0,4}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) + html = chapdetect.sub(chapter_head, html) + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern") + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#]+\s*){1,9}|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) + html = chapdetect2.sub(chapter_head, html) + + # search for places where a first or second level heading is immediately followed by another + # top level heading. 
demote the second heading to h3 to prevent splitting between chapter + # headings and titles, images, etc + doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) + html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) + # + # Unwrap lines using punctation if the median length of all lines is less than 150 length = line_length('html', html, 0.4) self.log("*** Median length is " + str(length) + " ***") unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 487e70c04f..b8dc7a9560 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,6 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' +import re from calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): @@ -37,3 +38,12 @@ class MOBIInput(InputFormatPlugin): include_meta_content_type=False)) accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]' return mr.created_opf_path + + def preprocess_html(self, html): + # search for places where a first or second level heading is immediately followed by another + # top level heading. 
demote the second heading to h3 to prevent splitting between chapter + # headings and titles, images, etc + doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) + html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) + return html + diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 584d631d0b..36848ddb8b 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -408,6 +408,10 @@ class Page(object): # Fraction of text height that two strings' bottoms can differ by # for them to be considered to be part of the same text fragment LINE_FACTOR = 0.4 + + # Percentage of the page heigth which should be considered header + # or footer to be discarded from reflow considerations + HEAD_FOOTER_MARGIN # Multiplies the average line height when determining row height # of a particular element to detect columns. From c9cb61a40e015059716478255ad67aa30716ea6f Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Fri, 10 Sep 2010 13:46:01 -0700 Subject: [PATCH 03/43] GwR jacket work --- src/calibre/ebooks/oeb/transforms/jacket.py | 72 ++++++++++++++------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index fec4d230c3..030067850c 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -6,14 +6,14 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import textwrap +import os, textwrap from xml.sax.saxutils import escape from itertools import repeat from lxml import etree +from calibre import guess_type, strftime from calibre.ebooks.oeb.base import XPath, XPNSMAP -from calibre import guess_type from calibre.library.comments import comments_to_html class 
Jacket(object): ''' @@ -24,22 +24,18 @@ class Jacket(object): JACKET_TEMPLATE = textwrap.dedent(u'''\ <html xmlns="%(xmlns)s"> <head> - <title>%(title)s + %(title_str)s + -
-
-

%(title)s

-

%(jacket)s

-
%(series)s
-
%(rating)s
-
%(tags)s
-
-
- %(comments)s -
+ +
%(comments)s
''') @@ -71,11 +67,18 @@ class Jacket(object): return ans id, href = self.oeb.manifest.generate('star', 'star.png') self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True)) - ans = 'Rating: ' + ''.join(repeat('star'%href, num)) + ans = 'Rating: ' + ''.join(repeat('star'%href, num)) return ans def insert_metadata(self, mi): self.log('Inserting metadata into book...') + jacket_resources = P("jacket") + + if os.path.isdir(jacket_resources): + stylesheet = os.path.join(jacket_resources, 'stylesheet.css') + with open(stylesheet) as f: + css_data = f.read() + comments = mi.comments if not comments: try: @@ -87,11 +90,13 @@ class Jacket(object): orig_comments = comments if comments: comments = comments_to_html(comments) - series = 'Series: ' + escape(mi.series if mi.series else '') + + series = 'Series: %s' % escape(mi.series if mi.series else '') if mi.series and mi.series_index is not None: - series += escape(' [%s]'%mi.format_series_index()) + series += '%s' % escape(' [%s]'%mi.format_series_index()) if not mi.series: series = '' + tags = mi.tags if not tags: try: @@ -99,23 +104,30 @@ class Jacket(object): except: tags = [] if tags: - tags = 'Tags: ' + self.opts.dest.tags_to_string(tags) + tags = 'Tags:%s' % self.opts.dest.tags_to_string(tags) else: tags = '' + try: - title = mi.title if mi.title else unicode(self.oeb.metadata.title[0]) + title_str = mi.title if mi.title else unicode(self.oeb.metadata.title[0]) except: - title = _('Unknown') + title_str = _('Unknown') + title = '%s (%s)' % (escape(title_str), strftime(u'%Y', mi.pubdate.timetuple())) + def generate_html(comments): return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'], - title=escape(title), comments=comments, - jacket=escape(_('Book Jacket')), series=series, - tags=tags, rating=self.get_rating(mi.rating)) + title=title, comments=comments, + series=series, + tags=tags, rating=self.get_rating(mi.rating), + css=css_data, title_str=title_str) + id, href = 
self.oeb.manifest.generate('jacket', 'jacket.xhtml') from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath try: root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) +# print "root: %s" % etree.tostring(root, encoding='utf-8', +# xml_declaration=True, pretty_print=True) except: root = etree.fromstring(generate_html(escape(orig_comments)), parser=RECOVER_PARSER) @@ -137,8 +149,22 @@ class Jacket(object): def __call__(self, oeb, opts, metadata): + ''' + Add metadata in jacket.xhtml if specifed in opts + If not specified, remove previous jacket instance + ''' self.oeb, self.opts, self.log = oeb, opts, oeb.log if opts.remove_first_image: self.remove_first_image() if opts.insert_metadata: self.insert_metadata(metadata) + else: + jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]') + for item in list(self.oeb.spine)[:4]: + if jacket(item.data): + try: + self.log.info("Removing previous jacket instance") + self.oeb.manifest.remove(item) + break + except: + continue From 4c7373026b9ee8a618dccf8602740d6a7d578aa2 Mon Sep 17 00:00:00 2001 From: ldolse Date: Sat, 11 Sep 2010 12:10:49 +1000 Subject: [PATCH 04/43] preprocessing changes for lit & pdf, added utils.py, changed default unwrap_factor --- src/calibre/ebooks/conversion/preprocess.py | 15 ++++++++--- src/calibre/ebooks/conversion/utils.py | 6 +++++ src/calibre/ebooks/lit/input.py | 29 +++++++++++++-------- src/calibre/ebooks/pdf/input.py | 4 +-- 4 files changed, 37 insertions(+), 17 deletions(-) create mode 100644 src/calibre/ebooks/conversion/utils.py diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 2954fd7c26..452a322d95 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -77,6 +77,7 @@ def line_length(format, raw, percent): elif format == 'pdf': linere = re.compile('(?<=
).*?(?=
)', re.DOTALL) lines = linere.findall(raw) + print "percent is " + str(percent) lengths = [] for line in lines: @@ -165,6 +166,11 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*()*\s*I', re.UNICODE), lambda match: u'Ì'), (re.compile(u'`\s*()*\s*a', re.UNICODE), lambda match: u'à'), (re.compile(u'`\s*()*\s*A', re.UNICODE), lambda match: u'À'), + + #(re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'), + #(re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'), + #(re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'), + #(re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'), (re.compile(u'´\s*()*\s*o', re.UNICODE), lambda match: u'ó'), (re.compile(u'´\s*()*\s*O', re.UNICODE), lambda match: u'Ó'), @@ -206,13 +212,13 @@ class HTMLPreProcessor(object): # (re.compile(r'
\s*
', re.IGNORECASE), lambda match: '\n

'), # unwrap hyphenation - don't delete the hyphen (often doesn't split words) - (re.compile(r'(?<=[-–])\s*
\s*(?=[[a-z\d])'), lambda match: ''), + (re.compile(u'(?<=[-–—])\s*
\s*(?=[[a-z\d])'), lambda match: ''), # Remove gray background (re.compile(r']+>'), lambda match : ''), # Detect Chapters to match default XPATH in GUI - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+(\s\w+)?)?\s*(()?)?)\s*(]*>\s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), + (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</(i|b)>(</(i|b)>)?)?)\s*(</?(br|p)[^>]*>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), @@ -303,15 +309,16 @@ class HTMLPreProcessor(object): if getattr(self.extra_opts, 'preprocess_html', None): if is_pdftohtml: end_rules.append( - (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), + (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), ) if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) if length: + print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - 
(re.compile(r'(?<=.{%i}[a-z\.,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py new file mode 100644 index 0000000000..52be473372 --- /dev/null +++ b/src/calibre/ebooks/conversion/utils.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' \ No newline at end of file diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index f7bb0fbfd9..35dad501be 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -102,7 +102,7 @@ class LITInput(InputFormatPlugin): percent = 0 min_lns = tot_ln_fds * percent - self.log("There must be more than " + str(min_lns) + " unmarked lines to be true") + self.log("There must be more than " + str(min_lns) + " unmarked lines to return true") if min_lns > tot_htm_ends: return True @@ -141,24 +141,31 @@ class LITInput(InputFormatPlugin): html = chaplink.sub(chapter_link, html) # Continue with alternate patterns, start with most typical chapter headings if self.html_preprocess_sections < 10: - chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}.?(\d+\.?|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\s*){0,4}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) + chapdetect = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}s*(<span[^>]*>)?\s*.?(\d+\.?|Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</(i|b|u)>){0,2})\s*(</span>)?s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) html = chapdetect.sub(chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern") chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#]+\s*){1,9}|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) - html = chapdetect2.sub(chapter_head, html) - + html = chapdetect2.sub(chapter_head, html) + # + # Unwrap lines using punctation if the median length of all lines is less than 150 + length = line_length('html', html, 0.4) + self.log("*** Median line length is " + str(length) + " ***") + unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) + if length < 150: + self.log("Unwrapping Lines") + html = unwrap.sub(' ', html) + # If still no sections after unwrapping lines break on lines with no punctuation + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + 
str(self.html_preprocess_sections) + ", splitting based on punctuation") + #self.log(html) + chapdetect3 = re.compile(r'(<p[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?([a-z]+\s*){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</p>)(?P<title>)?', re.IGNORECASE) + html = chapdetect3.sub(chapter_head, html) # search for places where a first or second level heading is immediately followed by another # top level heading. demote the second heading to h3 to prevent splitting between chapter # headings and titles, images, etc doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) - # - # Unwrap lines using punctation if the median length of all lines is less than 150 - length = line_length('html', html, 0.4) - self.log("*** Median length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - if length < 150: - html = unwrap.sub(' ', html) + return html diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 64a089281e..113c3d99d8 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -22,10 +22,10 @@ class PDFInput(InputFormatPlugin): options = set([ OptionRecommendation(name='no_images', recommended_value=False, help=_('Do not extract images from the document')), - OptionRecommendation(name='unwrap_factor', recommended_value=0.5, + OptionRecommendation(name='unwrap_factor', recommended_value=0.45, help=_('Scale used to determine the length at which a line should ' 'be unwrapped. Valid values are a decimal between 0 and 1. 
The ' - 'default is 0.5, this is the median line length.')), + 'default is 0.45, this is the median line length.')), OptionRecommendation(name='new_pdf_engine', recommended_value=False, help=_('Use the new PDF conversion engine.')) ]) From faf15b2f3d611594352721d4d06407025fea1320 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sat, 11 Sep 2010 13:09:23 +1000 Subject: [PATCH 05/43] preprocess merge gone wrong, fixing --- src/calibre/ebooks/conversion/preprocess.py | 25 ++++++--------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index e2364d961f..24a389e65c 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -168,7 +168,6 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'), -<<<<<<< TREE (re.compile(u'`\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'è'), (re.compile(u'`\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'È'), (re.compile(u'`\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ì'), @@ -176,13 +175,6 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'à'), (re.compile(u'`\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'À'), - #(re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'), - #(re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'), - #(re.compile(u'o\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ò'), - #(re.compile(u'O\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ò'), -======= ->>>>>>> MERGE-SOURCE - # ´ (re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'), (re.compile(u'´\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Á'), @@ -218,14 +210,7 @@ class HTMLPreProcessor(object): # ¸ 
(re.compile(u'¸\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'ç'), (re.compile(u'¸\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Ç'), - -<<<<<<< TREE - # If pdf printed from a browser then the header/footer has a reliable pattern - (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''), - - # Center separator lines - (re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'), -======= + # ˛ (re.compile(u'˛\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ą'), (re.compile(u'˛\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ą'), @@ -235,8 +220,12 @@ class HTMLPreProcessor(object): # ˙ (re.compile(u'˙\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ż'), (re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'), - ->>>>>>> MERGE-SOURCE + + # If pdf printed from a browser then the header/footer has a reliable pattern + (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''), + + # Center separator lines + (re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'), # Remove page links (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''), From 2a906184ad4c56d3018806c03bf2647bd8ecc242 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sat, 11 Sep 2010 13:17:21 +1000 Subject: [PATCH 06/43] preprocess merge gone wrong, merged original accent code back --- src/calibre/ebooks/conversion/preprocess.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 24a389e65c..f2b19efa9b 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -168,13 +168,7 @@ class HTMLPreProcessor(object): 
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'), - (re.compile(u'`\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'è'), - (re.compile(u'`\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'È'), - (re.compile(u'`\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ì'), - (re.compile(u'`\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ì'), - (re.compile(u'`\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'à'), - (re.compile(u'`\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'À'), - + # ´ (re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'), (re.compile(u'´\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Á'), @@ -210,7 +204,7 @@ class HTMLPreProcessor(object): # ¸ (re.compile(u'¸\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'ç'), (re.compile(u'¸\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Ç'), - + # ˛ (re.compile(u'˛\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ą'), (re.compile(u'˛\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ą'), @@ -221,6 +215,7 @@ class HTMLPreProcessor(object): (re.compile(u'˙\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ż'), (re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'), + # If pdf printed from a browser then the header/footer has a reliable pattern (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''), From 480eccb0b0c3921fd356d329e6d601b9207c2d26 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sat, 11 Sep 2010 15:33:10 +1000 Subject: [PATCH 07/43] Fixed unwrapping for various hyphen and dash types, other minor tweaks to pdf --- src/calibre/ebooks/conversion/preprocess.py | 28 ++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 
f2b19efa9b..c120f0a560 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -77,7 +77,6 @@ def line_length(format, raw, percent): elif format == 'pdf': linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL) lines = linere.findall(raw) - print "percent is " + str(percent) lengths = [] for line in lines: @@ -230,14 +229,17 @@ class HTMLPreProcessor(object): # (re.compile(r'<br>\s*<br>', re.IGNORECASE), lambda match: '\n<p>'), # unwrap hyphenation - don't delete the hyphen (often doesn't split words) - (re.compile(u'(?<=[-–—])\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), + #(re.compile(u'(?<=[-–—])\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), + # unwrap/delete soft hyphens + #(re.compile(u'[­]\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), # Remove gray background (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'), # Detect Chapters to match default XPATH in GUI (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</(i|b)>(</(i|b)>)?)?)\s*(</?(br|p)[^>]*>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), - + (re.compile(r'<br\s*/?>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?'), chap_head), + # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), # Clean up spaces @@ -322,21 +324,29 @@ class HTMLPreProcessor(object): import traceback print 'Failed to parse remove_footer regexp' traceback.print_exc() + + # unwrap hyphenation - moved here so it's executed after header/footer removal + if is_pdftohtml: + # unwrap visible dashes and hyphens - don't delete as 50% or more of the time these + # hyphens are for compound words, formatting, etc + 
end_rules.append((re.compile(u'(?<=[-–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: '')) + # unwrap/delete soft hyphens + end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) + # unwrap/delete soft hyphens with formatting + end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: '')) # Make the more aggressive chapter marking regex optional with the preprocess option to reduce false positives if getattr(self.extra_opts, 'preprocess_html', None): if is_pdftohtml: - end_rules.append( - (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), - ) - + end_rules.append((re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head)) + if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) if length: - print "The pdf line length returned is " + str(length) + # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: From cf7cc4de4d9b9fa5e4b22c5ce2cb63c099165589 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sat, 11 
Sep 2010 21:02:44 +1000 Subject: [PATCH 08/43] preprocess updates for lit, html, and pdf --- src/calibre/ebooks/conversion/preprocess.py | 8 -- src/calibre/ebooks/conversion/utils.py | 122 +++++++++++++++++++- src/calibre/ebooks/html/input.py | 20 +--- src/calibre/ebooks/lit/input.py | 117 +------------------ src/calibre/ebooks/pdb/pdf/reader.py | 2 +- src/calibre/ebooks/pdf/input.py | 2 +- 6 files changed, 129 insertions(+), 142 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index c120f0a560..6123577191 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -214,7 +214,6 @@ class HTMLPreProcessor(object): (re.compile(u'˙\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ż'), (re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'), - # If pdf printed from a browser then the header/footer has a reliable pattern (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''), @@ -225,13 +224,6 @@ class HTMLPreProcessor(object): (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''), # Remove <hr> tags (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'), - # Replace <br><br> with <p> - # (re.compile(r'<br>\s*<br>', re.IGNORECASE), lambda match: '\n<p>'), - - # unwrap hyphenation - don't delete the hyphen (often doesn't split words) - #(re.compile(u'(?<=[-–—])\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), - # unwrap/delete soft hyphens - #(re.compile(u'[­]\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), # Remove gray background (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'), diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 52be473372..68cebb3a11 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -3,4 +3,124 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid 
Goyal <kovid@kovidgoyal.net>' -__docformat__ = 'restructuredtext en' \ No newline at end of file +__docformat__ = 'restructuredtext en' + +import re +from calibre.ebooks.conversion.preprocess import line_length +from calibre.utils.logging import default_log +from lxml import etree + +class PreProcessor(object): + html_preprocess_sections = 0 + + def __init__(self, args): + self.args = args + self.log = default_log + + def chapter_head(self, match): + chap = match.group('chap') + title = match.group('title') + if not title: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) + return '<h2>'+chap+'</h2>\n' + else: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) + return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' + + def chapter_link(self, match): + chap = match.group('sectionlink') + if not chap: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links") + return '<br style="page-break-before:always">' + else: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap)) + return '<br clear="all" style="page-break-before:always">\n<h2>'+chap+'</h2>' + + def no_markup(self, raw, percent): + ''' + Detects total marked up line endings in the file. raw is the text to + inspect. Percent is the minimum percent of line endings which should + be marked up to return true. 
+ ''' + htm_end_ere = re.compile('</p>', re.DOTALL) + line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL) + htm_end = htm_end_ere.findall(raw) + line_end = line_end_ere.findall(raw) + tot_htm_ends = len(htm_end) + tot_ln_fds = len(line_end) + self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***") + + if percent > 1: + percent = 1 + if percent < 0: + percent = 0 + + min_lns = tot_ln_fds * percent + self.log("There must be fewer than " + str(min_lns) + " unmarked lines to return true") + if min_lns > tot_htm_ends: + return True + + def __call__(self, html): + self.log("********* Preprocessing HTML *********") + # remove non-breaking spaces + html = re.sub(ur'\u00a0', ' ', html) + # Get rid of empty <o:p> tags to simplify other processing + html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) + # Get rid of empty span tags + html = re.sub(r"\s*<span[^>]*>\s*</span>", " ", html) + + # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing + linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE) + blankreg = re.compile(r'\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>', re.IGNORECASE) + blanklines = blankreg.findall(html) + lines = linereg.findall(html) + if len(lines) > 1: + self.log("There are " + str(len(blanklines)) + " blank lines. 
" + str(float(len(blanklines)) / float(len(lines))) + " percent blank") + if float(len(blanklines)) / float(len(lines)) > 0.40: + self.log("deleting blank lines") + html = blankreg.sub('', html) + # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly + html = re.sub(r"\s*</p>", "</p>\n", html) + html = re.sub(r"\s*<p>\s*", "\n<p>", html) + + # some lit files don't have any <p> tags or equivalent, check and + # mark up line endings if required before proceeding + if self.no_markup(html, 0.1): + self.log("not enough paragraph markers, adding now") + add_markup = re.compile('(?<!>)(\n)') + html = add_markup.sub('</p>\n<p>', html) + + # detect chapters/sections to match xpath or splitting logic + # + # Start with most typical chapter headings + chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}s*(<span[^>]*>)?\s*.?(Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</(i|b|u)>){0,2})\s*(</span>)?s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) + html = chapdetect.sub(self.chapter_head, html) + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern") + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#-]+\s*){1,9}|\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) + html = 
chapdetect2.sub(self.chapter_head, html) + # + # Unwrap lines using punctation if the median length of all lines is less than 200 + length = line_length('html', html, 0.4) + self.log("*** Median line length is " + str(length) + " ***") + unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) + if length < 200: + self.log("Unwrapping Lines") + html = unwrap.sub(' ', html) + # If still no sections after unwrapping lines break on lines with no punctuation + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", splitting based on punctuation") + #self.log(html) + chapdetect3 = re.compile(r'(<p[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?([a-z]+\s*){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</p>)(?P<title>)?', re.IGNORECASE) + html = chapdetect3.sub(self.chapter_head, html) + # search for places where a first or second level heading is immediately followed by another + # top level heading. 
demote the second heading to h3 to prevent splitting between chapter + # headings and titles, images, etc + doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) + html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) + + return html \ No newline at end of file diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 35a8a1a9bc..e83216ae1f 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -24,7 +24,7 @@ from calibre.constants import islinux, isfreebsd, iswindows from calibre import unicode_path from calibre.utils.localization import get_lang from calibre.utils.filenames import ascii_filename -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.utils import PreProcessor class Link(object): ''' @@ -491,20 +491,6 @@ class HTMLInput(InputFormatPlugin): return (None, raw) def preprocess_html(self, html): - if not hasattr(self, 'log'): - from calibre.utils.logging import default_log - self.log = default_log - self.log("********* Preprocessing HTML - HTML Input plugin *********") - # Detect Chapters to match the xpath in the GUI - chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE) - html = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', html) - # Unwrap lines using punctation if the median length of all lines is less than 150 - # - # Insert extra line feeds so the line length regex functions properly - html = re.sub(r"</p>", "</p>\n", html) - length = line_length('html', html, 0.4) - self.log.debug("*** Median length is " + str(length) + " ***") - unwrap = 
re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - if length < 150: - html = unwrap.sub(' ', html) + preprocessor = PreProcessor(html) + html = preprocessor(html) return html diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index 35dad501be..58e7bc84bf 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -6,10 +6,8 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re - from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.utils import PreProcessor class LITInput(InputFormatPlugin): @@ -18,7 +16,6 @@ class LITInput(InputFormatPlugin): author = 'Marshall T. Vandegrift' description = 'Convert LIT files to HTML' file_types = set(['lit']) - html_preprocess_sections = 0 def convert(self, stream, options, file_ext, log, accelerators): @@ -57,115 +54,7 @@ class LITInput(InputFormatPlugin): def preprocess_html(self, html): - - def chapter_head(match): - chap = match.group('chap') - title = match.group('title') - if not title: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) - return '<h2>'+chap+'</h2>\n' - else: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " chapters & titles. 
- " + str(chap) + ", " + str(title)) - return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' - - def chapter_link(match): - chap = match.group('sectionlink') - if not chap: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links") - return '<br style="page-break-before:always">' - else: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap)) - return '<br clear="all" style="page-break-before:always">\n<h2>'+chap+'</h2>' - - - def no_markup(raw, percent): - ''' - Detects total marked up line endings in the file. raw is the text to - inspect. Percent is the minimum percent of line endings which should - be marked up to return true. - ''' - htm_end_ere = re.compile('</p>', re.DOTALL) - line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL) - htm_end = htm_end_ere.findall(raw) - line_end = line_end_ere.findall(raw) - tot_htm_ends = len(htm_end) - tot_ln_fds = len(line_end) - self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***") - - if percent > 1: - percent = 1 - if percent < 0: - percent = 0 - - min_lns = tot_ln_fds * percent - self.log("There must be more than " + str(min_lns) + " unmarked lines to return true") - if min_lns > tot_htm_ends: - return True - - self.log("********* Preprocessing HTML *********") - # remove non-breaking spaces - html = re.sub(ur'\u00a0', ' ', html) - # Get rid of empty <o:p> tags to simplify other processing - html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) - # Get rid of empty span tags - html = re.sub(r"\s*<span[^>]*>\s*</span>", " ", html) - - # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing - linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE) - blankreg = 
re.compile(r'\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>', re.IGNORECASE) - blanklines = blankreg.findall(html) - lines = linereg.findall(html) - if len(lines) > 1: - self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank") - if float(len(blanklines)) / float(len(lines)) > 0.40: - self.log("deleting blank lines") - html = blankreg.sub('', html) - # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly - html = re.sub(r"\s*</p>", "</p>\n", html) - - # some lit files don't have any <p> tags or equivalent, check and - # mark up line endings if required before proceeding - if no_markup(html, 0.1): - self.log("not enough paragraph markers, adding now") - add_markup = re.compile('(?<!>)(\n)') - html = add_markup.sub('</p>\n<p>', html) - - # detect chapters/sections to match xpath or splitting logic - # - # Mark split points based on embedded links - chaplink = re.compile(r'<a\sname[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<sectionlink>[^\s<]+(\s*[^\s<]+){0,4})?\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*</a>', re.IGNORECASE) - html = chaplink.sub(chapter_link, html) - # Continue with alternate patterns, start with most typical chapter headings - if self.html_preprocess_sections < 10: - chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}s*(<span[^>]*>)?\s*.?(\d+\.?|Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</(i|b|u)>){0,2})\s*(</span>)?s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) - html = chapdetect.sub(chapter_head, html) - if self.html_preprocess_sections < 10: - self.log("not enough chapters, only " + 
str(self.html_preprocess_sections) + ", trying a more aggressive pattern") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#]+\s*){1,9}|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) - html = chapdetect2.sub(chapter_head, html) - # - # Unwrap lines using punctation if the median length of all lines is less than 150 - length = line_length('html', html, 0.4) - self.log("*** Median line length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - if length < 150: - self.log("Unwrapping Lines") - html = unwrap.sub(' ', html) - # If still no sections after unwrapping lines break on lines with no punctuation - if self.html_preprocess_sections < 10: - self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", splitting based on punctuation") - #self.log(html) - chapdetect3 = re.compile(r'(<p[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?([a-z]+\s*){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</p>)(?P<title>)?', re.IGNORECASE) - html = chapdetect3.sub(chapter_head, html) - # search for places where a first or second level heading is immediately followed by another - # top level heading. 
demote the second heading to h3 to prevent splitting between chapter - # headings and titles, images, etc - doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) - html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) - + preprocessor = PreProcessor(html) + html = preprocessor(html) return html diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py index 3ae9f8ccca..c151551866 100644 --- a/src/calibre/ebooks/pdb/pdf/reader.py +++ b/src/calibre/ebooks/pdb/pdf/reader.py @@ -21,7 +21,7 @@ class Reader(FormatReader): self.options = options setattr(self.options, 'new_pdf_engine', False) setattr(self.options, 'no_images', False) - setattr(self.options, 'unwrap_factor', 0.5) + setattr(self.options, 'unwrap_factor', 0.45) def extract_content(self, output_dir): self.log.info('Extracting PDF...') diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 113c3d99d8..14b3552b04 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -25,7 +25,7 @@ class PDFInput(InputFormatPlugin): OptionRecommendation(name='unwrap_factor', recommended_value=0.45, help=_('Scale used to determine the length at which a line should ' 'be unwrapped. Valid values are a decimal between 0 and 1. 
The ' - 'default is 0.45, this is the median line length.')), + 'default is 0.45, just below the median line length.')), OptionRecommendation(name='new_pdf_engine', recommended_value=False, help=_('Use the new PDF conversion engine.')) ]) From f6de0bef13d7d1001b951d465cff3135aad616ed Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sat, 11 Sep 2010 22:15:09 +1000 Subject: [PATCH 09/43] replaced messed up rtf file --- src/calibre/ebooks/rtf/preprocess.py | 624 +++++++++++++-------------- 1 file changed, 289 insertions(+), 335 deletions(-) diff --git a/src/calibre/ebooks/rtf/preprocess.py b/src/calibre/ebooks/rtf/preprocess.py index ee45da697f..a3076651fd 100644 --- a/src/calibre/ebooks/rtf/preprocess.py +++ b/src/calibre/ebooks/rtf/preprocess.py @@ -1,390 +1,344 @@ #!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement __license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' +__copyright__ = '2010, Gerendi Sandor Attila' __docformat__ = 'restructuredtext en' -import functools, re +""" +RTF tokenizer and token parser. v.1.0 (1/17/2010) +Author: Gerendi Sandor Attila -from calibre import entity_to_unicode +At this point this will tokenize a RTF file then rebuild it from the tokens. +In the process the UTF8 tokens are altered to be supported by the RTF2XML and also remain RTF specification compilant. 
+""" -XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>') -SVG_NS = 'http://www.w3.org/2000/svg' -XLINK_NS = 'http://www.w3.org/1999/xlink' +class tokenDelimitatorStart(): + def __init__(self): + pass + def toRTF(self): + return b'{' + def __repr__(self): + return '{' -convert_entities = functools.partial(entity_to_unicode, - result_exceptions = { - u'<' : '<', - u'>' : '>', - u"'" : ''', - u'"' : '"', - u'&' : '&', - }) -_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE) +class tokenDelimitatorEnd(): + def __init__(self): + pass + def toRTF(self): + return b'}' + def __repr__(self): + return '}' -LIGATURES = { -# u'\u00c6': u'AE', -# u'\u00e6': u'ae', -# u'\u0152': u'OE', -# u'\u0153': u'oe', -# u'\u0132': u'IJ', -# u'\u0133': u'ij', -# u'\u1D6B': u'ue', - u'\uFB00': u'ff', - u'\uFB01': u'fi', - u'\uFB02': u'fl', - u'\uFB03': u'ffi', - u'\uFB04': u'ffl', - u'\uFB05': u'ft', - u'\uFB06': u'st', - } +class tokenControlWord(): + def __init__(self, name, separator = ''): + self.name = name + self.separator = separator + def toRTF(self): + return self.name + self.separator + def __repr__(self): + return self.name + self.separator -_ligpat = re.compile(u'|'.join(LIGATURES)) +class tokenControlWordWithNumericArgument(): + def __init__(self, name, argument, separator = ''): + self.name = name + self.argument = argument + self.separator = separator + def toRTF(self): + return self.name + repr(self.argument) + self.separator + def __repr__(self): + return self.name + repr(self.argument) + self.separator -def sanitize_head(match): - x = match.group(1) - x = _span_pat.sub('', x) - return '<head>\n%s\n</head>' % x +class tokenControlSymbol(): + def __init__(self, name): + self.name = name + def toRTF(self): + return self.name + def __repr__(self): + return self.name -def chap_head(match): - chap = match.group('chap') - title = match.group('title') - if not title: - return '<h1>'+chap+'</h1><br/>\n' - else: - return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n' +class 
tokenData(): + def __init__(self, data): + self.data = data + def toRTF(self): + return self.data + def __repr__(self): + return self.data -def wrap_lines(match): - ital = match.group('ital') - if not ital: - return ' ' - else: - return ital+' ' +class tokenBinN(): + def __init__(self, data, separator = ''): + self.data = data + self.separator = separator + def toRTF(self): + return "\\bin" + repr(len(self.data)) + self.separator + self.data + def __repr__(self): + return "\\bin" + repr(len(self.data)) + self.separator + self.data + +class token8bitChar(): + def __init__(self, data): + self.data = data + def toRTF(self): + return "\\'" + self.data + def __repr__(self): + return "\\'" + self.data + +class tokenUnicode(): + def __init__(self, data, separator = '', current_ucn = 1, eqList = []): + self.data = data + self.separator = separator + self.current_ucn = current_ucn + self.eqList = eqList + def toRTF(self): + result = '\\u' + repr(self.data) + ' ' + ucn = self.current_ucn + if len(self.eqList) < ucn: + ucn = len(self.eqList) + result = tokenControlWordWithNumericArgument('\\uc', ucn).toRTF() + result + i = 0 + for eq in self.eqList: + if i >= ucn: + break + result = result + eq.toRTF() + return result + def __repr__(self): + return '\\u' + repr(self.data) -def line_length(format, raw, percent): - ''' - raw is the raw text to find the line length to use for wrapping. - percentage is a decimal number, 0 - 1 which is used to determine - how far in the list of line lengths to use. The list of line lengths is - ordered smallest to larged and does not include duplicates. 0.5 is the - median value. 
- ''' - raw = raw.replace(' ', ' ') - if format == 'html': - linere = re.compile('(?<=<p).*?(?=</p>)', re.DOTALL) - elif format == 'pdf': - linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL) - lines = linere.findall(raw) - print "percent is " + str(percent) +def isAsciiLetter(value): + return ((value >= 'a') and (value <= 'z')) or ((value >= 'A') and (value <= 'Z')) - lengths = [] - for line in lines: - if len(line) > 0: - lengths.append(len(line)) +def isDigit(value): + return (value >= '0') and (value <= '9') - if not lengths: - return 0 +def isChar(value, char): + return value == char - lengths = list(set(lengths)) - total = sum(lengths) - avg = total / len(lengths) - max_line = avg * 2 - - lengths = sorted(lengths) - for i in range(len(lengths) - 1, -1, -1): - if lengths[i] > max_line: - del lengths[i] - - if percent > 1: - percent = 1 - if percent < 0: - percent = 0 - - index = int(len(lengths) * percent) - 1 - - return lengths[index] +def isString(buffer, string): + return buffer == string -class CSSPreProcessor(object): +class RtfTokenParser(): + def __init__(self, tokens): + self.tokens = tokens + self.process() + self.processUnicode() - PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}') + def process(self): + i = 0 + newTokens = [] + while i < len(self.tokens): + if isinstance(self.tokens[i], tokenControlSymbol): + if isString(self.tokens[i].name, "\\'"): + i = i + 1 + if not isinstance(self.tokens[i], tokenData): + raise Exception('Error: token8bitChar without data.') + if len(self.tokens[i].data) < 2: + raise Exception('Error: token8bitChar without data.') + newTokens.append(token8bitChar(self.tokens[i].data[0:2])) + if len(self.tokens[i].data) > 2: + newTokens.append(tokenData(self.tokens[i].data[2:])) + i = i + 1 + continue - def __call__(self, data, add_namespace=False): - from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE - data = self.PAGE_PAT.sub('', data) - if not add_namespace: - return data - ans, namespaced = [], False - for line in 
data.splitlines(): - ll = line.lstrip() - if not (namespaced or ll.startswith('@import') or - ll.startswith('@charset')): - ans.append(XHTML_CSS_NAMESPACE.strip()) - namespaced = True - ans.append(line) + newTokens.append(self.tokens[i]) + i = i + 1 - return u'\n'.join(ans) + self.tokens = list(newTokens) -class HTMLPreProcessor(object): + def processUnicode(self): + i = 0 + newTokens = [] + ucNbStack = [1] + while i < len(self.tokens): + if isinstance(self.tokens[i], tokenDelimitatorStart): + ucNbStack.append(ucNbStack[len(ucNbStack) - 1]) + newTokens.append(self.tokens[i]) + i = i + 1 + continue + if isinstance(self.tokens[i], tokenDelimitatorEnd): + ucNbStack.pop() + newTokens.append(self.tokens[i]) + i = i + 1 + continue + if isinstance(self.tokens[i], tokenControlWordWithNumericArgument): + if isString(self.tokens[i].name, '\\uc'): + ucNbStack[len(ucNbStack) - 1] = self.tokens[i].argument + newTokens.append(self.tokens[i]) + i = i + 1 + continue + if isString(self.tokens[i].name, '\\u'): + x = i + j = 0 + i = i + 1 + replace = [] + partialData = None + ucn = ucNbStack[len(ucNbStack) - 1] + while (i < len(self.tokens)) and (j < ucn): + if isinstance(self.tokens[i], tokenDelimitatorStart): + break + if isinstance(self.tokens[i], tokenDelimitatorEnd): + break + if isinstance(self.tokens[i], tokenData): + if len(self.tokens[i].data) >= ucn - j: + replace.append(tokenData(self.tokens[i].data[0 : ucn - j])) + if len(self.tokens[i].data) > ucn - j: + partialData = tokenData(self.tokens[i].data[ucn - j:]) + i = i + 1 + break + else: + replace.append(self.tokens[i]) + j = j + len(self.tokens[i].data) + i = i + 1 + continue + if isinstance(self.tokens[i], token8bitChar) or isinstance(self.tokens[i], tokenBinN): + replace.append(self.tokens[i]) + i = i + 1 + j = j + 1 + continue + raise Exception('Error: incorect utf replacement.') - PREPROCESS = [ - # Some idiotic HTML generators (Frontpage I'm looking at you) - # Put all sorts of crap into <head>. 
This messes up lxml - (re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL), - sanitize_head), - # Convert all entities, since lxml doesn't handle them well - (re.compile(r'&(\S+?);'), convert_entities), - # Remove the <![if/endif tags inserted by everybody's darling, MS Word - (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE), - lambda match: ''), - ] + #calibre rtf2xml does not support utfreplace + replace = [] - # Fix pdftohtml markup - PDFTOHTML = [ - # Fix umlauts - # ¨ - (re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'), - (re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'), - (re.compile(u'¨\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ë'), - (re.compile(u'¨\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ë'), - (re.compile(u'¨\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ï'), - (re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'), - (re.compile(u'¨\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ö'), - (re.compile(u'¨\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ö'), - (re.compile(u'¨\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ü'), - (re.compile(u'¨\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ü'), + newTokens.append(tokenUnicode(self.tokens[x].argument, self.tokens[x].separator, ucNbStack[len(ucNbStack) - 1], replace)) + if partialData != None: + newTokens.append(partialData) + continue - # Fix accents - # ` - (re.compile(u'`\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'à'), - (re.compile(u'`\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'À'), - (re.compile(u'`\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'è'), - (re.compile(u'`\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'È'), - (re.compile(u'`\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ì'), - (re.compile(u'`\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ì'), - (re.compile(u'`\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ò'), - (re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: 
u'Ò'), - (re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'), - (re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'), + newTokens.append(self.tokens[i]) + i = i + 1 - # ´ - (re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'), - (re.compile(u'´\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Á'), - (re.compile(u'´\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'ć'), - (re.compile(u'´\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Ć'), - (re.compile(u'´\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'é'), - (re.compile(u'´\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'É'), - (re.compile(u'´\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'í'), - (re.compile(u'´\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Í'), - (re.compile(u'´\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ó'), - (re.compile(u'´\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ó'), - (re.compile(u'´\s*(<br.*?>)*\s*n', re.UNICODE), lambda match: u'ń'), - (re.compile(u'´\s*(<br.*?>)*\s*N', re.UNICODE), lambda match: u'Ń'), - (re.compile(u'´\s*(<br.*?>)*\s*s', re.UNICODE), lambda match: u'ś'), - (re.compile(u'´\s*(<br.*?>)*\s*S', re.UNICODE), lambda match: u'Ś'), - (re.compile(u'´\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ú'), - (re.compile(u'´\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ú'), - (re.compile(u'´\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ź'), - (re.compile(u'´\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ź'), + self.tokens = list(newTokens) - # ˆ - (re.compile(u'ˆ\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'â'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Â'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ê'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ê'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'î'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Î'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*o', re.UNICODE), 
lambda match: u'ô'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ô'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'û'), - (re.compile(u'ˆ\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Û'), - # ¸ - (re.compile(u'¸\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'ç'), - (re.compile(u'¸\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Ç'), + def toRTF(self): + result = [] + for token in self.tokens: + result.append(token.toRTF()) + return "".join(result) - # ˛ - (re.compile(u'˛\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ą'), - (re.compile(u'˛\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ą'), - (re.compile(u'˛\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ę'), - (re.compile(u'˛\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ę'), - - # ˙ - (re.compile(u'˙\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ż'), - (re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'), - - # If pdf printed from a browser then the header/footer has a reliable pattern - (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''), +class RtfTokenizer(): + def __init__(self, rtfData): + self.rtfData = [] + self.tokens = [] + self.rtfData = rtfData + self.tokenize() - # Center separator lines - (re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'), + def tokenize(self): + i = 0 + lastDataStart = -1 + while i < len(self.rtfData): - # Remove page links - (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''), - # Remove <hr> tags - (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'), - # Replace <br><br> with <p> - # (re.compile(r'<br>\s*<br>', re.IGNORECASE), lambda match: '\n<p>'), + if isChar(self.rtfData[i], '{'): + if lastDataStart > -1: + self.tokens.append(tokenData(self.rtfData[lastDataStart : i])) + lastDataStart = -1 + self.tokens.append(tokenDelimitatorStart()) + 
i = i + 1 + continue - # unwrap hyphenation - don't delete the hyphen (often doesn't split words) - (re.compile(u'(?<=[-–—])\s*<br>\s*(?=[[a-z\d])'), lambda match: ''), + if isChar(self.rtfData[i], '}'): + if lastDataStart > -1: + self.tokens.append(tokenData(self.rtfData[lastDataStart : i])) + lastDataStart = -1 + self.tokens.append(tokenDelimitatorEnd()) + i = i + 1 + continue - # Remove gray background - (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'), + if isChar(self.rtfData[i], '\\'): + if i + 1 >= len(self.rtfData): + raise Exception('Error: Control character found at the end of the document.') - # Detect Chapters to match default XPATH in GUI - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</(i|b)>(</(i|b)>)?)?)\s*(</?(br|p)[^>]*>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), + if lastDataStart > -1: + self.tokens.append(tokenData(self.rtfData[lastDataStart : i])) + lastDataStart = -1 - # Have paragraphs show better - (re.compile(r'<br.*?>'), lambda match : '<p>'), - # Clean up spaces - (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), - # Add space before and after italics - (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'), - (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), - - ] + tokenStart = i + i = i + 1 - # Fix Book Designer markup - BOOK_DESIGNER = [ - # HR - (re.compile('<hr>', re.IGNORECASE), - lambda match : '<span style="page-break-after:always"> </span>'), - # Create header tags - (re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE), - lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))), - (re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE), - 
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))), - (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), - lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)), - (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), - lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)), - ] - def __init__(self, input_plugin_preprocess, plugin_preprocess, - extra_opts=None): - self.input_plugin_preprocess = input_plugin_preprocess - self.plugin_preprocess = plugin_preprocess - self.extra_opts = extra_opts + #Control Words + if isAsciiLetter(self.rtfData[i]): + #consume <ASCII Letter Sequence> + consumed = False + while i < len(self.rtfData): + if not isAsciiLetter(self.rtfData[i]): + tokenEnd = i + consumed = True + break + i = i + 1 - def is_baen(self, src): - return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"', - re.IGNORECASE).search(src) is not None + if not consumed: + raise Exception('Error (at:%d): Control Word without end.'%(tokenStart)) - def is_book_designer(self, raw): - return re.search('<H2[^><]*id=BookTitle', raw) is not None + #we have numeric argument before delimiter + if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]): + #consume the numeric argument + consumed = False + l = 0 + while i < len(self.rtfData): + if not isDigit(self.rtfData[i]): + consumed = True + break + l = l + 1 + i = i + 1 + if l > 10 : + raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart]) - def is_pdftohtml(self, src): - return '<!-- created by calibre\'s pdftohtml -->' in src[:1000] + if not consumed: + raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart]) - def __call__(self, html, remove_special_chars=None, - get_preprocess_html=False): - if remove_special_chars is not None: - html = remove_special_chars.sub('', html) - html = 
html.replace('\0', '') - is_pdftohtml = self.is_pdftohtml(html) - if self.is_baen(html): - rules = [] - elif self.is_book_designer(html): - rules = self.BOOK_DESIGNER - elif is_pdftohtml: - rules = self.PDFTOHTML - else: - rules = [] + separator = '' + if isChar(self.rtfData[i], ' '): + separator = ' ' - start_rules = [] - if is_pdftohtml: - # Remove non breaking spaces - start_rules.append((re.compile(ur'\u00a0'), lambda match : ' ')) + controlWord = self.rtfData[tokenStart: tokenEnd] + if tokenEnd < i: + value = int(self.rtfData[tokenEnd: i]) + if isString(controlWord, "\\bin"): + i = i + value + self.tokens.append(tokenBinN(self.rtfData[tokenStart:i], separator)) + else: + self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator)) + else: + self.tokens.append(tokenControlWord(controlWord, separator)) + #space delimiter, we should discard it + if self.rtfData[i] == ' ': + i = i + 1 - if not getattr(self.extra_opts, 'keep_ligatures', False): - html = _ligpat.sub(lambda m:LIGATURES[m.group()], html) + #Control Symbol + else: + self.tokens.append(tokenControlSymbol(self.rtfData[tokenStart : i + 1])) + i = i + 1 + continue - end_rules = [] - if getattr(self.extra_opts, 'remove_header', None): - try: - rules.insert(0, - (re.compile(self.extra_opts.header_regex), lambda match : '') - ) - except: - import traceback - print 'Failed to parse remove_header regexp' - traceback.print_exc() + if lastDataStart < 0: + lastDataStart = i + i = i + 1 - if getattr(self.extra_opts, 'remove_footer', None): - try: - rules.insert(0, - (re.compile(self.extra_opts.footer_regex), lambda match : '') - ) - except: - import traceback - print 'Failed to parse remove_footer regexp' - traceback.print_exc() + def toRTF(self): + result = [] + for token in self.tokens: + result.append(token.toRTF()) + return "".join(result) - # Make the more aggressive chapter marking regex optional with the preprocess option to reduce false positives - if getattr(self.extra_opts, 
'preprocess_html', None): - if is_pdftohtml: - end_rules.append( - (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), - ) - if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: - length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) - if length: - print "The pdf line length returned is " + str(length) - end_rules.append( - # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), - ) +if __name__ == "__main__": + import sys + if len(sys.argv) < 2: + print ("Usage %prog rtfFileToConvert") + sys.exit() + f = open(sys.argv[1], 'rb') + data = f.read() + f.close() - for rule in self.PREPROCESS + start_rules: - html = rule[0].sub(rule[1], html) + tokenizer = RtfTokenizer(data) + parsedTokens = RtfTokenParser(tokenizer.tokens) - if get_preprocess_html: - return html + data = parsedTokens.toRTF() - def dump(raw, where): - import os - dp = getattr(self.extra_opts, 'debug_pipeline', None) - if dp and os.path.exists(dp): - odir = os.path.join(dp, 'input') - if os.path.exists(odir): - odir = os.path.join(odir, where) - if not os.path.exists(odir): - os.makedirs(odir) - name, i = None, 0 - while not name or os.path.exists(os.path.join(odir, name)): - i += 1 - name = '%04d.html'%i - with open(os.path.join(odir, name), 'wb') as f: - f.write(raw.encode('utf-8')) + f = open(sys.argv[1], 'w') + f.write(data) + f.close() - #dump(html, 'pre-preprocess') - - for rule in rules + end_rules: - html = rule[0].sub(rule[1], html) - - #dump(html, 'post-preprocess') - - # Handle broken XHTML w/ SVG (ugh) - if 'svg:' in html and SVG_NS not in html: - html = html.replace( - '<html', '<html 
xmlns:svg="%s"' % SVG_NS, 1) - if 'xlink:' in html and XLINK_NS not in html: - html = html.replace( - '<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1) - - html = XMLDECL_RE.sub('', html) - - if getattr(self.extra_opts, 'asciiize', False): - from calibre.ebooks.unidecode.unidecoder import Unidecoder - unidecoder = Unidecoder() - html = unidecoder.decode(html) - - if self.plugin_preprocess: - html = self.input_plugin_preprocess(html) - - return html From 9a06996b16486a3511e4055535a6be48f484a90a Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Sun, 12 Sep 2010 11:17:49 +1000 Subject: [PATCH 10/43] minor tweaks to preprocessing, backed out reflow change --- src/calibre/ebooks/conversion/preprocess.py | 4 +-- src/calibre/ebooks/conversion/utils.py | 36 +++++++++++---------- src/calibre/ebooks/pdf/reflow.py | 4 --- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 6123577191..46308b2ea0 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -319,8 +319,8 @@ class HTMLPreProcessor(object): # unwrap hyphenation - moved here so it's executed after header/footer removal if is_pdftohtml: - # unwrap visible dashes and hyphens - don't delete as 50% or more of the time these - # hyphens are for compound words, formatting, etc + # unwrap visible dashes and hyphens - don't delete they are often hyphens for + # for compound words, formatting, etc end_rules.append((re.compile(u'(?<=[-–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 68cebb3a11..fb683bdb12 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -29,16 +29,12 @@ class PreProcessor(object): 
self.log("marked " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' - def chapter_link(self, match): - chap = match.group('sectionlink') - if not chap: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links") - return '<br style="page-break-before:always">' - else: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap)) - return '<br clear="all" style="page-break-before:always">\n<h2>'+chap+'</h2>' + def chapter_break(self, match): + chap = match.group('section') + styles = match.group('styles') + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " section markers based on punctuation. - " + str(chap)) + return '<'+styles+' style="page-break-before:always">'+chap def no_markup(self, raw, percent): ''' @@ -74,7 +70,7 @@ class PreProcessor(object): html = re.sub(r"\s*<span[^>]*>\s*</span>", " ", html) # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing - linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE) + linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL) blankreg = re.compile(r'\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>', re.IGNORECASE) blanklines = blankreg.findall(html) lines = linereg.findall(html) @@ -100,8 +96,13 @@ class PreProcessor(object): chapdetect = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}s*(<span[^>]*>)?\s*.?(Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</(i|b|u)>){0,2})\s*(</span>)?s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) html = chapdetect.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: - self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#-]+\s*){1,9}|\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) + html = chapdetect2.sub(self.chapter_head, html) + + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") + chapdetect2 = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) # # Unwrap lines using punctation if the median length of all lines is less than 200 @@ -110,13 +111,14 @@ class PreProcessor(object): unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) if length < 200: self.log("Unwrapping Lines") - html = unwrap.sub(' ', html) + html = unwrap.sub(' ', html) + # If still no sections after unwrapping lines break on lines with no punctuation if self.html_preprocess_sections < 10: - self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", splitting based on punctuation") + self.log(str(self.html_preprocess_sections) + " split points marked, matching based on punctuation") #self.log(html) - chapdetect3 = re.compile(r'(<p[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?([a-z]+\s*){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</p>)(?P<title>)?', re.IGNORECASE) - html = chapdetect3.sub(self.chapter_head, html) + chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</(i|b|u)>){0,2}\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) + html = chapdetect3.sub(self.chapter_break, html) # search for places where a first or second level heading is immediately followed by 
another # top level heading. demote the second heading to h3 to prevent splitting between chapter # headings and titles, images, etc diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 36848ddb8b..584d631d0b 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -408,10 +408,6 @@ class Page(object): # Fraction of text height that two strings' bottoms can differ by # for them to be considered to be part of the same text fragment LINE_FACTOR = 0.4 - - # Percentage of the page heigth which should be considered header - # or footer to be discarded from reflow considerations - HEAD_FOOTER_MARGIN # Multiplies the average line height when determining row height # of a particular element to detect columns. From cdb696f63bc39b9327abe809fa71e94baa6e0b86 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Mon, 13 Sep 2010 00:12:21 +1000 Subject: [PATCH 11/43] enhanced preprocessing class - looking pretty good --- src/calibre/ebooks/conversion/preprocess.py | 18 ++-- src/calibre/ebooks/conversion/utils.py | 98 +++++++++++++++------ 2 files changed, 82 insertions(+), 34 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 46308b2ea0..f6277956c8 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -62,7 +62,6 @@ def wrap_lines(match): else: return ital+' ' - def line_length(format, raw, percent): ''' raw is the raw text to find the line length to use for wrapping. 
@@ -76,6 +75,8 @@ def line_length(format, raw, percent): linere = re.compile('(?<=<p).*?(?=</p>)', re.DOTALL) elif format == 'pdf': linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL) + elif format == 'spanned_html': + linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL) lines = linere.findall(raw) lengths = [] @@ -223,14 +224,15 @@ class HTMLPreProcessor(object): # Remove page links (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''), # Remove <hr> tags - (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'), + (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br>'), # Remove gray background (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'), # Detect Chapters to match default XPATH in GUI - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</(i|b)>(</(i|b)>)?)?)\s*(</?(br|p)[^>]*>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), - (re.compile(r'<br\s*/?>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<(i|b)>)?(\s*\w+){1,4}\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?'), chap_head), + (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), + # Cover the case where every letter in a chapter title is separated by a space + (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head), # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), @@ -238,8 +240,7 @@ class HTMLPreProcessor(object): (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ 
]*(?=<)'), lambda match: ' '), # Add space before and after italics (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'), - (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), - + (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), ] # Fix Book Designer markup @@ -327,10 +328,11 @@ class HTMLPreProcessor(object): # unwrap/delete soft hyphens with formatting end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: '')) - # Make the more aggressive chapter marking regex optional with the preprocess option to reduce false positives + # Make the more aggressive chapter marking regex optional with the preprocess option to + # reduce false positives and move after header/footer removal if getattr(self.extra_opts, 'preprocess_html', None): if is_pdftohtml: - end_rules.append((re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P<chap>([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*(</(i|b)>(</(i|b)>)?)?\s*(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head)) + end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),) if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index fb683bdb12..abfa43e7ed 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -8,10 +8,10 @@ __docformat__ = 'restructuredtext en' import re from calibre.ebooks.conversion.preprocess import line_length from calibre.utils.logging import default_log -from lxml import etree class PreProcessor(object): html_preprocess_sections = 0 + found_indents = 0 def 
__init__(self, args): self.args = args @@ -22,11 +22,11 @@ class PreProcessor(object): title = match.group('title') if not title: self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) + self.log("found " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) return '<h2>'+chap+'</h2>\n' else: self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) + self.log("found " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' def chapter_break(self, match): @@ -35,7 +35,22 @@ class PreProcessor(object): self.html_preprocess_sections = self.html_preprocess_sections + 1 self.log("marked " + str(self.html_preprocess_sections) + " section markers based on punctuation. - " + str(chap)) return '<'+styles+' style="page-break-before:always">'+chap - + + def insert_indent(self, match): + pstyle = match.group('formatting') + span = match.group('span') + self.found_indents = self.found_indents + 1 + if pstyle: + if not span: + return '<p '+pstyle+' style="text-indent:3%">' + else: + return '<p '+pstyle+' style="text-indent:3%">'+span + else: + if not span: + return '<p style="text-indent:3%">' + else: + return '<p style="text-indent:3%">'+span + def no_markup(self, raw, percent): ''' Detects total marked up line endings in the file. 
raw is the text to @@ -48,7 +63,7 @@ class PreProcessor(object): line_end = line_end_ere.findall(raw) tot_htm_ends = len(htm_end) tot_ln_fds = len(line_end) - self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***") + self.log("There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked up endings") if percent > 1: percent = 1 @@ -56,13 +71,18 @@ class PreProcessor(object): percent = 0 min_lns = tot_ln_fds * percent - self.log("There must be fewer than " + str(min_lns) + " unmarked lines to return true") + self.log("There must be fewer than " + str(min_lns) + " unmarked lines to add markup") if min_lns > tot_htm_ends: return True def __call__(self, html): self.log("********* Preprocessing HTML *********") - # remove non-breaking spaces + # Replace series of non-breaking spaces with text-indent + txtindent = re.compile(ur'<p(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE) + html = txtindent.sub(self.insert_indent, html) + if self.found_indents > 1: + self.log("replaced "+str(self.found_indents)+ " nbsp indents with inline styles") + # remove remaining non-breaking spaces html = re.sub(ur'\u00a0', ' ', html) # Get rid of empty <o:p> tags to simplify other processing html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) @@ -83,41 +103,67 @@ class PreProcessor(object): html = re.sub(r"\s*</p>", "</p>\n", html) html = re.sub(r"\s*<p>\s*", "\n<p>", html) - # some lit files don't have any <p> tags or equivalent, check and - # mark up line endings if required before proceeding + # some lit files don't have any <p> tags or equivalent (generally just plain text between + # <pre> tags), check and mark up line endings if required before proceeding if self.no_markup(html, 0.1): self.log("not enough paragraph markers, adding now") add_markup = re.compile('(?<!>)(\n)') html = add_markup.sub('</p>\n<p>', html) # detect chapters/sections to match xpath or splitting 
logic + heading = re.compile('<h(1|2)[^>]*>', re.IGNORECASE) + self.html_preprocess_sections = len(heading.findall(html)) + self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings") # - # Start with most typical chapter headings - chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}s*(<span[^>]*>)?\s*.?(Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</(i|b|u)>){0,2})\s*(</span>)?s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) - html = chapdetect.sub(self.chapter_head, html) + # Start with most typical chapter headings, get more aggressive until one works + if self.html_preprocess_sections < 10: + chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}s*(<span[^>]*>)?\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) + html = chapdetect.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") - chapdetect2 = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<(i|b|u)>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</(i|b|u)>){0,2})\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</(br|p)>))?', re.UNICODE) - html = chapdetect2.sub(self.chapter_head, html) - # - # Unwrap lines using punctation if the median length of all lines is less than 200 - length = line_length('html', html, 0.4) - self.log("*** Median line length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - if length < 200: - 
self.log("Unwrapping Lines") - html = unwrap.sub(' ', html) + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + html = chapdetect2.sub(self.chapter_head, html) - # If still no sections after unwrapping lines break on lines with no punctuation + # Unwrap lines + # + self.log("Unwrapping Lines") + # Some OCR sourced files have line breaks in the html using a combination of span & p tags + # span are used for hard line breaks, p for new paragraphs. Determine which is used so + # that lines can be wrapped across page boundaries + paras_reg = re.compile('<p[^>]*>', re.IGNORECASE) + spans_reg = re.compile('<span[^>]*>', re.IGNORECASE) + paras = len(paras_reg.findall(html)) + spans = len(spans_reg.findall(html)) + if spans > 1: + if float(paras) / float(spans) < 0.75: + format = 'spanned_html' + else: + format = 'html' + else: + format = 'html' + + # Calculate Length + length = line_length(format, html, 0.4) + self.log("*** Median line length is " + str(length) + ",calculated with " + format + " format ***") + # + # Unwrap and/or delete soft-hyphens, hyphens + html = re.sub(u'­\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) + html = re.sub(u'(?<=[-–—])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html) + + # Unwrap lines using punctation if the median length of all lines is less than 200 + unwrap = 
re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) + html = unwrap.sub(' ', html) + + # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < 10: - self.log(str(self.html_preprocess_sections) + " split points marked, matching based on punctuation") + self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections)) #self.log(html) - chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*(<(i|b|u)>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</(i|b|u)>){0,2}\s*(</span>)?\s*(</(i|b|u)>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) + chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) html = chapdetect3.sub(self.chapter_break, html) # search for places where a first or second level heading is immediately followed by another # top level heading. 
demote the second heading to h3 to prevent splitting between chapter From 6cc332089a421e6100fa4937c5126309c483e132 Mon Sep 17 00:00:00 2001 From: Starson17 <starson17@gmail.com> Date: Sun, 12 Sep 2010 11:28:24 -0400 Subject: [PATCH 12/43] Change Merge and Safe Merge warnings re ISBN --- src/calibre/gui2/actions/edit_metadata.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index f0232d9859..878ba77a43 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -209,8 +209,9 @@ class EditMetadataAction(InterfaceAction): dest_id, src_books, src_ids = self.books_to_merge(rows) if safe_merge: if not confirm('<p>'+_( - 'All book formats and metadata from the selected books ' - 'will be added to the <b>first selected book.</b><br><br> ' + 'Book formats and metadata from the selected books ' + 'will be added to the <b>first selected book.</b> ' + 'ISBN will <i>not</i> be merged.<br><br> ' 'The second and subsequently selected books will not ' 'be deleted or changed.<br><br>' 'Please confirm you want to proceed.') @@ -220,8 +221,9 @@ class EditMetadataAction(InterfaceAction): self.merge_metadata(dest_id, src_ids) else: if not confirm('<p>'+_( - 'All book formats and metadata from the selected books will be merged ' - 'into the <b>first selected book</b>.<br><br>' + 'Book formats and metadata from the selected books will be merged ' + 'into the <b>first selected book</b>. ' + 'ISBN will <i>not</i> be merged.<br><br>' 'After merger the second and ' 'subsequently selected books will be <b>deleted</b>. <br><br>' 'All book formats of the first selected book will be kept ' From 78874a9117941de749f3b09934be8588181dd4b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 12 Sep 2010 09:32:16 -0600 Subject: [PATCH 13/43] Use the new sorting code in the content server as well. 
--- src/calibre/library/caches.py | 153 +------------------------- src/calibre/library/server/content.py | 38 +++---- 2 files changed, 18 insertions(+), 173 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index dfd7086076..4f795ab733 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re, itertools, functools +import re, itertools from itertools import repeat from datetime import timedelta from threading import Thread, RLock @@ -584,39 +584,7 @@ class ResultCache(SearchQueryParser): # Sorting functions {{{ - def seriescmp(self, sidx, siidx, x, y, library_order=None): - try: - if library_order: - ans = cmp(title_sort(self._data[x][sidx].lower()), - title_sort(self._data[y][sidx].lower())) - else: - ans = cmp(self._data[x][sidx].lower(), - self._data[y][sidx].lower()) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][sidx], self._data[y][sidx]) - if ans != 0: return ans - return cmp(self._data[x][siidx], self._data[y][siidx]) - - def cmp(self, loc, x, y, asstr=True, subsort=False): - try: - ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \ - asstr else cmp(self._data[x][loc], self._data[y][loc]) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][loc], self._data[y][loc]) - except TypeError: ## raised when a datetime is None - x = self._data[x][loc] - if x is None: - x = UNDEFINED_DATE - y = self._data[y][loc] - if y is None: - y = UNDEFINED_DATE - return cmp(x, y) - if subsort and ans == 0: - idx = self.FIELD_MAP['sort'] - return cmp(self._data[x][idx].lower(), self._data[y][idx].lower()) - return ans - - def sanitize_field_name(self, field): + def sanitize_sort_field_name(self, field): field = field.lower().strip() if field not in self.field_metadata.iterkeys(): if field in ('author', 
'tag', 'comment'): @@ -627,38 +595,10 @@ class ResultCache(SearchQueryParser): return field def sort(self, field, ascending, subsort=False): - field = self.sanitize_field_name(field) - as_string = field not in ('size', 'rating', 'timestamp') - - if self.first_sort: - subsort = True - self.first_sort = False - if self.field_metadata[field]['is_custom']: - if self.field_metadata[field]['datatype'] == 'series': - fcmp = functools.partial(self.seriescmp, - self.field_metadata[field]['rec_index'], - self.field_metadata.cc_series_index_column_for(field), - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - as_string = self.field_metadata[field]['datatype'] in ('comments', 'text') - field = self.field_metadata[field]['colnum'] - fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], - subsort=subsort, asstr=as_string) - elif field == 'series': - fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'], - self.FIELD_MAP['series_index'], - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'], - subsort=subsort, asstr=as_string) - self._map.sort(cmp=fcmp, reverse=not ascending) - tmap = list(itertools.repeat(False, len(self._data))) - for x in self._map_filtered: - tmap[x] = True - self._map_filtered = [x for x in self._map if tmap[x]] + self.multisort([(field, ascending)]) def multisort(self, fields=[], subsort=False): - fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] + fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields] keys = self.field_metadata.field_keys() fields = [x for x in fields if x[0] in keys] if subsort and 'sort' not in [x[0] for x in fields]: @@ -671,6 +611,7 @@ class ResultCache(SearchQueryParser): self._map.sort(key=keyg, reverse=not fields[0][1]) else: self._map.sort(key=keyg) + tmap = list(itertools.repeat(False, len(self._data))) for x in self._map_filtered: tmap[x] = True @@ 
-733,87 +674,3 @@ class SortKeyGenerator(object): # }}} -if __name__ == '__main__': - # Testing.timing for new multi-sort {{{ - import time - - from calibre.library import db - db = db() - - db.refresh() - - fields = db.field_metadata.field_keys() - - print fields - - - def do_single_sort(meth, field, order): - if meth == 'old': - db.data.sort(field, order) - else: - db.data.multisort([(field, order)]) - - def test_single_sort(field): - for meth in ('old', 'new'): - ttime = 0 - NUM = 10 - asc = desc = None - for i in range(NUM): - db.data.sort('id', False) - st = time.time() - do_single_sort(meth, field, True) - asc = db.data._map - do_single_sort(meth, field, False) - desc = db.data._map - ttime += time.time() - st - yield (ttime/NUM, asc, desc) - - - print 'Running single sort differentials' - for field in fields: - if field in ('search', 'id', 'news', 'flags'): continue - print '\t', field, db.field_metadata[field]['datatype'] - old, new = test_single_sort(field) - if old[1] != new[1] or old[2] != new[2]: - print '\t\t', 'Sort failure!' 
- raise SystemExit(1) - print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) - - def do_multi_sort(meth, ms): - if meth == 'new': - db.data.multisort(ms) - else: - for s in reversed(ms): - db.data.sort(*s) - - def test_multi_sort(ms): - for meth in ('old', 'new'): - ttime = 0 - NUM = 10 - for i in range(NUM): - db.data.sort('id', False) - st = time.time() - do_multi_sort(meth, ms) - ttime += time.time() - st - yield (ttime/NUM, db.data._map) - - print 'Running multi-sort differentials' - - for ms in [ - [('timestamp', False), ('author', True), ('title', False)], - [('size', True), ('tags', True), ('author', False)], - [('series', False), ('title', True)], - [('size', True), ('tags', True), ('author', False), ('pubdate', - True), ('tags', False), ('formats', False), ('uuid', True)], - - ]: - print '\t', ms - db.data.sort('id', False) - old, new = test_multi_sort(ms) - if old[1] != new[1]: - print '\t\t', 'Sort failure!' - raise SystemExit() - print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) - - # }}} - diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 6784abd8f4..ecb467b4c2 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re, os, cStringIO, operator +import re, os, cStringIO import cherrypy try: @@ -16,7 +16,15 @@ except ImportError: from calibre import fit_image, guess_type from calibre.utils.date import fromtimestamp -from calibre.ebooks.metadata import title_sort +from calibre.library.caches import SortKeyGenerator + +class CSSortKeyGenerator(SortKeyGenerator): + + def __init__(self, fields, fm): + SortKeyGenerator.__init__(self, fields, fm, None) + + def __call__(self, record): + return self.itervals(record).next() class ContentServer(object): @@ -47,32 +55,12 @@ class 
ContentServer(object): def sort(self, items, field, order): - field = field.lower().strip() - if field == 'author': - field = 'authors' - if field == 'date': - field = 'timestamp' + field = self.db.data.sanitize_sort_field_name(field) if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'): raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) - cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ - lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') - if field == 'series': - items.sort(cmp=self.seriescmp, reverse=not order) - else: - lookup = 'sort' if field == 'title' else field - lookup = 'author_sort' if field == 'authors' else field - field = self.db.FIELD_MAP[lookup] - getter = operator.itemgetter(field) - items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) + keyg = CSSortKeyGenerator([(field, order)], self.db.field_metadata) + items.sort(key=keyg, reverse=not order) - def seriescmp(self, x, y): - si = self.db.FIELD_MAP['series'] - try: - ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower())) - except AttributeError: # Some entries may be None - ans = cmp(x[si], y[si]) - if ans != 0: return ans - return cmp(x[self.db.FIELD_MAP['series_index']], y[self.db.FIELD_MAP['series_index']]) # }}} From 80c976e0f24f05a5ee7a9bfce50bf7745215e339 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 12 Sep 2010 11:11:00 -0600 Subject: [PATCH 14/43] Fix #6794 (Updated recipes for Infobae and NSPM) --- resources/recipes/infobae.recipe | 82 ++++++++------------------------ resources/recipes/nspm.recipe | 11 ++++- 2 files changed, 30 insertions(+), 63 deletions(-) diff --git a/resources/recipes/infobae.recipe b/resources/recipes/infobae.recipe index cda9bf83d2..b7f9cd3c6c 100644 --- a/resources/recipes/infobae.recipe +++ b/resources/recipes/infobae.recipe @@ -1,12 +1,8 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko 
Miletic <darko.miletic at gmail.com>' +__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>' ''' infobae.com ''' -import re -import urllib, urlparse from calibre.web.feeds.news import BasicNewsRecipe @@ -20,35 +16,24 @@ class Infobae(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - language = 'es' - lang = 'es-AR' - + language = 'es' encoding = 'cp1252' - cover_url = 'http://www.infobae.com/imgs/header/header.gif' + masthead_url = 'http://www.infobae.com/imgs/header/header.gif' remove_javascript = True - preprocess_regexps = [(re.compile( - r'<meta name="Description" content="[^"]+">'), lambda m:'')] - - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - - extra_css = ''' - .col-center{font-family:Arial,Helvetica,sans-serif;} - h1{font-family:Arial,Helvetica,sans-serif; color:#0D4261;} - .fuenteIntNota{font-family:Arial,Helvetica,sans-serif; color:#1D1D1D; font-size:x-small;} - ''' - - keep_only_tags = [dict(name='div', attrs={'class':['content']})] - - - remove_tags = [ - dict(name='div', attrs={'class':['options','col-right','controles', 'bannerLibre','tiulo-masleidas','masleidas-h']}), - dict(name='a', attrs={'name' : 'comentario',}), - dict(name='iframe'), - dict(name='img', alt = "Ver galerias de imagenes"), - - ] - + remove_empty_feeds = True + extra_css = ''' + body{font-family:Arial,Helvetica,sans-serif;} + .popUpTitulo{color:#0D4261; font-size: xx-large} + ''' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + feeds = [ (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) @@ -57,39 +42,14 @@ class Infobae(BasicNewsRecipe): ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) ] -# def print_version(self, url): -# main, sep, article_part = 
url.partition('contenidos/') -# article_id, rsep, rrest = article_part.partition('-') -# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id - - def get_article_url(self, article): - ans = article.get('link').encode('utf-8') - parts = list(urlparse.urlparse(ans)) - parts[2] = urllib.quote(parts[2]) - ans = urlparse.urlunparse(parts) - return ans.decode('utf-8') - - - def preprocess_html(self, soup): - - for tag in soup.head.findAll('strong'): - tag.extract() - for tag in soup.findAll('meta'): - del tag['content'] - tag.extract() - - mtag = '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">\n<meta http-equiv="Content-Language" content="es-AR"/>\n' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - - return soup + def print_version(self, url): + article_part = url.rpartition('/')[2] + article_id= article_part.partition('-')[0] + return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id def postprocess_html(self, soup, first): - for tag in soup.findAll(name='strong'): tag.name = 'b' - return soup diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe index 13ff42b277..29f2cfc5e3 100644 --- a/resources/recipes/nspm.recipe +++ b/resources/recipes/nspm.recipe @@ -6,6 +6,7 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag, NavigableString class Nspm(BasicNewsRecipe): title = 'Nova srpska politicka misao' @@ -21,6 +22,7 @@ class Nspm(BasicNewsRecipe): encoding = 'utf-8' language = 'sr' delay = 2 + remove_empty_feeds = True publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @@ -45,8 +47,9 @@ class Nspm(BasicNewsRecipe): dict(name=['link','object','embed','script','meta','base','iframe']) ,dict(attrs={'class':'buttonheading'}) ] - 
remove_tags_after = dict(attrs={'class':'article_separator'}) - remove_attributes = ['width','height'] + remove_tags_before = dict(attrs={'class':'contentheading'}) + remove_tags_after = dict(attrs={'class':'article_separator'}) + remove_attributes = ['width','height'] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -67,4 +70,8 @@ class Nspm(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.body.findAll(style=True): del item['style'] + for item in soup.body.findAll('h1'): + nh = NavigableString(item.a.string) + item.a.extract() + item.insert(0,nh) return self.adeify_images(soup) From 548417ea6b6157faf1688b3b082f3eac5476636f Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Mon, 13 Sep 2010 09:18:45 +1000 Subject: [PATCH 15/43] comments and minor tweak --- src/calibre/ebooks/conversion/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index abfa43e7ed..ecf030b27d 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -111,7 +111,7 @@ class PreProcessor(object): html = add_markup.sub('</p>\n<p>', html) # detect chapters/sections to match xpath or splitting logic - heading = re.compile('<h(1|2)[^>]*>', re.IGNORECASE) + heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings") # @@ -134,7 +134,7 @@ class PreProcessor(object): self.log("Unwrapping Lines") # Some OCR sourced files have line breaks in the html using a combination of span & p tags # span are used for hard line breaks, p for new paragraphs. 
Determine which is used so - # that lines can be wrapped across page boundaries + # that lines can be un-wrapped across page boundaries paras_reg = re.compile('<p[^>]*>', re.IGNORECASE) spans_reg = re.compile('<span[^>]*>', re.IGNORECASE) paras = len(paras_reg.findall(html)) From de6aadee76d4dafe9b84133dc3af43ddef22fd0a Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 10:15:35 -0600 Subject: [PATCH 16/43] News download: Fix bug that could break some downloads in non ASCII locales --- resources/recipes/xkcd.recipe | 6 +++--- src/calibre/web/feeds/__init__.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/resources/recipes/xkcd.recipe b/resources/recipes/xkcd.recipe index 312027004e..ad0d420deb 100644 --- a/resources/recipes/xkcd.recipe +++ b/resources/recipes/xkcd.recipe @@ -24,18 +24,18 @@ class XkcdCom(BasicNewsRecipe): (re.compile(r'(<img.*title=")([^"]+)(".*>)'), lambda m: '%s%s<p>%s</p>' % (m.group(1), m.group(3), m.group(2))) ] - + def parse_index(self): INDEX = 'http://xkcd.com/archive/' - soup = self.index_to_soup(INDEX) + soup = self.index_to_soup(INDEX) articles = [] for item in soup.findAll('a', title=True): articles.append({ 'date': item['title'], 'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1, 'url': 'http://xkcd.com' + item['href'], - 'title': self.tag_to_string(item).encode('UTF-8'), + 'title': self.tag_to_string(item), 'description': '', 'content': '', }) diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index a70cf8b664..8aef350498 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -165,7 +165,9 @@ class Feed(object): if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: - self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title)) + t = strftime(u'%a, %d 
%b, %Y %H:%M', article.localtime.timetuple()) + self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'% + (title, t, self.title)) d = item.get('date', '') article.formatted_date = d From b73e1b3da50810e151d10a5d62251754a077e605 Mon Sep 17 00:00:00 2001 From: ldolse <ldolse@yahoo.com> Date: Tue, 14 Sep 2010 02:56:56 +1000 Subject: [PATCH 17/43] tweaked preprocess for $, added rtf to new preprocess logic, changed last pdf default --- src/calibre/ebooks/conversion/preprocess.py | 2 +- src/calibre/ebooks/rtf/input.py | 13 +++---------- src/calibre/gui2/convert/pdf_input.ui | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index f6277956c8..9464be1210 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -340,7 +340,7 @@ class HTMLPreProcessor(object): # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 216ccf591d..d229b80c16 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -8,6 +8,7 @@ from lxml import etree from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.utils import PreProcessor class InlineClass(etree.XSLTExtension): @@ -229,16 +230,8 @@ class RTFInput(InputFormatPlugin): res = transform.tostring(result) res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] if 
self.options.preprocess_html: - self.log("********* Preprocessing HTML *********") - # Detect Chapters to match the xpath in the GUI - chapdetect = re.compile(r'<p[^>]*>\s*<span[^>]*>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)>(<(/i|b)>)?)?)\s*</span>\s*</p>', re.IGNORECASE) - res = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', res) - # Unwrap lines using punctation if the median length of all lines is less than 150 - length = line_length('html', res, 0.4) - self.log("*** Median length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</span>\s*</p>\s*(?P<up2threeblanks><p[^>]*>\s*(<span[^>]*>\s*</span>\s*)</p>\s*){0,3}\s*<p[^>]*>\s*<span[^>]*>\s*" % length, re.UNICODE) - if length < 150: - res = unwrap.sub(' ', res) + preprocessor = PreProcessor(res) + res = preprocessor(res) f.write(res) self.write_inline_css(inline_class) stream.seek(0) diff --git a/src/calibre/gui2/convert/pdf_input.ui b/src/calibre/gui2/convert/pdf_input.ui index 626c68ea63..b2ee421922 100644 --- a/src/calibre/gui2/convert/pdf_input.ui +++ b/src/calibre/gui2/convert/pdf_input.ui @@ -46,7 +46,7 @@ <double>0.010000000000000</double> </property> <property name="value"> - <double>0.500000000000000</double> + <double>0.450000000000000</double> </property> </widget> </item> From 8b73bb52e8d551538d0c0e55e7b91b6b16f69977 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 16:42:22 -0600 Subject: [PATCH 18/43] Fix #6802 (Sovos E Reader Not Recognised / Floppy Drive Activation) --- src/calibre/customize/builtins.py | 3 ++- src/calibre/devices/teclast/driver.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 4c87236e71..68df832048 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -459,7 +459,7 @@ from calibre.devices.iriver.driver 
import IRIVER_STORY from calibre.devices.binatone.driver import README from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.edge.driver import EDGE -from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS +from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS from calibre.devices.sne.driver import SNE from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG @@ -557,6 +557,7 @@ plugins += [ TECLAST_K3, NEWSMY, IPAPYRUS, + SOVOS, EDGE, SNE, ALEX, diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py index 0c60a367cf..2055ff9306 100644 --- a/src/calibre/devices/teclast/driver.py +++ b/src/calibre/devices/teclast/driver.py @@ -52,3 +52,14 @@ class IPAPYRUS(TECLAST_K3): VENDOR_NAME = 'E_READER' WINDOWS_MAIN_MEM = '' +class SOVOS(TECLAST_K3): + + name = 'Sovos device interface' + gui_name = 'Sovos' + description = _('Communicate with the Sovos reader.') + + FORMATS = ['epub', 'fb2', 'pdf', 'txt'] + + VENDOR_NAME = 'RK28XX' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB-MSC' + From fb053fe3f37d531a170bb2a1d67ccf70ea030351 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 16:58:09 -0600 Subject: [PATCH 19/43] Fix #6773 (Slightly broken CHM file) --- src/calibre/ebooks/chm/reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 67a2d36607..831c16bf6a 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -132,7 +132,11 @@ class CHMReader(CHMFile): for path in self.Contents(): lpath = os.path.join(output_dir, path) self._ensure_dir(lpath) - data = self.GetFile(path) + try: + data = self.GetFile(path) + except: + self.log.exception('Failed to extract %s from CHM, ignoring'%path) + continue if 
lpath.find(';') != -1: # fix file names with ";<junk>" at the end, see _reformat() lpath = lpath.split(';')[0] From ba5de1c92d797abc1f82782c7e15bd61dfa387c5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 18:18:32 -0600 Subject: [PATCH 20/43] Conversion pipeline: When setting margins on <body> explicitly set padding to 0 to override and existing padding in the input document --- src/calibre/ebooks/oeb/transforms/flatcss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index f48bdb9934..ffdc641d1e 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -138,6 +138,7 @@ class CSSFlattener(object): float(self.context.margin_left)) bs.append('margin-right : %fpt'%\ float(self.context.margin_right)) + bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) if self.context.change_justification != 'original': bs.append('text-align: '+ self.context.change_justification) body.set('style', '; '.join(bs)) From c5063b8633506f3b661d3e3dcc84d7ec68e74345 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 18:26:51 -0600 Subject: [PATCH 21/43] Fix #6804 (Timeout error when browsing content server via browser) --- resources/content_server/gui.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js index 631fb8b617..d0fb49cc8e 100644 --- a/resources/content_server/gui.js +++ b/resources/content_server/gui.js @@ -26,7 +26,7 @@ var current_library_request = null; ////////////////////////////// GET BOOK LIST ////////////////////////////// -var LIBRARY_FETCH_TIMEOUT = 30000; // milliseconds +var LIBRARY_FETCH_TIMEOUT = 5*60000; // milliseconds function create_table_headers() { var thead = $('table#book_list thead tr'); From c5415bbe8012179b405f2c3ca3b5258e83a863b3 Mon Sep 17 00:00:00 
2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 19:11:38 -0600 Subject: [PATCH 22/43] Fix #6806 (--start-in-tray switch displays hidden windows in metacity, xfwm4 and compiz) --- src/calibre/gui2/cover_flow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py index 88bbae6c41..cb951b09be 100644 --- a/src/calibre/gui2/cover_flow.py +++ b/src/calibre/gui2/cover_flow.py @@ -155,6 +155,7 @@ class CoverFlowMixin(object): self.cb_splitter.action_toggle.triggered.connect(self.toggle_cover_browser) if CoverFlow is not None: self.cover_flow.stop.connect(self.hide_cover_browser) + self.cover_flow.setVisible(False) else: self.cb_splitter.insertWidget(self.cb_splitter.side_index, self.cover_flow) if CoverFlow is not None: From ba67e47c9260a1f813048ab0239ed78d5324e89a Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Mon, 13 Sep 2010 19:12:49 -0600 Subject: [PATCH 23/43] GwR wip book jacket --- src/calibre/devices/apple/driver.py | 6 +- src/calibre/ebooks/oeb/transforms/flatcss.py | 16 +- src/calibre/ebooks/oeb/transforms/jacket.py | 147 +++++++++++++------ src/calibre/library/catalog.py | 4 + 4 files changed, 120 insertions(+), 53 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index e318d368ff..c9bc04a242 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -2342,8 +2342,10 @@ class ITUNES(DriverBase): if isosx: if DEBUG: self.log.info(" deleting '%s' from iDevice" % cached_book['title']) - cached_book['dev_book'].delete() - + try: + cached_book['dev_book'].delete() + except: + self.log.error(" error deleting '%s'" % cached_book['title']) elif iswindows: hit = self._find_device_book(cached_book) if hit: diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index f48bdb9934..030c271362 100644 --- 
a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -146,7 +146,6 @@ class CSSFlattener(object): extra_css=css) self.stylizers[item] = stylizer - def baseline_node(self, node, stylizer, sizes, csize): csize = stylizer.style(node)['font-size'] if node.text: @@ -194,7 +193,7 @@ class CSSFlattener(object): value = 0.0 cssdict[property] = "%0.5fem" % (value / fsize) - def flatten_node(self, node, stylizer, names, styles, psize, left=0): + def flatten_node(self, node, stylizer, names, styles, psize, item_id, left=0): if not isinstance(node.tag, basestring) \ or namespace(node.tag) != XHTML_NS: return @@ -286,15 +285,18 @@ class CSSFlattener(object): if self.lineh and 'line-height' not in cssdict: lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh + if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'): - for prop in ('margin', 'padding', 'border'): - for edge in ('top', 'bottom'): - cssdict['%s-%s'%(prop, edge)] = '0pt' + if item_id != 'jacket' or self.context.output_profile.name == 'Kindle': + for prop in ('margin', 'padding', 'border'): + for edge in ('top', 'bottom'): + cssdict['%s-%s'%(prop, edge)] = '0pt' if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em' if self.context.remove_paragraph_spacing: cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size + if cssdict: items = cssdict.items() items.sort() @@ -313,7 +315,7 @@ class CSSFlattener(object): if 'style' in node.attrib: del node.attrib['style'] for child in node: - self.flatten_node(child, stylizer, names, styles, psize, left) + self.flatten_node(child, stylizer, names, styles, psize, item_id, left) def flatten_head(self, item, stylizer, href): html = item.data @@ -360,7 +362,7 @@ class CSSFlattener(object): stylizer = self.stylizers[item] body = html.find(XHTML('body')) fsize = self.context.dest.fbase - 
self.flatten_node(body, stylizer, names, styles, fsize) + self.flatten_node(body, stylizer, names, styles, fsize, item.id) items = [(key, val) for (val, key) in styles.items()] items.sort() css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index 030067850c..309a7fd7b6 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -13,6 +13,9 @@ from itertools import repeat from lxml import etree from calibre import guess_type, strftime +from calibre.constants import __appname__, __version__ +from calibre.utils.date import now +from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.oeb.base import XPath, XPNSMAP from calibre.library.comments import comments_to_html class Jacket(object): @@ -29,13 +32,30 @@ class Jacket(object): <style type="text/css" media="screen">%(css)s</style> </head> <body> - <div class="banner"> - <div class="meta_div">%(title)s</div> - <div class="meta_div">%(series)s</div> - <div class="meta_div">%(rating)s</div> - <div class="meta_div">%(tags)s</div> + <div class="cbj_banner"> + <div class="cbj_title">%(title)s</div> + <table class="cbj_header"> + <tr class="cbj_series"> + <td class="cbj_label">Series:</td> + <td class="cbj_content">%(series)s</td> + </tr> + <tr class="cbj_pubdate"> + <td class="cbj_label">Published:</td> + <td class="cbj_content">%(pubdate)s</td> + </tr> + <tr class="cbj_rating"> + <td class="cbj_label">Rating:</td> + <td class="cbj_content">%(rating)s</td> + </tr> + <tr class="cbj_tags"> + <td class="cbj_label">Tags:</td> + <td class="cbj_content">%(tags)s</td> + </tr> + </table> + <div class="cbj_footer">%(footer)s</div> </div> - <div class="comments">%(comments)s</div> + <hr class="cbj_kindle_banner_hr" /> + <div class="cbj_comments">%(comments)s</div> </body> </html> ''') @@ -56,7 +76,7 @@ class Jacket(object): def get_rating(self, 
rating): ans = '' if rating is None: - return + return ans try: num = float(rating)/2 except: @@ -65,19 +85,54 @@ class Jacket(object): num = min(num, 5) if num < 1: return ans - id, href = self.oeb.manifest.generate('star', 'star.png') - self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True)) - ans = '<span class="rating">Rating: </span> ' + ''.join(repeat('<img style="vertical-align:text-top" alt="star" src="%s" />'%href, num)) + if self.opts.output_profile.name == 'Kindle': + ans = '%s' % ''.join(repeat('★', num)) + else: + id, href = self.oeb.manifest.generate('star', 'star.png') + self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True)) + ans = '%s' % ''.join(repeat('<img style="vertical-align:text-bottom" alt="star" src="%s" />'%href, num)) return ans def insert_metadata(self, mi): self.log('Inserting metadata into book...') jacket_resources = P("jacket") - if os.path.isdir(jacket_resources): - stylesheet = os.path.join(jacket_resources, 'stylesheet.css') - with open(stylesheet) as f: - css_data = f.read() + css_data = '' + stylesheet = os.path.join(jacket_resources, 'stylesheet.css') + with open(stylesheet) as f: + css = f.read() + + try: + title_str = mi.title if mi.title else unicode(self.oeb.metadata.title[0]) + except: + title_str = _('Unknown') + title = '<span class="title">%s</span>' % (escape(title_str)) + + series = escape(mi.series if mi.series else '') + if mi.series and mi.series_index is not None: + series += escape(' [%s]'%mi.format_series_index()) + if not mi.series: + series = '' + + try: + pubdate = strftime(u'%Y', mi.pubdate.timetuple()) + except: + #pubdate = strftime(u'%Y', now()) + pubdate = '' + + rating = self.get_rating(mi.rating) + + tags = mi.tags + if not tags: + try: + tags = map(unicode, self.oeb.metadata.subject) + except: + tags = [] + if tags: + #tags = self.opts.dest.tags_to_string(tags) + tags = ', '.join(tags) + else: + tags = '' comments = mi.comments if not comments: @@ -91,46 
+146,50 @@ class Jacket(object): if comments: comments = comments_to_html(comments) - series = '<span class="meta_label">Series: </span><span class="series">%s</span>' % escape(mi.series if mi.series else '') - if mi.series and mi.series_index is not None: - series += '<span class="series">%s</span>' % escape(' [%s]'%mi.format_series_index()) - if not mi.series: - series = '' - - tags = mi.tags - if not tags: - try: - tags = map(unicode, self.oeb.metadata.subject) - except: - tags = [] - if tags: - tags = '<span class="meta_label">Tags:</span><span class="tags">%s</span>' % self.opts.dest.tags_to_string(tags) - else: - tags = '' - - try: - title_str = mi.title if mi.title else unicode(self.oeb.metadata.title[0]) - except: - title_str = _('Unknown') - title = '<span class="title">%s</span><span class="pubdate"> (%s)</span>' % (escape(title_str), strftime(u'%Y', mi.pubdate.timetuple())) - + footer = 'B<span class="cbj_smallcaps">OOK JACKET GENERATED BY %s %s</span>' % (__appname__.upper(),__version__) def generate_html(comments): - return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'], - title=title, comments=comments, - series=series, - tags=tags, rating=self.get_rating(mi.rating), - css=css_data, title_str=title_str) + args = dict(xmlns=XPNSMAP['h'], + title_str=title_str, + css=css, + title=title, + pubdate=pubdate, + series=series, + rating=rating, + tags=tags, + comments=comments, + footer = footer) + + # Post-process the generated html to strip out empty header items + generated_html = self.JACKET_TEMPLATE % args + soup = BeautifulSoup(generated_html) + if not series: + series_tag = soup.find('tr', attrs={'class':'cbj_series'}) + series_tag.extract() + if not rating: + rating_tag = soup.find('tr', attrs={'class':'cbj_rating'}) + rating_tag.extract() + if not tags: + tags_tag = soup.find('tr', attrs={'class':'cbj_tags'}) + tags_tag.extract() + if not pubdate: + pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'}) + pubdate_tag.extract() + if 
self.opts.output_profile.name != 'Kindle': + hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'}) + hr_tag.extract() + + return soup.renderContents() id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml') from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath + try: root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) -# print "root: %s" % etree.tostring(root, encoding='utf-8', -# xml_declaration=True, pretty_print=True) except: root = etree.fromstring(generate_html(escape(orig_comments)), parser=RECOVER_PARSER) + jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]') found = None for item in list(self.oeb.spine)[:4]: diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index bd2160aff1..ef7569bd88 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -2523,6 +2523,10 @@ class EPUB_MOBI(CatalogPlugin): # Fetch the database as a dictionary self.booksBySeries = self.plugin.search_sort_db(self.db, self.opts) + if not self.booksBySeries: + self.opts.generate_series = False + self.opts.log(" no series found in selected books, cancelling series generation") + return friendly_name = "Series" From 6a3609f031bb9400630cd6418b278903a4883c8a Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Mon, 13 Sep 2010 19:58:22 -0600 Subject: [PATCH 24/43] Implement #6808 (Feature request: ability to convert all single/double quotes to "smart quotes") --- src/calibre/ebooks/conversion/cli.py | 2 +- src/calibre/ebooks/conversion/plumber.py | 8 + src/calibre/ebooks/conversion/preprocess.py | 23 +- src/calibre/gui2/convert/look_and_feel.py | 2 +- src/calibre/gui2/convert/look_and_feel.ui | 9 +- src/calibre/utils/smartypants.py | 899 ++++++++++++++++++++ 6 files changed, 933 insertions(+), 10 deletions(-) create mode 100755 src/calibre/utils/smartypants.py diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 
7439718cf6..2ef633d0bb 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -122,7 +122,7 @@ def add_pipeline_options(parser, plumber): 'font_size_mapping', 'line_height', 'linearize_tables', - 'extra_css', + 'extra_css', 'smarten_punctuation', 'margin_top', 'margin_left', 'margin_right', 'margin_bottom', 'change_justification', 'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size', diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 24b35f804f..16282dd28d 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -362,6 +362,14 @@ OptionRecommendation(name='preprocess_html', ) ), +OptionRecommendation(name='smarten_punctuation', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Convert plain quotes, dashes and ellipsis to their ' + 'typographically correct equivalents. For details, see ' + 'http://daringfireball.net/projects/smartypants' + ) + ), + OptionRecommendation(name='remove_header', recommended_value=False, level=OptionRecommendation.LOW, help=_('Use a regular expression to try and remove the header.' 
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 7742a20a21..4538af96c4 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -221,7 +221,7 @@ class HTMLPreProcessor(object): (re.compile(u'˛\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ą'), (re.compile(u'˛\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ę'), (re.compile(u'˛\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ę'), - + # ˙ (re.compile(u'˙\s*(<br.*?>)*\s*z', re.UNICODE), lambda match: u'ż'), (re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'), @@ -244,14 +244,14 @@ class HTMLPreProcessor(object): (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), # Cover the case where every letter in a chapter title is separated by a space (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head), - + # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), # Clean up spaces (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), # Add space before and after italics (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'), - (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), + (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), ] # Fix Book Designer markup @@ -328,7 +328,7 @@ class HTMLPreProcessor(object): import traceback print 'Failed to parse remove_footer regexp' traceback.print_exc() - + # unwrap hyphenation - moved here so it's executed after header/footer removal if is_pdftohtml: # unwrap visible dashes and hyphens - don't delete they are often hyphens for @@ -338,13 +338,13 @@ class 
HTMLPreProcessor(object): end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens with formatting end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: '')) - - # Make the more aggressive chapter marking regex optional with the preprocess option to + + # Make the more aggressive chapter marking regex optional with the preprocess option to # reduce false positives and move after header/footer removal if getattr(self.extra_opts, 'preprocess_html', None): if is_pdftohtml: end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),) - + if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) if length: @@ -401,5 +401,14 @@ class HTMLPreProcessor(object): if self.plugin_preprocess: html = self.input_plugin_preprocess(html) + if getattr(self.extra_opts, 'smarten_punctuation', False): + html = self.smarten_punctuation(html) + return html + def smarten_punctuation(self, html): + from calibre.utils.smartypants import smartyPants + from calibre.ebooks.chardet import substitute_entites + html = smartyPants(html) + return substitute_entites(html) + diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py index b0403bf1dd..ec3f0b944d 100644 --- a/src/calibre/gui2/convert/look_and_feel.py +++ b/src/calibre/gui2/convert/look_and_feel.py @@ -22,7 +22,7 @@ class LookAndFeelWidget(Widget, Ui_Form): Widget.__init__(self, parent, ['change_justification', 'extra_css', 'base_font_size', 'font_size_mapping', 'line_height', - 'linearize_tables', + 'linearize_tables', 'smarten_punctuation', 'disable_font_rescaling', 'insert_blank_line', 'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding', 
'asciiize', 'keep_ligatures'] diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui index de48e7caf9..c683300854 100644 --- a/src/calibre/gui2/convert/look_and_feel.ui +++ b/src/calibre/gui2/convert/look_and_feel.ui @@ -178,7 +178,7 @@ </property> </widget> </item> - <item row="9" column="0" colspan="4"> + <item row="10" column="0" colspan="4"> <widget class="QGroupBox" name="groupBox"> <property name="title"> <string>Extra &CSS</string> @@ -214,6 +214,13 @@ </property> </widget> </item> + <item row="9" column="0"> + <widget class="QCheckBox" name="opt_smarten_punctuation"> + <property name="text"> + <string>Smarten &punctuation</string> + </property> + </widget> + </item> </layout> </widget> <resources> diff --git a/src/calibre/utils/smartypants.py b/src/calibre/utils/smartypants.py new file mode 100755 index 0000000000..44aac4de8c --- /dev/null +++ b/src/calibre/utils/smartypants.py @@ -0,0 +1,899 @@ +#!/usr/bin/python + +r""" +============== +smartypants.py +============== + +---------------------------- +SmartyPants ported to Python +---------------------------- + +Ported by `Chad Miller`_ +Copyright (c) 2004, 2007 Chad Miller + +original `SmartyPants`_ by `John Gruber`_ +Copyright (c) 2003 John Gruber + + +Synopsis +======== + +A smart-quotes plugin for Pyblosxom_. + +The original "SmartyPants" is a free web publishing plug-in for Movable Type, +Blosxom, and BBEdit that easily translates plain ASCII punctuation characters +into "smart" typographic punctuation HTML entities. + +This software, *smartypants.py*, endeavours to be a functional port of +SmartyPants to Python, for use with Pyblosxom_.
+ + +Description +=========== + +SmartyPants can perform the following transformations: + +- Straight quotes ( " and ' ) into "curly" quote HTML entities +- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities +- Dashes (``--`` and ``---``) into en- and em-dash entities +- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity + +This means you can write, edit, and save your posts using plain old +ASCII straight quotes, plain dashes, and plain dots, but your published +posts (and final HTML output) will appear with smart quotes, em-dashes, +and proper ellipses. + +SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``, +``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to +display text where smart quotes and other "smart punctuation" would not be +appropriate, such as source code or example markup. + + +Backslash Escapes +================= + +If you need to use literal straight quotes (or plain hyphens and +periods), SmartyPants accepts the following backslash escape sequences +to force non-smart punctuation. It does so by transforming the escape +sequence into a decimal-encoded HTML entity: + +(FIXME: table here.) + +.. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved. +.. comment ====== ===== ========= +.. comment Escape Value Character +.. comment ====== ===== ========= +.. comment \\\\\\\\ \ \\\\ +.. comment \\\\" " " +.. comment \\\\' ' ' +.. comment \\\\. . . +.. comment \\\\- - \- +.. comment \\\\` ` \` +.. comment ====== ===== ========= + +This is useful, for example, when you want to use straight quotes as +foot and inch marks: 6'2" tall; a 17" iMac. + +Options +======= + +For Pyblosxom users, the ``smartypants_attributes`` attribute is where you +specify configuration options. + +Numeric values are the easiest way to configure SmartyPants' behavior: + +"0" + Suppress all transformations. (Do nothing.) 
+"1" + Performs default SmartyPants transformations: quotes (including + \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash) + is used to signify an em-dash; there is no support for en-dashes. + +"2" + Same as smarty_pants="1", except that it uses the old-school typewriter + shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``" + (dash dash dash) + for em-dashes. + +"3" + Same as smarty_pants="2", but inverts the shorthand for dashes: + "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for + en-dashes. + +"-1" + Stupefy mode. Reverses the SmartyPants transformation process, turning + the HTML entities produced by SmartyPants into their ASCII equivalents. + E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is + turned into two dashes, etc. + + +The following single-character attribute values can be combined to toggle +individual transformations from within the smarty_pants attribute. For +example, to educate normal quotes and em-dashes, but not ellipses or +\`\`backticks'' -style quotes: + +``py['smartypants_attributes'] = "qd"`` + +"q" + Educates normal quote characters: (") and ('). + +"b" + Educates \`\`backticks'' -style double quotes. + +"B" + Educates \`\`backticks'' -style double quotes and \`single' quotes. + +"d" + Educates em-dashes. + +"D" + Educates em-dashes and en-dashes, using old-school typewriter shorthand: + (dash dash) for en-dashes, (dash dash dash) for em-dashes. + +"i" + Educates em-dashes and en-dashes, using inverted old-school typewriter + shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes. + +"e" + Educates ellipses. + +"w" + Translates any instance of ``&quot;`` into a normal double-quote character. + This should be of no interest to most people, but of particular interest + to anyone who writes their posts using Dreamweaver, as Dreamweaver + inexplicably uses this entity to represent a literal double-quote + character.
SmartyPants only educates normal quotes, not entities (because + ordinarily, entities are used for the explicit purpose of representing the + specific character they represent). The "w" option must be used in + conjunction with one (or both) of the other quote options ("q" or "b"). + Thus, if you wish to apply all SmartyPants transformations (quotes, en- + and em-dashes, and ellipses) and also translate ``&quot;`` entities into + regular quotes so SmartyPants can educate them, you should pass the + following to the smarty_pants attribute: ``py['smartypants_attributes'] = "qDew"`` + +The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for +which no Smarty Pants rendering will occur. + + +Caveats +======= + +Why You Might Not Want to Use Smart Quotes in Your Weblog +--------------------------------------------------------- + +For one thing, you might not care. + +Most normal, mentally stable individuals do not take notice of proper +typographic punctuation. Many design and typography nerds, however, break +out in a nasty rash when they encounter, say, a restaurant sign that uses +a straight apostrophe to spell "Joe's". + +If you're the sort of person who just doesn't care, you might well want to +continue not caring. Using straight quotes -- and sticking to the 7-bit +ASCII character set in general -- is certainly a simpler way to live. + +Even if you *do* care about accurate typography, you still might want to +think twice before educating the quote characters in your weblog. One side +effect of publishing curly quote HTML entities is that it makes your +weblog a bit harder for others to quote from using copy-and-paste. What +happens is that when someone copies text from your blog, the copied text +contains the 8-bit curly quote characters (as well as the 8-bit characters +for em-dashes and ellipses, if you use these options). These characters +are not standard across different text encoding methods, which is why they +need to be encoded as HTML entities.
+ +People copying text from your weblog, however, may not notice that you're +using curly quotes, and they'll go ahead and paste the unencoded 8-bit +characters copied from their browser into an email message or their own +weblog. When pasted as raw "smart quotes", these characters are likely to +get mangled beyond recognition. + +That said, my own opinion is that any decent text editor or email client +makes it easy to stupefy smart quote characters into their 7-bit +equivalents, and I don't consider it my problem if you're using an +indecent text editor or email client. + + +Algorithmic Shortcomings +------------------------ + +One situation in which quotes will get curled the wrong way is when +apostrophes are used at the start of leading contractions. For example: + +``'Twas the night before Christmas.`` + +In the case above, SmartyPants will turn the apostrophe into an opening +single-quote, when in fact it should be a closing one. I don't think +this problem can be solved in the general case -- every word processor +I've tried gets this wrong as well. In such cases, it's best to use the +proper HTML entity for closing single-quotes (``&#8217;``) by hand. + + +Bugs +==== + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: mailto:smartypantspy@chad.org + +If the bug involves quotes being curled the wrong way, please send example +text to illustrate. + +To Do list +---------- + +- Provide a function for use within templates to quote anything at all. + + +Version History +=============== + +1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400 + - Fixed bug where blocks of precious unalterable text were instead + interpreted. Thanks to Le Roux and Dirk van Oosterbosch. + +1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400 + - Fix bogus magical quotation when there is no hint that the + user wants it, e.g., in "21st century". Thanks to Nathan Hamblen. 
+ - Be smarter about quotes before terminating numbers in an en-dash'ed + range. + +1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500 + - Fix a date-processing bug, as reported by jacob childress. + - Begin a test-suite for ensuring correct output. + - Removed import of "string", since I didn't really need it. + (This was my first ever Python program. Sue me!) + +1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400 + - Abort processing if the flavour is in forbidden-list. Default of + [ "rss" ] (Idea of Wolfgang SCHNERRING.) + - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING. + +1.5_1.2: Mon, 24 May 2004 08:14:54 -0400 + - Some single quotes weren't replaced properly. Diff-tesuji played + by Benjamin GEIGER. + +1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500 + - Support upcoming pyblosxom 0.9 plugin verification feature. + +1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500 + - Initial release + +Version Information +------------------- + +Version numbers will track the SmartyPants_ version numbers, with the addition +of an underscore and the smartypants.py version on the end. + +New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_ + +.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy + +Authors +======= + +`John Gruber`_ did all of the hard work of writing this software in Perl for +`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_ +ported it to Python to use with Pyblosxom_. + + +Additional Credits +================== + +Portions of the SmartyPants original work are based on Brad Choate's nifty +MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to +this plug-in. Brad Choate is a fine hacker indeed. + +`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta +testing of the original SmartyPants. + +`Rael Dornfest`_ ported SmartyPants to Blosxom. + +.. _Brad Choate: http://bradchoate.com/ +.. _Jeremy Hedley: http://antipixel.com/ +.. 
_Charles Wiltgen: http://playbacktime.com/ +.. _Rael Dornfest: http://raelity.org/ + + +Copyright and License +===================== + +SmartyPants_ license:: + + Copyright (c) 2003 John Gruber + (http://daringfireball.net/) + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + +smartypants.py license:: + + smartypants.py is a derivative work of SmartyPants. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + + +.. _John Gruber: http://daringfireball.net/ +.. _Chad Miller: http://web.chad.org/ + +.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom +.. _SmartyPants: http://daringfireball.net/projects/smartypants/ +.. 
_Movable Type: http://www.movabletype.org/ + +""" + +default_smartypants_attr = "1" + +import re + +tags_to_skip_regex = re.compile(r"<(/)?(pre|code|kbd|script|math)[^>]*>", re.I) + + +def verify_installation(request): + return 1 + # assert the plugin is functional + + +def cb_story(args): + global default_smartypants_attr + + try: + forbidden_flavours = args["entry"]["smartypants_forbidden_flavours"] + except KeyError: + forbidden_flavours = [ "rss" ] + + try: + attributes = args["entry"]["smartypants_attributes"] + except KeyError: + attributes = default_smartypants_attr + + if attributes is None: + attributes = default_smartypants_attr + + entryData = args["entry"].getData() + + try: + if args["request"]["flavour"] in forbidden_flavours: + return + except KeyError: + if "<" in args["entry"]["body"][0:15]: # sniff the stream + return # abort if it looks like escaped HTML. FIXME + + # FIXME: make these configurable, perhaps? + args["entry"]["body"] = smartyPants(entryData, attributes) + args["entry"]["title"] = smartyPants(args["entry"]["title"], attributes) + + +### interal functions below here + +def smartyPants(text, attr=default_smartypants_attr): + convert_quot = False # should we translate " entities into normal quotes? + + # Parse attributes: + # 0 : do nothing + # 1 : set all + # 2 : set all, using old school en- and em- dash shortcuts + # 3 : set all, using inverted old school en and em- dash shortcuts + # + # q : quotes + # b : backtick quotes (``double'' only) + # B : backtick quotes (``double'' and `single') + # d : dashes + # D : old school dashes + # i : inverted old school dashes + # e : ellipses + # w : convert " entities to " for Dreamweaver users + + skipped_tag_stack = [] + do_dashes = "0" + do_backticks = "0" + do_quotes = "0" + do_ellipses = "0" + do_stupefy = "0" + + if attr == "0": + # Do nothing. 
+ return text + elif attr == "1": + do_quotes = "1" + do_backticks = "1" + do_dashes = "1" + do_ellipses = "1" + elif attr == "2": + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = "1" + do_backticks = "1" + do_dashes = "2" + do_ellipses = "1" + elif attr == "3": + # Do everything, turn all options on, use inverted old school dash shorthand. + do_quotes = "1" + do_backticks = "1" + do_dashes = "3" + do_ellipses = "1" + elif attr == "-1": + # Special "stupefy" mode. + do_stupefy = "1" + else: + for c in attr: + if c == "q": do_quotes = "1" + elif c == "b": do_backticks = "1" + elif c == "B": do_backticks = "2" + elif c == "d": do_dashes = "1" + elif c == "D": do_dashes = "2" + elif c == "i": do_dashes = "3" + elif c == "e": do_ellipses = "1" + elif c == "w": convert_quot = "1" + else: + pass + # ignore unknown option + + tokens = _tokenize(text) + result = [] + in_pre = False + + prev_token_last_char = "" + # This is a cheat, used to get some context + # for one-character tokens that consist of + # just a quote char. What we do is remember + # the last character of the previous text + # token, to use as context to curl single- + # character quote tokens correctly. + + for cur_token in tokens: + if cur_token[0] == "tag": + # Don't mess with quotes inside some tags. This does not handle self <closing/> tags! + result.append(cur_token[1]) + skip_match = tags_to_skip_regex.match(cur_token[1]) + if skip_match is not None: + if not skip_match.group(1): + skipped_tag_stack.append(skip_match.group(2).lower()) + in_pre = True + else: + if len(skipped_tag_stack) > 0: + if skip_match.group(2).lower() == skipped_tag_stack[-1]: + skipped_tag_stack.pop() + else: + pass + # This close doesn't match the open. This isn't XHTML. We should barf here. + if len(skipped_tag_stack) == 0: + in_pre = False + else: + t = cur_token[1] + last_char = t[-1:] # Remember last char of this token before processing. 
+ if not in_pre: + t = processEscapes(t) + + if convert_quot != "0": + t = re.sub('"', '"', t) + + if do_dashes != "0": + if do_dashes == "1": + t = educateDashes(t) + if do_dashes == "2": + t = educateDashesOldSchool(t) + if do_dashes == "3": + t = educateDashesOldSchoolInverted(t) + + if do_ellipses != "0": + t = educateEllipses(t) + + # Note: backticks need to be processed before quotes. + if do_backticks != "0": + t = educateBackticks(t) + + if do_backticks == "2": + t = educateSingleBackticks(t) + + if do_quotes != "0": + if t == "'": + # Special case: single-character ' token + if re.match("\S", prev_token_last_char): + t = "’" + else: + t = "‘" + elif t == '"': + # Special case: single-character " token + if re.match("\S", prev_token_last_char): + t = "”" + else: + t = "“" + + else: + # Normal case: + t = educateQuotes(t) + + if do_stupefy == "1": + t = stupefyEntities(t) + + prev_token_last_char = last_char + result.append(t) + + return "".join(result) + + +def educateQuotes(str): + """ + Parameter: String. + + Returns: The string, with "educated" curly quote HTML entities. + + Example input: "Isn't this fun?" + Example output: “Isn’t this fun?” + """ + + punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" + + # Special case if the very first character is a quote + # followed by punctuation at a non-word-break. 
Close the quotes by brute force: + str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", str) + str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", str) + + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + str = re.sub(r""""'(?=\w)""", """“‘""", str) + str = re.sub(r"""'"(?=\w)""", """‘“""", str) + + # Special case for decade abbreviations (the '80s): + str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str) + + close_class = r"""[^\ \t\r\n\[\{\(\-]""" + dec_dashes = r"""–|—""" + + # Get most opening single quotes: + opening_single_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or +   | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + ' # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_single_quotes_regex.sub(r"""\1‘""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (?!\s | s\b | \d) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (\s | s\b) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’\2""", str) + + # Any remaining single quotes should be opening ones: + str = re.sub(r"""'""", r"""‘""", str) + + # Get most opening double quotes: + opening_double_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or +   | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + " # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_double_quotes_regex.sub(r"""\1“""", str) + + # Double closing quotes: + closing_double_quotes_regex = re.compile(r""" + #(%s)? 
# character that indicates the quote should be closing + " + (?=\s) + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""”""", str) + + closing_double_quotes_regex = re.compile(r""" + (%s) # character that indicates the quote should be closing + " + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""\1”""", str) + + # Any remaining quotes should be opening ones. + str = re.sub(r'"', r"""“""", str) + + return str + + +def educateBackticks(str): + """ + Parameter: String. + Returns: The string, with ``backticks'' -style double quotes + translated into HTML curly quote entities. + Example input: ``Isn't this fun?'' + Example output: “Isn't this fun?” + """ + + str = re.sub(r"""``""", r"""“""", str) + str = re.sub(r"""''""", r"""”""", str) + return str + + +def educateSingleBackticks(str): + """ + Parameter: String. + Returns: The string, with `backticks' -style single quotes + translated into HTML curly quote entities. + + Example input: `Isn't this fun?' + Example output: ‘Isn’t this fun?’ + """ + + str = re.sub(r"""`""", r"""‘""", str) + str = re.sub(r"""'""", r"""’""", str) + return str + + +def educateDashes(str): + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity. + """ + + str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards) + str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards) + return str + + +def educateDashesOldSchool(str): + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an en-dash HTML entity, and each "---" translated to + an em-dash HTML entity. + """ + + str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards) + str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards) + return str + + +def educateDashesOldSchoolInverted(str): + """ + Parameter: String. 
+ + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity, and each "---" translated to + an en-dash HTML entity. Two reasons why: First, unlike the + en- and em-dash syntax supported by + EducateDashesOldSchool(), it's compatible with existing + entries written before SmartyPants 1.1, back when "--" was + only used for em-dashes. Second, em-dashes are more + common than en-dashes, and so it sort of makes sense that + the shortcut should be shorter to type. (Thanks to Aaron + Swartz for the idea.) + """ + str = re.sub(r"""---""", r"""–""", str) # em + str = re.sub(r"""--""", r"""—""", str) # en + return str + + + +def educateEllipses(str): + """ + Parameter: String. + Returns: The string, with each instance of "..." translated to + an ellipsis HTML entity. + + Example input: Huh...? + Example output: Huh…? + """ + + str = re.sub(r"""\.\.\.""", r"""…""", str) + str = re.sub(r"""\. \. \.""", r"""…""", str) + return str + + +def stupefyEntities(str): + """ + Parameter: String. + Returns: The string, with each SmartyPants HTML entity translated to + its ASCII counterpart. + + Example input: “Hello — world.” + Example output: "Hello -- world." + """ + + str = re.sub(r"""–""", r"""-""", str) # en-dash + str = re.sub(r"""—""", r"""--""", str) # em-dash + + str = re.sub(r"""‘""", r"""'""", str) # open single quote + str = re.sub(r"""’""", r"""'""", str) # close single quote + + str = re.sub(r"""“""", r'''"''', str) # open double quote + str = re.sub(r"""”""", r'''"''', str) # close double quote + + str = re.sub(r"""…""", r"""...""", str)# ellipsis + + return str + + +def processEscapes(str): + r""" + Parameter: String. + Returns: The string, with after processing the following backslash + escape sequences. This is useful if you want to force a "dumb" + quote or other character to appear. + + Escape Value + ------ ----- + \\ \ + \" " + \' ' + \. . 
+ \- - + \` ` + """ + str = re.sub(r"""\\\\""", r"""\""", str) + str = re.sub(r'''\\"''', r""""""", str) + str = re.sub(r"""\\'""", r"""'""", str) + str = re.sub(r"""\\\.""", r""".""", str) + str = re.sub(r"""\\-""", r"""-""", str) + str = re.sub(r"""\\`""", r"""`""", str) + + return str + + +def _tokenize(str): + """ + Parameter: String containing HTML markup. + Returns: Reference to an array of the tokens comprising the input + string. Each token is either a tag (possibly with nested, + tags contained therein, such as <a href="<MTFoo>">, or a + run of text between tags. Each element of the array is a + two-element array; the first is either 'tag' or 'text'; + the second is the actual value. + + Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. + <http://www.bradchoate.com/past/mtregex.php> + """ + + tokens = [] + + #depth = 6 + #nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) + #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments + # (?: <\? .*? \?> ) | # directives + # %s # nested tags """ % (nested_tags,) + tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") + + token_match = tag_soup.search(str) + + previous_end = 0 + while token_match is not None: + if token_match.group(1): + tokens.append(['text', token_match.group(1)]) + + tokens.append(['tag', token_match.group(2)]) + + previous_end = token_match.end() + token_match = tag_soup.search(str, token_match.end()) + + if previous_end < len(str): + tokens.append(['text', str[previous_end:]]) + + return tokens + + + +if __name__ == "__main__": + + import locale + + try: + locale.setlocale(locale.LC_ALL, '') + except: + pass + + from docutils.core import publish_string + docstring_html = publish_string(__doc__, writer_name='html') + + print docstring_html + + + # Unit test output goes out stderr. No worries. + import unittest + sp = smartyPants + + class TestSmartypantsAllAttributes(unittest.TestCase): + # the default attribute is "1", which means "all". 
+ + def test_dates(self): + self.assertEqual(sp("1440-80's"), "1440-80’s") + self.assertEqual(sp("1440-'80s"), "1440-‘80s") + self.assertEqual(sp("1440---'80s"), "1440–‘80s") + self.assertEqual(sp("1960s"), "1960s") # no effect. + self.assertEqual(sp("1960's"), "1960’s") + self.assertEqual(sp("one two '60s"), "one two ‘60s") + self.assertEqual(sp("'60s"), "‘60s") + + def test_skip_tags(self): + self.assertEqual( + sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""), + """<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""") + self.assertEqual( + sp("""<p>He said "Let's write some code." This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>"""), + """<p>He said “Let’s write some code.” This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>""") + + + def test_ordinal_numbers(self): + self.assertEqual(sp("21st century"), "21st century") # no effect. + self.assertEqual(sp("3rd"), "3rd") # no effect. 
+ + def test_educated_quotes(self): + self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''') + + unittest.main() + + + + +__author__ = "Chad Miller <smartypantspy@chad.org>" +__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400" +__url__ = "http://wiki.chad.org/SmartyPantsPy" +__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom" From d215bd4b464aa91a77815064b929b0ea6ad74ca7 Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Tue, 14 Sep 2010 07:31:49 -0600 Subject: [PATCH 25/43] GwR change id to calibre_jacket --- src/calibre/ebooks/oeb/transforms/flatcss.py | 2 +- src/calibre/ebooks/oeb/transforms/jacket.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 9a5ff36d55..7212bd33c6 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -289,7 +289,7 @@ class CSSFlattener(object): if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'): - if item_id != 'jacket' or self.context.output_profile.name == 'Kindle': + if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle': for prop in ('margin', 'padding', 'border'): for edge in ('top', 'bottom'): cssdict['%s-%s'%(prop, edge)] = '0pt' diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index 309a7fd7b6..6786d7cf9c 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -181,7 +181,7 @@ class Jacket(object): return soup.renderContents() - id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml') + id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml') from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath try: From abd1dba1d3b63192ff0ef2d9e5e7c408ecd89844 Mon Sep 17 00:00:00 2001 
From: GRiker <griker@hotmail.com> Date: Tue, 14 Sep 2010 09:43:21 -0600 Subject: [PATCH 26/43] added jacket CSS --- resources/jacket/stylesheet.css | 116 ++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 resources/jacket/stylesheet.css diff --git a/resources/jacket/stylesheet.css b/resources/jacket/stylesheet.css new file mode 100644 index 0000000000..204342ed22 --- /dev/null +++ b/resources/jacket/stylesheet.css @@ -0,0 +1,116 @@ +/* +** Book Jacket generation +** +** The template for Book Jackets is in ebooks.oeb.transforms.jacket:JACKET_TEMPLATE +** This CSS is inserted into the generated HTML at conversion time +** +** Users can control parts of the presentation of a generated book jacket by +** editing this file. +** +** The general form of a generated Book Jacket: +** +** Title +** Series: series [series_index] +** Published: year_of_publication +** Rating: #_of_stars +** Tags: tag1, tag2, tag3 ... +** +** Comments +** +** If a book does not have Series information, a date of publication, a rating or tags +** the corresponding row is automatically removed from the generated book jacket. +*/ + +/* +** Banner +** Only affects EPUB, kindle ignores this type of formatting +*/ +.cbj_banner { + background: #eee; + border: thin solid black; + margin: 1em; + padding: 1em; + -webkit-border-radius:8px; + } + +/* +** Title +*/ +.cbj_title { + font-size: x-large; + text-align: center; + } + +/* +** Table containing Series, Publication Year, Rating and Tags +*/ +table.cbj_header { + width: 100%; + } + +/* +** General formatting for banner labels +*/ +table.cbj_header td.cbj_label { + font-family: sans-serif; + font-weight: bold; + text-align: right; + width: 40%; + } + +/* +** General formatting for banner content +*/ +table.cbj_header td.cbj_content { + font-family: sans-serif; + text-align: left; + width:60%; + } + +/* +** To skip a banner item (Series|Published|Rating|Tags), +** edit the appropriate CSS rule below. 
+*/ +table.cbj_header tr.cbj_series { + /* Uncomment the next line to remove 'Series' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_pubdate { + /* Uncomment the next line to remove 'Published' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_rating { + /* Uncomment the next line to remove 'Rating' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_tags { + /* Uncomment the next line to remove 'Tags' from banner section */ + /* display:none; */ + } + +hr { + /* This rule controls formatting for any hr elements contained in the jacket */ + border-top: 0px solid white; + border-right: 0px solid white; + border-bottom: 2px solid black; + border-left: 0px solid white; + margin-left: 10%; + width: 80%; + } + +.cbj_footer { + font-family: sans-serif; + font-size: small; + margin-top: 8px; + text-align: center; + } +.cbj_smallcaps { + font-size: 90%; + } + +.cbj_comments { + font-family: sans-serif; + } From 47141e527e45880d2b143c0ec0c7f73bec30efc9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 14 Sep 2010 13:38:03 -0600 Subject: [PATCH 27/43] Fix handling of non-ASCII chars when redering series in default EPUB cover --- src/calibre/ebooks/oeb/transforms/cover.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/transforms/cover.py b/src/calibre/ebooks/oeb/transforms/cover.py index 59b42df68a..532c9bbc03 100644 --- a/src/calibre/ebooks/oeb/transforms/cover.py +++ b/src/calibre/ebooks/oeb/transforms/cover.py @@ -99,7 +99,8 @@ class CoverManager(object): series_string = None if m.series and m.series_index: series_string = _('Book %s of %s')%( - fmt_sidx(m.series_index[0], use_roman=True), m.series[0]) + fmt_sidx(m.series_index[0], use_roman=True), + unicode(m.series[0])) try: from calibre.ebooks import calibre_cover From 3ef0192e8d058fb2b904a0cef12979cd9a4951e1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal 
<kovid@kovidgoyal.net> Date: Tue, 14 Sep 2010 13:39:30 -0600 Subject: [PATCH 28/43] Fix #6756 (Slate recipe does not work) --- resources/recipes/slate.recipe | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/resources/recipes/slate.recipe b/resources/recipes/slate.recipe index c03255d2df..9da1c4da78 100644 --- a/resources/recipes/slate.recipe +++ b/resources/recipes/slate.recipe @@ -27,9 +27,6 @@ class PeriodicalNameHere(BasicNewsRecipe): encoding = None language = 'en' - - - # Method variables for customizing feed parsing summary_length = 250 use_embedded_content = None @@ -45,13 +42,26 @@ class PeriodicalNameHere(BasicNewsRecipe): match_regexps = [] # The second entry is for 'Big Money', which comes from a different site, uses different markup - keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}), + keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body', 'story']}), dict(attrs={ 'id':['content']}) ] # The second entry is for 'Big Money', which comes from a different site, uses different markup - remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper', - 'article_bottom_tools_cntr','fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio', - 'bizbox_links_bottom','ris_links_wrapper','BOXXLE']}), + remove_tags = [dict(attrs={ 'id':[ + 'add_comments_button', + 'article_bottom_tools', + 'article_bottom_tools_cntr', + 'bizbox_links_bottom', + 'BOXXLE', + 'comments_button', + 'comments-to-fray', + 'fbog_article_bottom_cntr', + 'fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio', + 'insider_ad_wrapper', + 'js_kit_cntr', + 'recommend_tab', + 'ris_links_wrapper', + 'toolbox', + ]}), dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ] excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast'] @@ -339,8 +349,8 @@ class PeriodicalNameHere(BasicNewsRecipe): # Change <h1> to <h2> headline = 
soup.find("h1") - tag = headline.find("span") - tag.name = 'div' + #tag = headline.find("span") + #tag.name = 'div' if headline is not None : h2tag = Tag(soup, "h2") From 786e904186879f37ffd86720dea4c010433364e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 14 Sep 2010 13:49:32 -0600 Subject: [PATCH 29/43] ... --- src/calibre/ebooks/oeb/transforms/jacket.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index dc1d2fea41..dd1b14a736 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -81,6 +81,7 @@ class Jacket(object): if XPath(JACKET_XPATH)(x.data): self.remove_images(x, limit=sys.maxint) self.oeb.manifest.remove(x) + self.log('Removed existing jacket') break def __call__(self, oeb, opts, metadata): From d731515ad452f24872d6abcee8516169ac2c6e57 Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Tue, 14 Sep 2010 14:39:13 -0600 Subject: [PATCH 30/43] GwR revisions --- src/calibre/customize/profiles.py | 6 ++++-- src/calibre/ebooks/oeb/transforms/jacket.py | 10 ++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 1563f764ca..b9a159ee7d 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -555,9 +555,11 @@ class KindleOutput(OutputProfile): periodical_date_in_title = False @classmethod +# def tags_to_string(cls, tags): +# return u'%s <br/><span style="color: white">%s</span>' % (', '.join(tags), +# 'ttt '.join(tags)+'ttt ') def tags_to_string(cls, tags): - return u'%s <br/><span style="color: white">%s</span>' % (', '.join(tags), - 'ttt '.join(tags)+'ttt ') + return u'%s' % (', '.join(tags)) class KindleDXOutput(OutputProfile): diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index 
dc1d2fea41..ff192ca537 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -55,10 +55,11 @@ class Jacket(object): img = I(fname, data=True) if self.opts.output_profile.short_name == 'kindle': + # Original star.png size: 24x23 + # Needs to be scaled by half fname = 'star.jpg' img = save_cover_data_to(img, fname, - return_data=True) - + return_data=True, resize_to=[12,12]) id, href = self.oeb.manifest.generate('calibre_jacket_star', fname) self.oeb.manifest.add(id, href, guess_type(fname)[0], data=img) @@ -97,7 +98,7 @@ class Jacket(object): # Render Jacket {{{ -def get_rating(rating, href): +def get_rating(rating, href, output_profile): ans = '' try: num = float(rating)/2 @@ -114,6 +115,7 @@ def get_rating(rating, href): href, int(num))) else: ans = u' '.join(u'\u2605') + return ans @@ -138,7 +140,7 @@ def render_jacket(mi, output_profile, star_href=None, except: pubdate = '' - rating = get_rating(mi.rating, star_href) + rating = get_rating(mi.rating, star_href, output_profile) tags = mi.tags if mi.tags else alt_tags if tags: From a5ccbbcb216aee219716380d9a74d7a1fe898699 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 14 Sep 2010 17:49:43 -0600 Subject: [PATCH 31/43] Windows drivers for PRS-(3/6)50 --- src/calibre/devices/prs505/driver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index c55936be2d..4c14565c2d 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -36,15 +36,15 @@ class PRS505(USBMS): VENDOR_NAME = 'SONY' WINDOWS_MAIN_MEM = re.compile( r'(PRS-(505|300|500))|' - r'(PRS-((700[#/])|((6|9)00&)))' + r'(PRS-((700[#/])|((6|9|3)(0|5)0&)))' ) WINDOWS_CARD_A_MEM = re.compile( r'(PRS-(505|500)[#/]\S+:MS)|' - r'(PRS-((700[/#]\S+:)|((6|9)00[#_]))MS)' + r'(PRS-((700[/#]\S+:)|((6|9)(0|5)0[#_]))MS)' ) WINDOWS_CARD_B_MEM = re.compile( 
r'(PRS-(505|500)[#/]\S+:SD)|' - r'(PRS-((700[/#]\S+:)|((6|9)00[#_]))SD)' + r'(PRS-((700[/#]\S+:)|((6|9)(0|5)0[#_]))SD)' ) From 724acba1d825fae2bab90b9a49102c2d0889b62a Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 14 Sep 2010 20:35:51 -0600 Subject: [PATCH 32/43] Tweaks to jacket page and new ratings icon --- imgsrc/rating.svg | 589 ++++++++++++++++++++ resources/images/rating.png | Bin 0 -> 10827 bytes resources/images/star.png | Bin 1737 -> 0 bytes src/calibre/ebooks/oeb/transforms/jacket.py | 49 +- src/calibre/gui2/tag_view.py | 2 +- 5 files changed, 609 insertions(+), 31 deletions(-) create mode 100644 imgsrc/rating.svg create mode 100644 resources/images/rating.png delete mode 100644 resources/images/star.png diff --git a/imgsrc/rating.svg b/imgsrc/rating.svg new file mode 100644 index 0000000000..d289c71b99 --- /dev/null +++ b/imgsrc/rating.svg @@ -0,0 +1,589 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> +<svg + xmlns:i="http://ns.adobe.com/AdobeIllustrator/10.0/" + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://web.resource.org/cc/" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="128" + height="128" + id="svg2" + sodipodi:version="0.32" + inkscape:version="0.45+devel" + version="1.0" + sodipodi:docbase="/Users/david/Progetti/oxygen-svn/theme/svg/actions" + sodipodi:docname="rating.svgz" + inkscape:output_extension="org.inkscape.output.svgz.inkscape" + inkscape:export-filename="/home/pinheiro/pics/oxygen/scalable/actions/rating.png" + inkscape:export-xdpi="11.25" + inkscape:export-ydpi="11.25"> + <defs + id="defs4"> + <linearGradient + id="linearGradient3946"> + <stop + 
style="stop-color:#552b00;stop-opacity:1;" + offset="0" + id="stop3948" /> + <stop + style="stop-color:#673400;stop-opacity:0;" + offset="1" + id="stop3950" /> + </linearGradient> + <linearGradient + inkscape:collect="always" + id="linearGradient3844"> + <stop + style="stop-color:#faff64;stop-opacity:1;" + offset="0" + id="stop3846" /> + <stop + style="stop-color:#faff64;stop-opacity:0;" + offset="1" + id="stop3848" /> + </linearGradient> + <linearGradient + inkscape:collect="always" + id="linearGradient3379"> + <stop + style="stop-color:#fffc07;stop-opacity:1;" + offset="0" + id="stop3381" /> + <stop + style="stop-color:#fffc07;stop-opacity:0;" + offset="1" + id="stop3383" /> + </linearGradient> + <linearGradient + inkscape:collect="always" + id="linearGradient3363"> + <stop + style="stop-color:#ffffff;stop-opacity:1;" + offset="0" + id="stop3365" /> + <stop + style="stop-color:#ffffff;stop-opacity:0;" + offset="1" + id="stop3367" /> + </linearGradient> + <linearGradient + id="linearGradient3309"> + <stop + style="stop-color:#f8ff8a;stop-opacity:1" + offset="0" + id="stop3311" /> + <stop + style="stop-color:#ffffff;stop-opacity:0;" + offset="1" + id="stop3313" /> + </linearGradient> + <linearGradient + id="linearGradient26907" + gradientUnits="userSpaceOnUse" + x1="-84.002403" + y1="-383.9971" + x2="-12.0029" + y2="-383.9971" + gradientTransform="matrix(0,1,-1,0,-39.9985,140.0029)"> + <stop + offset="0" + style="stop-color:#888a85;stop-opacity:1;" + id="stop26909" /> + <stop + offset="1" + style="stop-color:#2e3436;stop-opacity:1;" + id="stop26911" /> + </linearGradient> + <linearGradient + gradientTransform="matrix(0,1,-1,0,-39.9985,140.0029)" + y2="-383.9975" + x2="-23.516129" + y1="-383.9971" + x1="-84.002403" + gradientUnits="userSpaceOnUse" + id="linearGradient3711"> + <stop + id="stop3713" + style="stop-color:white;stop-opacity:1;" + offset="0" /> + <stop + id="stop3715" + style="stop-color:white;stop-opacity:0;" + offset="1" /> + </linearGradient> + 
<linearGradient + id="linearGradient3081"> + <stop + id="stop3083" + offset="0" + style="stop-color:#28691f;stop-opacity:1;" /> + <stop + id="stop3085" + offset="1" + style="stop-color:#00bf00;stop-opacity:1;" /> + </linearGradient> + <linearGradient + id="linearGradient3290"> + <stop + style="stop-color:yellow;stop-opacity:1;" + offset="0" + id="stop3292" /> + <stop + style="stop-color:#f07800;stop-opacity:1;" + offset="1" + id="stop3294" /> + </linearGradient> + <linearGradient + id="linearGradient3638"> + <stop + style="stop-color:#ffffff;stop-opacity:0;" + offset="0" + id="stop3640" /> + <stop + id="stop3661" + offset="0.06868132" + style="stop-color:#ffffff;stop-opacity:1;" /> + <stop + id="stop3659" + offset="0.5" + style="stop-color:#ffffff;stop-opacity:1;" /> + <stop + style="stop-color:#ffffff;stop-opacity:0;" + offset="1" + id="stop3642" /> + </linearGradient> + <linearGradient + id="linearGradient1563"> + <stop + id="stop1565" + offset="0" + style="stop-color:#ffffff;stop-opacity:1;" /> + <stop + id="stop1567" + offset="1" + style="stop-color:white;stop-opacity:0;" /> + </linearGradient> + <linearGradient + id="linearGradient3273"> + <stop + id="stop3275" + offset="0" + style="stop-color:#ffffff;stop-opacity:0.55035973;" /> + <stop + id="stop3277" + offset="1" + style="stop-color:#ffffff;stop-opacity:0;" /> + </linearGradient> + <linearGradient + id="linearGradient12948"> + <stop + style="stop-color:#ffffff;stop-opacity:1;" + offset="0" + id="stop12950" /> + <stop + style="stop-color:#c0c0c0;stop-opacity:0;" + offset="1" + id="stop12952" /> + </linearGradient> + <radialGradient + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(1,0,0,0.111111,0,138.1081)" + r="64.796692" + fy="177.29686" + fx="80.738739" + cy="155.37218" + cx="80.738739" + id="radialGradient5079" + xlink:href="#linearGradient5073" + inkscape:collect="always" /> + <linearGradient + id="linearGradient5073" + inkscape:collect="always"> + <stop + id="stop5075" + offset="0" + 
style="stop-color:#000000;stop-opacity:1;" /> + <stop + id="stop5077" + offset="1" + style="stop-color:#000000;stop-opacity:0;" /> + </linearGradient> + <foreignObject + id="foreignObject7221" + height="1" + width="1" + y="0" + x="0" + requiredExtensions="http://ns.adobe.com/AdobeIllustrator/10.0/"> + <i:pgfRef + xlink:href="#adobe_illustrator_pgf" /> + </foreignObject> + <linearGradient + id="XMLID_1_" + gradientUnits="userSpaceOnUse" + x1="95.693398" + y1="141.1738" + x2="32.308601" + y2="77.789001"> + <stop + offset="0" + style="stop-color:#ffd50a;stop-opacity:1;" + id="stop7227" /> + <stop + offset="1" + style="stop-color:#8d4000;stop-opacity:1;" + id="stop7233" /> + </linearGradient> + <linearGradient + id="XMLID_3_" + gradientUnits="userSpaceOnUse" + x1="63.9995" + y1="92.865196" + x2="63.9995" + y2="120.8652" + gradientTransform="translate(175.0067,11.74752)"> + <stop + offset="0" + style="stop-color:#888A85" + id="stop7261" /> + <stop + offset="0.3226" + style="stop-color:#A6A7A3" + id="stop7263" /> + <stop + offset="1" + style="stop-color:#EEEEEC" + id="stop7265" /> + </linearGradient> + <linearGradient + id="XMLID_4_" + gradientUnits="userSpaceOnUse" + x1="64.000504" + y1="108.8652" + x2="64.000504" + y2="92.865196"> + <stop + offset="0" + style="stop-color:#EEEEEC" + id="stop7270" /> + <stop + offset="1" + style="stop-color:#FFFFFF" + id="stop7272" /> + </linearGradient> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3081" + id="linearGradient2149" + gradientUnits="userSpaceOnUse" + x1="62.112335" + y1="90.513916" + x2="67.887672" + y2="39.095695" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient26907" + id="linearGradient3226" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0,1,-1,0,-39.9985,140.0029)" + x1="-70.002899" + y1="-383.9971" + x2="-11.91648" + y2="-383.9971" /> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient3711" + id="radialGradient3228" + 
gradientUnits="userSpaceOnUse" + cx="343.99899" + cy="92" + fx="343.99899" + fy="92" + r="36" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3711" + id="linearGradient3230" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0,1.022977,-1.022977,0,111.9686,137.8125)" + x1="-88.058083" + y1="-131.93112" + x2="-45.096584" + y2="-131.93112" /> + <linearGradient + inkscape:collect="always" + xlink:href="#XMLID_1_" + id="linearGradient2898" + x1="64.07962" + y1="-14.227339" + x2="64.07962" + y2="120.44466" + gradientUnits="userSpaceOnUse" /> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient3290" + id="radialGradient2906" + cx="69.526619" + cy="60.115833" + fx="69.526619" + fy="89.655701" + r="111.65377" + gradientTransform="matrix(0.9439139,-0.3301918,0.332644,0.9509241,-16.097695,27.249949)" + gradientUnits="userSpaceOnUse" /> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient3290" + id="radialGradient3304" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(0.5227399,0,-1.554444e-8,0.5266221,349.81061,60.575712)" + cx="69.526619" + cy="60.115833" + fx="69.526619" + fy="60.115833" + r="111.65377" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3309" + id="linearGradient3315" + x1="219.22163" + y1="11.902248" + x2="219.22163" + y2="136.85997" + gradientUnits="userSpaceOnUse" + gradientTransform="translate(-170.08594,0)" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient1563" + id="linearGradient3345" + gradientUnits="userSpaceOnUse" + gradientTransform="translate(-37.771032,-0.1213203)" + x1="278.47162" + y1="77.652245" + x2="200.17728" + y2="31.10997" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3363" + id="linearGradient3369" + x1="177.42397" + y1="22.377773" + x2="177.60074" + y2="93.022789" + gradientUnits="userSpaceOnUse" /> + <linearGradient + inkscape:collect="always" + 
xlink:href="#linearGradient3379" + id="linearGradient3385" + x1="216.88614" + y1="122.5867" + x2="216.88614" + y2="37.969955" + gradientUnits="userSpaceOnUse" + gradientTransform="translate(-152,0)" /> + <filter + inkscape:collect="always" + id="filter3391"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="0.55939545" + id="feGaussianBlur3393" /> + </filter> + <filter + inkscape:collect="always" + id="filter3401"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="0.11157909" + id="feGaussianBlur3403" /> + </filter> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3363" + id="linearGradient3800" + x1="63.948792" + y1="12.034382" + x2="67.219337" + y2="12.034382" + gradientUnits="userSpaceOnUse" + spreadMethod="reflect" /> + <filter + inkscape:collect="always" + id="filter3838" + x="-0.17816916" + width="1.3563383" + y="-0.15506857" + height="1.3101371"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="0.46259975" + id="feGaussianBlur3840" /> + </filter> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3844" + id="linearGradient3850" + x1="28.637825" + y1="120.84999" + x2="31.289474" + y2="122.08743" + gradientUnits="userSpaceOnUse" + spreadMethod="reflect" /> + <filter + inkscape:collect="always" + id="filter3928"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="0.18346262" + id="feGaussianBlur3930" /> + </filter> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3844" + id="linearGradient3934" + gradientUnits="userSpaceOnUse" + spreadMethod="reflect" + x1="28.637825" + y1="120.84999" + x2="31.289474" + y2="122.08743" /> + <radialGradient + inkscape:collect="always" + xlink:href="#linearGradient3946" + id="radialGradient3956" + cx="64.07962" + cy="114.47154" + fx="64.07962" + fy="114.47154" + r="60.700505" + gradientTransform="matrix(0.2787307,0,0,0.2689969,46.218665,81.520439)" + gradientUnits="userSpaceOnUse" /> + <filter + 
inkscape:collect="always" + id="filter3975"> + <feGaussianBlur + inkscape:collect="always" + stdDeviation="1.2948866" + id="feGaussianBlur3977" /> + </filter> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="1.4142136" + inkscape:cx="-57.231582" + inkscape:cy="95.226607" + inkscape:document-units="px" + inkscape:current-layer="layer1" + inkscape:window-width="1247" + inkscape:window-height="816" + inkscape:window-x="388" + inkscape:window-y="110" + showgrid="true" + gridspacingx="4px" + gridspacingy="4px" + gridempspacing="0" + inkscape:grid-points="true"> + <inkscape:grid + type="xygrid" + id="grid2302" + spacingx="4px" + spacingy="4px" + empspacing="2" /> + </sodipodi:namedview> + <metadata + id="metadata7"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <path + id="path3961" + d="M 64.03125 8 C 56.162818 8.0100117 46.828561 34.554451 40.46875 39.1875 C 34.10894 43.820548 5.9844574 44.576082 3.5625 52.0625 C 1.1405426 59.548917 23.465249 76.613524 25.90625 84.09375 C 28.347251 91.57398 20.40967 118.5394 26.78125 123.15625 C 33.15283 127.7731 56.287818 111.82251 64.15625 111.8125 C 72.024682 111.80249 95.202691 127.69555 101.5625 123.0625 C 107.92231 118.42945 99.890544 91.486414 102.3125 84 C 104.73446 76.513583 127.03475 59.38648 124.59375 51.90625 C 122.15275 44.426021 94.027829 43.741849 87.65625 39.125 C 81.28467 34.508152 71.899685 7.9899879 64.03125 8 z M 64.03125 11.90625 C 64.208046 12.045423 65.56776 12.712264 67.15625 14.65625 C 68.97167 16.877947 71.031426 20.210059 73.0625 23.75 C 75.093573 27.28994 77.113982 31.048819 79.09375 34.3125 C 81.073519 37.576182 82.75512 40.328991 85.40625 42.25 C 88.057376 
44.171009 91.18831 44.91637 94.90625 45.78125 C 98.624192 46.646129 102.81606 47.391152 106.8125 48.21875 C 110.80894 49.046347 114.60465 49.966787 117.28125 51 C 119.62327 51.904061 120.71845 53.000764 120.90625 53.125 C 120.82618 53.333062 120.57672 54.794782 119.21875 56.90625 C 117.66679 59.319356 115.1453 62.318181 112.40625 65.34375 C 109.66721 68.369316 106.71091 71.452346 104.21875 74.34375 C 101.72659 77.235155 99.632744 79.697501 98.625 82.8125 C 97.617256 85.927495 97.892393 89.134266 98.21875 92.9375 C 98.545107 96.740738 99.114622 100.97466 99.5625 105.03125 C 100.01038 109.08783 100.31178 112.97888 100.15625 115.84375 C 100.02016 118.35052 99.34151 119.69095 99.28125 119.90625 C 99.057443 119.89786 97.552762 120.17027 95.125 119.53125 C 92.350417 118.80093 88.723899 117.29504 85 115.625 C 81.276103 113.95497 77.426259 112.10169 73.90625 110.625 C 70.386242 109.1483 67.4302 107.93334 64.15625 107.9375 C 60.882303 107.94167 57.891241 109.1706 54.375 110.65625 C 50.858761 112.1419 47.032137 114.00799 43.3125 115.6875 C 39.592862 117.367 35.960216 118.85638 33.1875 119.59375 C 30.761373 120.23895 29.286908 119.99088 29.0625 120 C 29.004012 119.7864 28.29872 118.4439 28.15625 115.9375 C 27.993428 113.07303 28.281199 109.18271 28.71875 105.125 C 29.156299 101.0673 29.714573 96.835302 30.03125 93.03125 C 30.347928 89.227198 30.609418 85.987425 29.59375 82.875 C 28.578082 79.762573 26.468263 77.322553 23.96875 74.4375 C 21.469238 71.552452 18.527988 68.487339 15.78125 65.46875 C 13.034512 62.450158 10.495601 59.471649 8.9375 57.0625 C 7.5741618 54.954496 7.3592053 53.457399 7.28125 53.25 C 7.2962039 53.337785 8.2681026 52.126785 10.84375 51.125 C 13.517705 50.084977 17.34943 49.150265 21.34375 48.3125 C 25.33807 47.474737 29.534272 46.749339 33.25 45.875 C 36.96573 45.000663 40.103767 44.24025 42.75 42.3125 C 45.396234 40.384748 47.059794 37.612458 49.03125 34.34375 C 51.002705 31.075042 53.009191 27.326347 55.03125 23.78125 C 57.053308 20.236153 59.096493 
16.88256 60.90625 14.65625 C 62.489787 12.708229 63.857465 12.044552 64.03125 11.90625 z " + style="opacity:1;fill:url(#linearGradient2898);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80851269000000059;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1;filter:url(#filter3975)" /> + <path + transform="matrix(0.4934214,0.1726044,-0.1726044,0.4934214,42.377875,49.908537)" + d="M 153.09403,94.713757 C 144.53658,107.09689 92.616372,93.013297 78.414631,98.001518 C 64.21289,102.98974 32.50348,146.4474 18.082028,142.13539 C 3.6605746,137.82337 1.0106378,84.092245 -8.1220219,72.127031 C -17.254681,60.161818 -68.384124,43.433534 -68.739625,28.385431 C -69.095125,13.337327 -18.812666,-5.7867426 -10.255219,-18.169872 C -1.697772,-30.553002 -1.5880954,-84.349316 12.613645,-89.337536 C 26.815387,-94.325757 60.541592,-52.41396 74.963045,-48.101941 C 89.384498,-43.789923 140.58172,-60.30959 149.71438,-48.344376 C 158.84704,-36.379162 129.40853,8.6478227 129.76403,23.695927 C 130.11953,38.74403 161.65148,82.330628 153.09403,94.713757 z" + inkscape:randomized="0" + inkscape:rounded="0.20136392" + inkscape:flatsided="false" + sodipodi:arg2="1.2330172" + sodipodi:arg1="0.60469864" + sodipodi:r2="76.832565" + sodipodi:r1="121.72647" + sodipodi:cy="25.510532" + sodipodi:cx="52.952892" + sodipodi:sides="5" + id="path3574" + style="opacity:1;fill:url(#radialGradient2906);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80892944000000000;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" + sodipodi:type="star" /> + <path + style="opacity:1;fill:url(#linearGradient2898);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80851269000000059;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" + d="M 64.03125 8 C 56.162818 8.0100117 46.828561 
34.554451 40.46875 39.1875 C 34.10894 43.820548 5.9844574 44.576082 3.5625 52.0625 C 1.1405426 59.548917 23.465249 76.613524 25.90625 84.09375 C 28.347251 91.57398 20.40967 118.5394 26.78125 123.15625 C 33.15283 127.7731 56.287818 111.82251 64.15625 111.8125 C 72.024682 111.80249 95.202691 127.69555 101.5625 123.0625 C 107.92231 118.42945 99.890544 91.486414 102.3125 84 C 104.73446 76.513583 127.03475 59.38648 124.59375 51.90625 C 122.15275 44.426021 94.027829 43.741849 87.65625 39.125 C 81.28467 34.508152 71.899685 7.9899879 64.03125 8 z M 64.03125 11.90625 C 64.208046 12.045423 65.56776 12.712264 67.15625 14.65625 C 68.97167 16.877947 71.031426 20.210059 73.0625 23.75 C 75.093573 27.28994 77.113982 31.048819 79.09375 34.3125 C 81.073519 37.576182 82.75512 40.328991 85.40625 42.25 C 88.057376 44.171009 91.18831 44.91637 94.90625 45.78125 C 98.624192 46.646129 102.81606 47.391152 106.8125 48.21875 C 110.80894 49.046347 114.60465 49.966787 117.28125 51 C 119.62327 51.904061 120.71845 53.000764 120.90625 53.125 C 120.82618 53.333062 120.57672 54.794782 119.21875 56.90625 C 117.66679 59.319356 115.1453 62.318181 112.40625 65.34375 C 109.66721 68.369316 106.71091 71.452346 104.21875 74.34375 C 101.72659 77.235155 99.632744 79.697501 98.625 82.8125 C 97.617256 85.927495 97.892393 89.134266 98.21875 92.9375 C 98.545107 96.740738 99.114622 100.97466 99.5625 105.03125 C 100.01038 109.08783 100.31178 112.97888 100.15625 115.84375 C 100.02016 118.35052 99.34151 119.69095 99.28125 119.90625 C 99.057443 119.89786 97.552762 120.17027 95.125 119.53125 C 92.350417 118.80093 88.723899 117.29504 85 115.625 C 81.276103 113.95497 77.426259 112.10169 73.90625 110.625 C 70.386242 109.1483 67.4302 107.93334 64.15625 107.9375 C 60.882303 107.94167 57.891241 109.1706 54.375 110.65625 C 50.858761 112.1419 47.032137 114.00799 43.3125 115.6875 C 39.592862 117.367 35.960216 118.85638 33.1875 119.59375 C 30.761373 120.23895 29.286908 119.99088 29.0625 120 C 29.004012 119.7864 28.29872 118.4439 
28.15625 115.9375 C 27.993428 113.07303 28.281199 109.18271 28.71875 105.125 C 29.156299 101.0673 29.714573 96.835302 30.03125 93.03125 C 30.347928 89.227198 30.609418 85.987425 29.59375 82.875 C 28.578082 79.762573 26.468263 77.322553 23.96875 74.4375 C 21.469238 71.552452 18.527988 68.487339 15.78125 65.46875 C 13.034512 62.450158 10.495601 59.471649 8.9375 57.0625 C 7.5741618 54.954496 7.3592053 53.457399 7.28125 53.25 C 7.2962039 53.337785 8.2681026 52.126785 10.84375 51.125 C 13.517705 50.084977 17.34943 49.150265 21.34375 48.3125 C 25.33807 47.474737 29.534272 46.749339 33.25 45.875 C 36.96573 45.000663 40.103767 44.24025 42.75 42.3125 C 45.396234 40.384748 47.059794 37.612458 49.03125 34.34375 C 51.002705 31.075042 53.009191 27.326347 55.03125 23.78125 C 57.053308 20.236153 59.096493 16.88256 60.90625 14.65625 C 62.489787 12.708229 63.857465 12.044552 64.03125 11.90625 z " + id="path2304" /> + <path + style="fill:url(#linearGradient3800);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;filter:url(#filter3838)" + d="M 60.98796,9.471226 C 62.846491,8.2143022 64.889907,8.0204702 67.219338,9.471226 L 64.037358,15.614216 L 60.98796,9.471226 z" + id="path3409" + sodipodi:nodetypes="cccc" /> + <path + style="opacity:1;fill:url(#linearGradient3315);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80892944;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" + d="M 64.039064,11.90625 C 63.865274,12.044552 62.497594,12.708229 60.914064,14.65625 C 59.104304,16.88256 57.061124,20.236153 55.039064,23.78125 C 53.017004,27.326347 51.010514,31.075042 49.039064,34.34375 C 47.067604,37.612458 45.404044,40.384748 42.757814,42.3125 C 40.111574,44.24025 36.973544,45.000663 33.257814,45.875 C 29.542084,46.749339 25.345884,47.474737 21.351564,48.3125 C 17.357244,49.150265 13.525514,50.084977 10.851564,51.125 C 8.2759131,52.126785 
7.3040131,53.337785 7.2890631,53.25 C 7.3670131,53.457399 7.5819731,54.954496 8.9453131,57.0625 C 10.503414,59.471649 13.042324,62.450158 15.789064,65.46875 C 18.535804,68.487339 21.477054,71.552452 23.976564,74.4375 C 26.476074,77.322553 28.585894,79.762573 29.601564,82.875 C 29.865144,83.682722 30.019904,84.511238 30.132814,85.34375 C 32.540654,85.431079 34.961934,85.5 37.414064,85.5 C 64.456484,85.5 88.974124,80.107134 106.91406,71.34375 C 108.71383,69.370041 110.60784,67.338911 112.41406,65.34375 C 115.15311,62.318181 117.67459,59.319356 119.22656,56.90625 C 120.58453,54.794782 120.83398,53.333062 120.91406,53.125 C 120.72626,53.000764 119.63107,51.904061 117.28906,51 C 114.61246,49.966787 110.81674,49.046347 106.82031,48.21875 C 102.82387,47.391152 98.631994,46.646129 94.914064,45.78125 C 91.196124,44.91637 88.065184,44.171009 85.414064,42.25 C 82.762934,40.328991 81.081334,37.576182 79.101564,34.3125 C 77.121794,31.048819 75.101384,27.28994 73.070314,23.75 C 71.039234,20.210059 68.979484,16.877947 67.164064,14.65625 C 65.575574,12.712264 64.215854,12.045423 64.039064,11.90625 z" + id="path2910" /> + <g + id="g3339" + transform="translate(-132.29928,0)"> + <path + id="path3317" + d="M 196.34375,11.90625 C 196.16996,12.044552 194.80228,12.708229 193.21875,14.65625 C 191.40899,16.88256 189.36581,20.236153 187.34375,23.78125 C 185.32169,27.326347 183.3152,31.075042 181.34375,34.34375 C 179.37229,37.612458 177.70873,40.384748 175.0625,42.3125 C 172.41626,44.24025 169.27823,45.000663 165.5625,45.875 C 161.84677,46.749339 157.65057,47.474737 153.65625,48.3125 C 149.66193,49.150265 145.8302,50.084977 143.15625,51.125 C 140.5806,52.126785 139.6087,53.337785 139.59375,53.25 C 139.62377,53.329884 139.71528,53.638731 139.84375,54.0625 C 140.2595,53.69998 141.25985,52.862595 143.15625,52.125 C 145.8302,51.084977 149.66193,50.150265 153.65625,49.3125 C 157.65057,48.474737 161.84677,47.749339 165.5625,46.875 C 169.27823,46.000663 172.41626,45.24025 175.0625,43.3125 C 
177.70873,41.384748 179.37229,38.612458 181.34375,35.34375 C 183.3152,32.075042 185.32169,28.326347 187.34375,24.78125 C 189.36581,21.236153 191.40899,17.88256 193.21875,15.65625 C 194.80228,13.708229 196.16996,13.044552 196.34375,12.90625 C 196.52054,13.045423 197.88026,13.712264 199.46875,15.65625 C 201.28417,17.877947 203.34392,21.210059 205.375,24.75 C 207.40607,28.28994 209.42648,32.048819 211.40625,35.3125 C 213.38602,38.576182 215.06762,41.328991 217.71875,43.25 C 220.36987,45.171009 223.50081,45.91637 227.21875,46.78125 C 230.93668,47.646129 235.12856,48.391152 239.125,49.21875 C 243.12143,50.046347 246.91715,50.966787 249.59375,52 C 251.51448,52.74144 252.56925,53.579608 253,53.9375 C 253.13371,53.522484 253.18802,53.204851 253.21875,53.125 C 253.03095,53.000764 251.93576,51.904061 249.59375,51 C 246.91715,49.966787 243.12143,49.046347 239.125,48.21875 C 235.12856,47.391152 230.93668,46.646129 227.21875,45.78125 C 223.50081,44.91637 220.36987,44.171009 217.71875,42.25 C 215.06762,40.328991 213.38602,37.576182 211.40625,34.3125 C 209.42648,31.048819 207.40607,27.28994 205.375,23.75 C 203.34392,20.210059 201.28417,16.877947 199.46875,14.65625 C 197.88026,12.712264 196.52054,12.045423 196.34375,11.90625 z" + style="opacity:1;fill:url(#linearGradient3369);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80892944000000000;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" /> + <path + sodipodi:nodetypes="cscsscsccscsscsc" + id="path3325" + d="M 246.78125,49.937636 C 247.42469,50.142466 248.03845,50.379526 248.59375,50.593886 C 250.93576,51.497946 252.03095,52.594656 252.21875,52.718886 C 252.13867,52.926956 251.88922,54.388676 250.53125,56.500136 C 248.97928,58.913246 246.4578,61.912066 243.71875,64.937636 C 241.91253,66.932796 240.01852,68.963926 238.21875,70.937636 C 220.27881,79.701026 195.76117,85.093886 168.71875,85.093886 C 166.59433,85.093886 164.49568,85.039506 
162.40625,84.968886 C 162.4184,85.051736 162.42625,85.135936 162.4375,85.218886 C 164.84534,85.306216 167.26662,85.375136 169.71875,85.375136 C 196.76117,85.375136 221.27881,79.982276 239.21875,71.218886 C 241.01852,69.245176 242.91253,67.214046 244.71875,65.218886 C 247.4578,62.193316 249.97928,59.194496 251.53125,56.781386 C 252.88922,54.669926 253.13867,53.208206 253.21875,53.000136 C 253.03095,52.875906 251.93576,51.779196 249.59375,50.875136 C 248.75868,50.552786 247.80629,50.238636 246.78125,49.937636 z" + style="opacity:1;fill:url(#linearGradient3345);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80892944;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" /> + </g> + <path + style="fill:url(#linearGradient3850);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;opacity:0.77153558000000000;filter:url(#filter3928)" + d="M 25.190679,119.77989 C 26.414679,122.74238 27.241162,124.11897 31.289475,123.31542 L 30.638356,120.21008 L 29.079766,120.3986 L 28.261711,118.57341 L 25.190679,119.77989 z" + id="path3842" + sodipodi:nodetypes="cccccc" /> + <path + style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:url(#linearGradient3385);stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1;filter:url(#filter3391)" + d="M 64.125001,11.90625 C 63.951211,12.044552 62.583531,12.708229 61.000001,14.65625 C 59.190241,16.88256 57.147061,20.236153 55.125001,23.78125 C 53.102941,27.326347 51.096451,31.075042 49.125001,34.34375 C 47.153541,37.612458 45.489981,40.384748 42.843751,42.3125 C 40.197511,44.24025 37.059481,45.000663 33.343751,45.875 C 29.628021,46.749339 25.431821,47.474737 21.437501,48.3125 C 17.443181,49.150265 13.611451,50.084977 10.937501,51.125 C 8.3618506,52.126785 7.3899506,53.337785 7.3750006,53.25 C 
7.4529506,53.457399 7.6679106,54.954496 9.0312506,57.0625 C 10.589351,59.471649 13.128261,62.450158 15.875001,65.46875 C 18.621741,68.487339 21.562991,71.552452 24.062501,74.4375 C 26.562011,77.322553 28.671831,79.762573 29.687501,82.875 C 30.703171,85.987425 30.441681,89.227198 30.125001,93.03125 C 29.808321,96.835302 29.250051,101.0673 28.812501,105.125 C 28.374951,109.18271 28.087181,113.07303 28.250001,115.9375 C 28.392471,118.4439 29.097761,119.7864 29.156251,120 C 29.380661,119.99088 30.855121,120.23895 33.281251,119.59375 C 36.053961,118.85638 39.686611,117.367 43.406251,115.6875 C 47.125881,114.00799 50.952511,112.1419 54.468751,110.65625 C 57.984991,109.1706 60.976051,107.94167 64.250001,107.9375 C 67.523951,107.93334 70.479991,109.1483 74.000001,110.625 C 77.520011,112.10169 81.369851,113.95497 85.093751,115.625 C 88.817651,117.29504 92.444151,118.80093 95.218751,119.53125 C 97.646511,120.17027 99.151181,119.89786 99.375001,119.90625 C 99.435261,119.69095 100.1139,118.35052 100.25,115.84375 C 100.40553,112.97888 100.10412,109.08783 99.656251,105.03125 C 99.208371,100.97466 98.638841,96.740738 98.312501,92.9375 C 97.986141,89.134266 97.710991,85.927495 98.718751,82.8125 C 99.726491,79.697501 101.82033,77.235155 104.3125,74.34375 C 106.80466,71.452346 109.76095,68.369316 112.5,65.34375 C 115.23905,62.318181 117.76053,59.319356 119.3125,56.90625 C 120.67047,54.794782 120.91992,53.333062 121,53.125 C 120.8122,53.000764 119.71701,51.904061 117.375,51 C 114.6984,49.966787 110.90268,49.046347 106.90625,48.21875 C 102.90981,47.391152 98.717931,46.646129 95.000001,45.78125 C 91.282061,44.91637 88.151121,44.171009 85.500001,42.25 C 82.848871,40.328991 81.167271,37.576182 79.187501,34.3125 C 77.207731,31.048819 75.187321,27.28994 73.156251,23.75 C 71.125171,20.210059 69.065421,16.877947 67.250001,14.65625 C 65.661511,12.712264 64.301791,12.045423 64.125001,11.90625 z" + id="path3375" + sodipodi:nodetypes="cssssssssssssssscssssssscssssssscsssssssc" /> + <path + 
sodipodi:nodetypes="cssssssssssssssscssssssscssssssscsssssssc" + id="path3395" + d="M 64.125001,11.90625 C 63.951211,12.044552 62.583531,12.708229 61.000001,14.65625 C 59.190241,16.88256 57.147061,20.236153 55.125001,23.78125 C 53.102941,27.326347 51.096451,31.075042 49.125001,34.34375 C 47.153541,37.612458 45.489981,40.384748 42.843751,42.3125 C 40.197511,44.24025 37.059481,45.000663 33.343751,45.875 C 29.628021,46.749339 25.431821,47.474737 21.437501,48.3125 C 17.443181,49.150265 13.611451,50.084977 10.937501,51.125 C 8.3618506,52.126785 7.3899506,53.337785 7.3750006,53.25 C 7.4529506,53.457399 7.6679106,54.954496 9.0312506,57.0625 C 10.589351,59.471649 13.128261,62.450158 15.875001,65.46875 C 18.621741,68.487339 21.562991,71.552452 24.062501,74.4375 C 26.562011,77.322553 28.671831,79.762573 29.687501,82.875 C 30.703171,85.987425 30.441681,89.227198 30.125001,93.03125 C 29.808321,96.835302 29.250051,101.0673 28.812501,105.125 C 28.374951,109.18271 28.087181,113.07303 28.250001,115.9375 C 28.392471,118.4439 29.097761,119.7864 29.156251,120 C 29.380661,119.99088 30.855121,120.23895 33.281251,119.59375 C 36.053961,118.85638 39.686611,117.367 43.406251,115.6875 C 47.125881,114.00799 50.952511,112.1419 54.468751,110.65625 C 57.984991,109.1706 60.976051,107.94167 64.250001,107.9375 C 67.523951,107.93334 70.479991,109.1483 74.000001,110.625 C 77.520011,112.10169 81.369851,113.95497 85.093751,115.625 C 88.817651,117.29504 92.444151,118.80093 95.218751,119.53125 C 97.646511,120.17027 99.151181,119.89786 99.375001,119.90625 C 99.435261,119.69095 100.1139,118.35052 100.25,115.84375 C 100.40553,112.97888 100.10412,109.08783 99.656251,105.03125 C 99.208371,100.97466 98.638841,96.740738 98.312501,92.9375 C 97.986141,89.134266 97.710991,85.927495 98.718751,82.8125 C 99.726491,79.697501 101.82033,77.235155 104.3125,74.34375 C 106.80466,71.452346 109.76095,68.369316 112.5,65.34375 C 115.23905,62.318181 117.76053,59.319356 119.3125,56.90625 C 120.67047,54.794782 
120.91992,53.333062 121,53.125 C 120.8122,53.000764 119.71701,51.904061 117.375,51 C 114.6984,49.966787 110.90268,49.046347 106.90625,48.21875 C 102.90981,47.391152 98.717931,46.646129 95.000001,45.78125 C 91.282061,44.91637 88.151121,44.171009 85.500001,42.25 C 82.848871,40.328991 81.167271,37.576182 79.187501,34.3125 C 77.207731,31.048819 75.187321,27.28994 73.156251,23.75 C 71.125171,20.210059 69.065421,16.877947 67.250001,14.65625 C 65.661511,12.712264 64.301791,12.045423 64.125001,11.90625 z" + style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:url(#linearGradient3385);stroke-width:0.6;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1;filter:url(#filter3401)" /> + <path + sodipodi:nodetypes="cccccc" + id="path3932" + d="M 25.190679,119.77989 C 26.414679,122.74238 27.241162,124.11897 31.289475,123.31542 L 30.638356,120.21008 L 29.079766,120.3986 L 28.261711,118.57341 L 25.190679,119.77989 z" + style="opacity:0.7715356;fill:url(#linearGradient3934);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;filter:url(#filter3928)" + transform="matrix(-1,0,0,1,128.10515,0)" /> + <path + style="opacity:1;fill:url(#radialGradient3956);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:14.80851269000000059;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" + d="M 26.03125 94.25 C 24.983755 105.1142 22.21942 119.85075 26.78125 123.15625 C 33.15283 127.7731 56.287818 111.82251 64.15625 111.8125 C 72.024682 111.80249 95.202691 127.69555 101.5625 123.0625 C 106.10279 119.75495 103.30815 105.10184 102.21875 94.25 L 98.34375 94.25 C 98.677864 97.707156 99.164649 101.42777 99.5625 105.03125 C 100.01038 109.08783 100.31178 112.97888 100.15625 115.84375 C 100.02016 118.35052 99.34151 119.69095 99.28125 119.90625 C 99.057443 119.89786 97.552762 
120.17027 95.125 119.53125 C 92.350417 118.80093 88.723899 117.29504 85 115.625 C 81.276103 113.95497 77.426259 112.10169 73.90625 110.625 C 70.386242 109.1483 67.4302 107.93334 64.15625 107.9375 C 60.882303 107.94167 57.891241 109.1706 54.375 110.65625 C 50.858761 112.1419 47.032137 114.00799 43.3125 115.6875 C 39.592862 117.367 35.960216 118.85638 33.1875 119.59375 C 30.761373 120.23895 29.286908 119.99088 29.0625 120 C 29.004012 119.7864 28.29872 118.4439 28.15625 115.9375 C 27.993428 113.07303 28.281199 109.18271 28.71875 105.125 C 29.110886 101.48845 29.580993 97.733027 29.90625 94.25 L 26.03125 94.25 z " + id="path3936" /> + </g> +</svg> diff --git a/resources/images/rating.png b/resources/images/rating.png new file mode 100644 index 0000000000000000000000000000000000000000..81eba505b98a64f8447aa7e7e88975d612518c3a GIT binary patch literal 10827 zcmV-RDzw#!P)<h;3K|Lk000e1NJLTq004jh004jp1^@s6!#-il00006VoOIv0RI60 z0RN!9r;`8xAOJ~3K~#90?VWj?9LIIvzpuJ`uARLP9K=O{0EmkK36LTsTZSlEx8;vy zlTmETu@yU#lTQ-a@)1jxv$`D1j%i1VqsWpXUs7y9j-{BAY&kM%*&=C*ka&S4D3Am| zfH)Sri#_M+u9rV%XQrpO=U8BeBw)Vz^ryS4tE#Ks`@N&8d#d3jdWl}5m*^#WiC&_Y z=q37RNAf~9;Q6GxMsaui0I772bl%_wLg3`d4fT+dSK+_V?Rz2Up9KI^5e||%)6ELc zxErm?dc<VG&KcDcf$u+kW`eWVy@PoX^o4Hk^GR1c0928qU!%L%WuvHblZty2a1BZ~ zFo)_>D*Ej}xW6!0I=bN@rezfSLbv<*BIku}KqpBRnfMyrDZ{mj`-6z{MpSO0s9QW) zzb3W%w)Jjy&mJ=~`x>Svdn6+#f@|zl`f7@I-~G?S5?rwsKtaY1&?UWWRdrvdIIl-^ zySbo!?wZ|B@~U@`7=9CK3lzt`LgDlm37&oMNYGRKDW}BYp&w>exbi_@Ug!pNijIAb zWG=<}z<RH=IImH3yP2e4`yI#HnML_kbLfEC>dYqok2P#Od4SXl(Cp`(u1Emr0URA+ zh3hyssmiTt+2KrRpY1nB75v;OY>=zuMp1&?NmRl1npI}TJ76=cdf-!dFK9=e2fE?_ zpo+}6^kra|VD_tWotd@+LBHdu1+}1oPcU~BNq6CN4WjuoSU*SQdeG-=Nlqj_zGj$y z9li2+6s9Y#bEoelJ#V;1)%`v!dnKnMnif}8scc?mC)sTuAdtVum%GSpc`u3HmshVV ziuTK!WX}AnvF&7D=stClt|$QLD6a3bDiH5RwQ{qytPxAnx`HaGf|FcN-z{Ijm#<;< zS*$*X&zI@F@lzzac2x7J0ZqFfEta_EN_GWT3;-15<aT--akg9KmcYsuZ-%VRnDW$) z0=V?Q>`(B`-Kb81wO}o(v-o_S-hH0}5>)crlz5)?4y;~Huj-XKV!C1gaP+Gre2;a3 
z%1r{hOkM~4ZsSxnTA_dE?_rb2uy(SXW<jl%gFl@mNIXgZuHPeE#_u(o<-Ga1dk<cj zqk<~}06L0EmKa2x-Kg1TEo%f4Dm6i-`+fj-^|e@gtg7MB&DxW=tG19HdSeX)fM(ne zZ1-4w;J}qPadt%j;IZvwe4lGX%}pw7b!X@aQbwv#zj}0E{~`R~KI+svX7hu4>Dm2j zi0Q433X<aGCfq-`zDU;#-Ph%&%N_tairZ~j;iz{#u!oY41R1F_{e#;-i1nTT4H{~H zZL|315Cgk^JK_qQS0#$hE3dSG@Nx%$T-Z&vWSnb(eW+aR<w&bri`NpFO?YefVT02x zwZF2d9mgGBMY`|iYCY;8o{>LTIZTgUK~w0LH2@ADz#A{HMnrC~%1)Eh0l%v}+gA<z zc7K$>K7c4o+JfLg`u6;K9X3im&MN1;`FBS@PU3~``*P7`3jkHb&(nuGJ5aM%l{H2s zVWH}~yI1`Prq=~qY{T?Nw*XC@qkq%Cuhyp)N*VY4nJgn$$_(LU+lhOBmsBdjdPMfB za;-Z{Unwn8)mU-p-u7<@{KFR^{PJe~W285}1>^Qqtw4~Jge^LsIJ=Sb3)1mrrppol zI*RG)X3$k<7jV6(uCSiCVRJpS<-J(%naVKWqBkp#Fm&CA>I1?a=PlDdJMS9BeW7`A zsdQNa;IUyci6omW?6WG@xU+NziO_VfH$ZB`4`BV%mm>S}1W_=ByW(n+-P>zvj!3%X zeP-RN%YRsS*#Y3LQQT}FBO+#x4P>|LGwi3t2|w%^+Vuef`{+fR{^%Bz9%g9w`v~8o z&@DR?)_L>gJ}kWK0I+U5T_wvF5!oxS!O3YlaB3Qx%x=Y7c{OV1mkj`@k8}$?tKU+y z4g!HAzqc}Vc@GOOD*!z3DZIe4I>6i@$PTB(Krv%XI7%Ph{VN2ehcC_U$816A2z}e% ziCU+6uVR-O3TDhNUCzV8%LV`n(ha?o*p8aLYFTSalGKHZD0C0K0@IU*pafQzeN*=| z!&~1~?;cXld*m`67G5@uzV{ANMVAeMb@!>tHaACKF(Xoyg$jdvejZzVV#$*}?QU8+ zO?Kn!Fm7)Z1ErKC-FfGKJa(DBIC5D4Ko8*b^)O<@xj~hESY2g}cwy5&viWCF?;N#9 zc%9j-eVmnh-d{I4RLqz+F~#+lV?g+#2Y`Z{I8ToX+l`o8RAsB1quUP+`;6N|*OoV- zrS|9hFLu*lj^v6Bq<Xg2CWjo6bkY3-FHQgGW#$TA^o_mu4pMUtS1ax<R=M8w87`&j zU-Vdc-7jMOBb}CgZ6c`n7%TSta$Q<JW3KI*H1EA_`0{cEFKPhXHHzEivC_xfD9X(? 
z&~<J}lPXkAa;j?ssa4ma!OZg`eO0hNrrTlQs@K)r0}v#gU!cJD!v`*(QNfEA00$1> zjI3vfq;rEY&MU35Emfc=pE44b#p;_z35t(Bf5KnbY~gzh?0y%<q-vHSA#Tcbz97?= z&wmPDi~x|sqa?PBv(hg(HwyfqRrZ;p440Dirhj<Dk04zE*izHLEv%&%MGfk+`ZTMq zkGcY9Q)13}9|hLka|daCkxvr6Xc{*<iuv&Yys0AHi6pCVo$J)FUyzrJWrJ%a<0op( zO{Rv}@P<zl<iFj#;^ppP1V~PL4`=@2x0o7xpq6G;rKrDCQ2(GG@GKdo_V!bd+X<Gr zHJ3~;7yuOH(5G<o7f5yYkQODYmSGh(7{_e4$n{3pE>>10eA0!ascY*sul^sH)Ff)> z>MMTUd*oo;T^#+&PZ9+AYO3q2#kU9ZCiuE*c#43tK1_?x{Dmp<`=R(^_#683#;(y( z%x&N@Ok!3^ijYaU^p%RNcEq_#MOUfFRidmHN7k$A%5*_`15ez*t8eDoz3(Bjb{95( zw7niIXD?%1iMWH<LN7<Z{<C%I#zL|XoXsWpYm1yvv1i1$rvuCB1dPpDW|K-`NGTkh zru3>&g7`ahjyeK>igtU`B<38F2_cC?MwC8v8AO~R2OUAoDz&;+kgE`eRkhbu=}!2X zu|}Lya&hxkUh@|E_xu#W+ygOp?o^MO(1N&3Y7_ah$2oTIhZehuK=F#2^Br@%AofY% zl&GFU<ZPh9c~Qn4!-O9&Qz(*8_E0F!QM_dh#ZJ48WntG{qqxZ)ytI%k2+5?9E;{rk z9R>nrSTKVKL#P=79a51Yfg#X-38Xg>Xx4~$zJmmA<0h@$^V9TfyN%M+w_w3~%2Il9 zvAu0mC!Nf0N*A8v>AQcKpcKF4kg<^PZJ<801!JD^J$u{;=hezMV$WME<7hDM8qTXS z={wB00r?^nauXEC4pX}Q;PQcD$t+QjgQIvmhDqddr2GUaphsZPH4I^$AvHP#W(d_G zgdsr&fj(5Tj+HJqAmf0m1}V8Bw#8T^k?v*v&A&)u#V-8mZzI^&Eq^{NK@2cVdMoN1 zPJZjd%$)yTQ}77j`Ksb$1<eP-WXX{ef#_M3aYW8z?YM}X_f2r#fw7>#WXWgNc;pjZ z<Y!J(eAVd1FL2csoAdzAp-GZKiL{-eTNCtq4nxK`BUW@otr;=suo{LJgx#-bw--p4 z7igDb@vOo4p5Qy}OU6s2Sh?*-=-crVXz?t;{E_lgnTzg8D0_?ix>Qdb%5M@YaWcEe zo%#l+ANphFrcR=5hI^vm`HCB8pq7F!OcY!>u8y7&<(#UV13M;a&soDc7se1S*g59T zog)8+Pf%RS$|eojFgl8PZ4b$Wr7I8yT#pqN)?m#V#H><u1tKFTLxT1SlJTr|dzPdM zCNLtUCgwc0F$Ror7+iBR{Tp9Ra`h`we+oNyxcq`g+pS-?+#xH_5*!MMgOk~cNv*@b z@HjKi{sZI3{+{{iQ)MtL0bruw1**m;5OmHr=DC7np8?LIa$2#c0_;igIc0=%QzfSR z`<dVOF8o$^TEwCQx8ZEPiB!Qb;J{VBuu(7@ab&$w)+(|Jw9i$^x`Ad5;#z|#x`HlM z3mp)I-jQwet-X=V@E+WOouDOb{tPyM47AWjtIKIkZ@(O1XuwKsbX@@qCb=3(uEHb+ zv4shW6OS@~=0PT&JH-5j6V<1ROCiUC=UYoa-U?@PE)S{7aj?&b*rOWo%xsafeS=Kz zdl#if^;FrSAa{?FB+XEl!$!pH5RvUx*(@S!5FJSP((9_G0!K_~i2@e`VuYTdP4ukX zNoHs}iGi&+={2a9(BdRo9LE;Vf-Ni-SOcLq?^TDXk#03f7q<uTMi6fRljuP;AecEu zVd^Nku_w52=27M*k1rV%js>q|eN`q3hWpg$A;mu8SRT*zb873e%*D++2!JZ`t=}RW 
zC>v6)a|6Q5Kz1Rz&Q<A8m9(qqmWAF9i79J$cmH~NR_vr}=o-Af&A8oHBgVlNXVB6# zTADyhlMn{zQU(8f2ZQaH|D~Ku3gZnRUO(dYB8e=vIE_Dbl)~hb%$|FK$uo~o%ukp9 z+SmptL?KzSzEOQ_&gBb;y~nXUzIT?1C<p}LflrYs&aze2-0C2&QelT1$VggA<vcM> zHC@0skWBY5aMkr>S8gXcuoWkJH5h}I=Fs8{TAC^gJF4C79JhPDS$a52o1kbb5Ny{9 zG&_$Ri_^6Lh<+rI1+NR6JC8qogxvYZnLd4($#X~0z+zj(|0!Rs8|XKuCHPP-!FTun zJX7HyQS>&P`)**Zy5@BtZxgsa6EINnj1x2%o{0mgOb;U)UryJ^c9H|zk@QL^&0z~O zXlZgm*u|K_mvG$9d84#WP@M2v2|k+E>R15;ak~+307>*Ai7pVqpM8qL#1STs-NX3l zN6XK|8d!*Q!A_R^;EgGt@9Y?*Dj2!v4$_k620Z7-RC#kYV9mT|Oq7Yng`VN{jBI*2 znUy!-WH+P5IkYf^7RS-jRD<4D?i6!e3EbKh_;L!rRlatk^reEJ#Wih|f065Ok;DL! z=*Of6Q1TSUA7JX)LyR4JkmBsjA`6!Gb;`~88+VK{QAu;nG@i}S4~!(N^v`=@!i_L4 ztlG4f;hk^8?b(dxrpxB&%s;jtrBq(vs4#uuSTuBvm}6qfMX!a=qxqXrT-k7XJG4}P z-{?G)F71LqOKAQqnm>!325~YZQkz+^`({RV`{X7b<;4Ae%<TE+s^#V#>Fsl!J0SR} zN;NKkS`-1#YN{A=_}PIzuKB)q<Mr+!m_CXB+<h^<XhC7&M+B{nfI)nELyBHsq%AGV zE`^pF`8T@Wj-cDI1eIie4xK%Uojr=UU8K7<bM2cy$;9{mf~W8KV{~EixL|lqxSX+5 zlmg2IYjvX9poN=kyzy-~P9Ofc@1Xg!EcuU?MD}rMk!w;(3tx{r4xdM_cWP^NkGS+U z+Ad4v&7!mho0et?E<A$HO*6FXXK<X*?UB00uFT1k6gA+SVvk{&um1ndD&tRn1DiX( zQ1KG^MU%AXUdu`<(Q?g><EYWKW@Xx?rPQG@$I-Sg9E;XZ#x&eFN^7z?z1x|6=Fjkp zwf{6as>i}jHMbum@B+rw=SacwL>NrxPMtu5^DPm#Br6cNN1OhdYGj-xXrz&iuC=Pa z-TLCrLuINzjp<`Myjp@KFzGc&%H{aC{}db4E{WAYuLA(UNtg|kBY6|tnG7_jzTY-) z89#mqk;3u|z0Ho+xr7FcU9$Oa1%#ynrOi!f)9gq4)u!pM)hW2E-@x<}|BadRQ`O`| zpoN`*{hdVsxP6q8SWfu?hm5a}hX3{Q?2}JZm^};x?KhxR$C`C8%&blerCHlArmjYP zi}X*Lm2YNo+60BW@^u9B_wwX7KSxlk$+D}cpNh@|Ru?|J8vLs_P%Igp=u&xYQN#tX z0nE;vVQ|$j&__!W;>Y=o#_XZI)#PaYcIE<}&j}&D$ZH|0j&xep5joaynzE=4r*|99 z;98E{`A(+K)}ElItxjD(YWGKPRh{b_y&FF{#aZ9-Kw$OH!jpir;{{F~`A5W=Y_0WS zq{pE&sx2{TRX?eVeP&B*(kiX8wfZM$yPDp@DlGE*p?)>vK_h-ilf2?~<E`D#+53Kp ziD#>GJ%~ca=U+#K50P&Meda!b<NL`K&~w><8;c^tm3z$1<mm1$lFD8Mw)Kjsbu)b7 zecW-QyrI2>ANMxW`gWfWv||~TVm-oI7%A7t_$?~i%o;@UNK6_S5*y!6?zzwL_+5WN zP^^{hFX@+deK`2fan;w&_6z8KnARePr;PozOrY}>AO<Bk`tXw!<{knmwbcGN4Q}YW zT4<59tKre}M%vtt7B|Wht*@PcG_DVg`V+^?s0CTXOGvE0onY=R4*&JXDdn_URhQMV 
zaVNMfPWTY>)vyoVjlN<V#eo#FD*8-9<@LUZ0SHRUh4C{CUX{d=A+QzotVdBpEsmzO zum)AFkJI>etiV#A3pCQgi=mjh7FmO4S|2Ib@VbJ#^3{+$!y|wB^URFZ9uFq17J3rF zwKx4b(~X)LYs>FEgnj!S@+Dz1<%qG!O_k8GR8Z!o&N8&78==3}m=?jAG!l3dz%<*} z-R@#sLU?PGzEoEc5=_%a=uyGP0U>JUqb7f0@AU7*9m;a#D?i8i6SK8+L3@4PzH3zP zZ`S;V&iwQD6TE!~IX|JtyI{aKvZHeE{9HgGf0n^DeGv543N_K>hJtP;>;{@IF==u6 zno_Il&8$Hiv4%ti)~fzEVMnWu<S||A@z(9-*x&tIPCk07`fN^sVQb%W-AC*nHts~3 znnrV9IE4S$E0_yJkM@Y}C}7rBuFX%E5NDF!6+;koHP`BPKoM6ilGZd3Y-bp_RLyRb zH!7e8^$9F~`J^4~XTiNU$uFs464$(mv)}%YJoQhH)V&xosQO2{KVpA1zUIYkY)T69 zrT5U=m1cLJn9p7iv#DCxz{cIftlsnrv@lZZY@8Vloi~jIqU9?|jQ|xb*Q{sl#spDn zWTM+SUO3kV=?%@hE}ZJ(JmL-DZT?ZtKlBS6{@ZWjhu)j*NB{B0j|Si0R=;TD@8JUa zP#@EUB8SHV`|q;WUZ_O)Hc*~^_#Ed?-H%C5#7D%Ds5W*S+aeNQG;<a%E0KMp$i30^ zcE<m$<_6jrX;hc9TEa*`;`rxg@izY`Q%}5~!{7LOO8MG|t6S~KSuc21yTXSyza4z{ z-RN7_P>?j!sD3x&$XiN?2cQ-v&dks~G)KC31pLtA7%?V(Q7{;nx6OFrqGN$(=Dk@E z*r?u!h0wUR#95I>b;P+11)MaA%|F51iQnYGJO7H}Y%NQd=+x?j-Fp2fF@K<FOl^My z=ip=L+t!gUWtnu1KAJM}<`UvS`Quv?XQ%03l_QxN0qZr710rcHM+l8#2w2SMb_2n7 z+(9d$HR#`>fUVK}Vgf#GkdC+{HvI(oiBI#LFMXc;R4qeVG}n`{*KYfD#v1me0WIkx zNCzK7x2MPtk8mE<r;?7mzGTIq<%y#4GZz?KTOi@BsLd8OiwB4a2HUU%4TazAINFwW zXeoIk_gplG(I`%cT7lSng17E=O2s=l^u^z2?wnThCWYck?CxD3;@A>f3S7kJ<L^F( z-G3eVjNwee(KANOYy5?8>iGqjI6KMECdD&@pfT0Q1%r(=K62b}VyKlHXms9eYgM*Q zYMC`6^t${efzy8r?uuI}PJWh$?)+a&ovi&a*cI%E6TEuoZ}6S=TU1XM^%=A~4`JWB zj$A3pb4jDKM$GpI3m_=vl?&&_7~TX9L!j|-<|4+#nfgXre=!!Ik-v|lrTFVsqz!Hp zts``95-V_4ydEdJ9skKMF#Em##F2*|X8f5t;p^%YKYHCq^lz5XnniTc-@!Zh7{Tke zlk*HGvaWbOvad4KDb6X=lM{?w<6we8s_Gur{zjVL%H%f#LbR@EzDiQan#68W+v*Fp zxObQ$ro0*#G>i!Zlih~1;)f9HQ2g%4C>{Duip~=pn-I=^Ppf4GSdr5A?D??$qXu_x zn=a`?h+lq?VE-QG6QbYCCRkClvc1y3{IsQ5xWLfW3B(^LCpOZ;D6`v)3t9$(jm-X1 z=pxL2-6){Zab*PQ^x*W}j5F~4Xl|U+q4(oI{%M#wg$@X(3WigUQa{U=(f_*d!}cRf z>`5#wl^wBl{LL?tDdyPP*DW8LDw)@WCq35h$*}o`eK5bFHtP_hcI{3gRn8a10!wiR z&C)6qXXfJqw77jmStDsR?N4H|dobxu=+tBQ&-^*TnS0A){>op+v%+)6;pqKZezdzl zE4%bhZ~U-*N0XK>iIxKZD9Go3obK*^b`56be@+z4&Z@aGu;bPJtX_8unjef8K8w?u 
z0ifZq5iSl0Ejkx_-Utk#<GSfw6(qF*)3pnN62a-O5gb1V#ksQ37v8wXf+U3VJq}OY z1BKbzjfS$^^YV`buUKLan$ofW0ALi%bw5eJ?dD}8S@YS+ysWHTa}sdl+peNFvkxt0 z+td6?dc<n96%59?6JoBlLs3C5!3tnfNNO{ZSO@+bcH(~QsV}4Rr)!HuRQ3z&gQ>vm zu*;+W1ao7xoAr0;(O3S4-Mqxsx1?nQ0F)P(V33t*!>iL>=F_tUsZI-a4GK5EZ3}K` zPkD}@BG4voAqs+~6GqW}6H5>if7M814;up^s^y8bnA8?j5|DeA;M`ZR=kJ4{P!o1U z<HG<b4H}+!+HmIa+6}uBnj7@&`Wybs(kFUa(XxIG``}~fZC6tu%dDtQdyaXfZ_Dg0 z&4VAzvtm;gE%vn5{)pyETzVrn5hMI?)W0Zym?EyJy2a%K#O=i-H)GPfFp0I`Ph(Gi zp5XX@$Bz9i%pXT8i+Y9I6*D}PZsByv;n=rp5x?hRR}9!!-S8of&;eTZ&!HcDjNnbz zk{5^RtjC&?$~AVOGc)IXhBlQ*rB;A-7Ynt)CJ-M$()>79cx~RZnm1fj9E`UbNo_-t z8xdy!DNbO=zlMG8GuW{&p_AW43(qYo-_q17%=fz-`6nn;=J*9x4(jN>59pt^Dt@uF ztS+EJ3i7{yh7r%@HN(B~I}@|AqH=9yv&X(S+=$MuD<>H0{d-jSVK>l9^Fuvq>+x|@ z62$2PGmJP{)Rw^dkbfGTxev`9L9MO#<Dug^^KL`w(X@qk_9Y`6n|FBPuG(w+!+m=A z`#-8X+iC5^Qb)dgDWG3Xb0HV-<k+<SWi}=6nlF^ue*T#fx%>%IvJ$lQ?VC=@#RPb5 zJtT@&WHN{u0H?ov9^@dHMGMCu_YG{}X{uwfkpCB1I5gCRYRY?o+G}B^R;H25%BKxy z9;uBSTn9~y{%}mImz6s9>C-oSg5saOn=!|7+;eq8^D+qF7vcQzIo58PMGL)k%xs$) z`0dy#1dt?Rvfy-sBtd;d3#iSbTELXX(9(n0!c&MpRV`Zg5=Z49ww9_7M?q`|qzl5Y zg22=!aH?q)(1fj=fxIJ3J!h-;9O%{i_kDz?mcW&ZqK*MTKtGo!pYgabIjzqaBZHM# z;Owzk)^D4|=6h-;yIEz%DRW6c%)+B);~<g%lLRpfj4grspvAI~@lk70y#S@hQU4V9 z=Ru2gFKt$2R2OUL0w{H%GZiVDNFhUcQHO}28;(pQWF0E!Nnvh=x;J|=N&QqamMj-_ z^y9}W1U4XVmAURD%rC5_J~QT1$~}uSRao%t3T|rQxNOJ82x?r=c`yQ6LZnn?oSg#^ zlpGpNBjkZXjma@_MO5p6=BSH|=__EUzMxYPbfv1WwukN!r|}g~)c^ofCu-%5Kv#k< zE`ik-O&tY5%0tD+VzHi4RV^F(QXc%=Nf<v~)}V^e9I65ovyfKVB$Ot#bcqPORZs~r z`zTce6yAp(#MGJmid!HAg0Pi{(fDYZx<WCpRV(9o*lT}<nRa-5snk&b<V#4ZqSGPH z1qDT`Ge9g7PQ%eaM97Qsg@Mvc`0W6ovX6p5q3-p*kk5w(8XrB6SOF{4{>lY7=K1@O zL`MPO`iL4_3~u%PM(P>V&Rjq&Ai|cQ9pQ)O5f;r0gh^{@8q)kYkqDGucZm4>s&<E> zf04yjpzcKwQ{#^fnukQoNgW3O9<Ey4%JM=0Vr1bh*$_pA9ZIV^ZY12O@MD0`@P*XM zc}Vz+{C-84;VnGeiwpjut|X>CDrKK9PK`f7@cezutD^u291IrE7~Ftb2aQW@iMX&( z!W3pf8x|oB5V5X;m_3p6h~O5z(H$Myg?)ci<3qryvjU-hi2*lJ<B#vh*@(_iN0DX= z7*&!A$smyGYwm6$#tcWdh4hBPj+ATY65_zIlr0W;joeJDc7`c@?klP8+NW^+01p3K 
zy4qs$B~oVr;NYR8)snK6&mk$eDJcj1jkF&cGIUA7V4T3)1ww;7i-6Vy9PP$88bB&5 zb?#-Nm(>@;^QBV905A&11|(53sw4`bB?S^)iW3g1;usdCXnKqlSR{}*O?9XWQWh*n z-Bz(CF%}?J*x@|wwkz5fAJW~m|MaVZak#DxHeWJz3;?V4;;1EKl`erqu@w3XQ=g!v zChn~nvzjrqQCe7o;{+TxzYv~dh_N0`?;GW6BP+K+T3T=cgcjBib~P`xJk&7&%q4Ke z(1nsoBuQ1)x>pNbedXC&pe1vv)XJnn{De?wgwRy*&4AD<CTIfGmQN)Y(nP^a)p%rm zwzSEaOQMbeV9gpF$CNEV(j^f#Z|Pp3823g5jkP^WVF85lHk~twT7{^FH}LQ6`tzpw zTJ%W*C7G=Sg2j@oZ@}7RrOsJ^60Ry4l#Cd>C;&A2DS<d9Mb8@nB5V;=0672v3G_)s zK~#2|2`DOPhs6MhOA7-j;`3Ynzag%FF=T8#07@lRHZQR})G+`U!LgPuAmbQJ6#!87 zci|YIT{jTUAH(c8-yb`4Gn7AA6><%+L10|jW}1Bo<XA{{6S4sGUs{drQ70b<J17~2 zF4giMNC6V*1zuT}_l2}}01*X6SaYFn0!Ro=^HN{uqAA^GTHhu}JW*0TraJXg$kSD8 zP3Nd%7N8X1DrBsZu|aLEgJicLL8MPh>0&D;SEzyCuZ!$sEJJj}(9Wb!bW$riztQyB zL#4UFLU6pmr(6#2K!@jPpo2Ha`FQHng(Y1q)dRq(tY>ox*p6j#i=eBmS;1D;f`~xF z_qh}*DcQjq0DK>Y-^;T94<Je%0l;Xv-lT;LN}^Dx4*(q-lDtOt7eg`3Zy1d?8Y(s# z8n!a?OQi<=)FN~Z5S<L#dx>{zNu7xS_G64oN`<r<lKDcd-`#@(g$9qimjet9Xi>CY z8Q(*_P<7$KPAG5H@7GmG+hAz8;#c^d1trxZ_4xxj`bIEx$N~(U#1TtME$Ng?(uPpo z-7jc~rp&0@H@L1B8#gp_0~J?cs0T~5Y&+CuKkD`YB*H^M%ccgUmyJ3CfQeq5Qb1ai zl;`1vzdw)}(CX)a7cG(%rKK{l6*Y?o!Y(14H+md*|7h~Xv|E89B)e+7@_kmd>e8iB zM*uL~jS)l2TGFn;3kx~jEl|){=4c!k;y@B}eMuGpnp%LS!fqtoW<qZl=)TrH6)Y8H zxQ(7%5_JRsu8Si|3MFL>PW1tYYI(AwG7H#<G0lY9jG3XdMGG^A_F@d>SIxs)CFE~# z&+tBSttD`yWkc7(*1}jwW^23(d=0I`+ZpOe5IE=HB$bp3DWN<I2!KGMOVC1Prez^L zyl<4Yz^v$-WKAtZCCv!d7>++F96t(AJYYF@$}%@y{vt}I8~O%?jXMqNw+WkeLU*5q zz{b|ONZ>JTZAakIYbH>V3onKM6#dS>6w(m@*aAkABrqg21b{?_a%V%YPFIE64nrg9 zb%2PLu^>sq*>i^P9fm{y&vN>hWo}vtf(6+wl$oA@u@lNu4?-#hJp;n_TMWB@z~$<# ziX)ZpCAMf2EQ)Jlmxygp*0GT4t(7ShsQboWC#WL;&;$m>Q;S!b1q4WDL;k<PRu$^P zP)rw$MVx@ERwP_jAn+WXeO7q*9?KK=1WcY$N&)z`yr`vbAyBAHM@Rr_z%fwF!{iy| z{{I{B$Twi!4#Ugd?6P62l1*ByZx;*LHtrAM>nv_>INbzFrq@&fkk5DaolWQv0K5RP zf`fyjq0W0rsvjR)lGeuoByujUQ1BeioD;tLb<5Fj2h2=pRp^zWRYmZsWr2%>S|15) z*(g~D%tD^RJREz#^4!Cg)mw#qKjg4sn?p|~K<8{*ZEbeG2p|C@QZ-&Iyd2t5>MX*v z-qcoyUTq3BmQhlFE@>g3aG0DDzVnrU@7<^5X3K)FTJ428lSy>lDvtFH@x`pmM}FcH 
zOy361Ew_;9+QB=gW_jntd3McB<LN^CRm#({&poDm^@z{NM&X7x88+SEkxg5esohB9 zEI}jdz+xU?!SQPR3Ih1*zo+MopJ@EWk4vSF0HEZc2_IEK16y+e;iqn~LXI)(6~;;^ zT@{?Hq0r?pI|u*tdCODx`4n>CYmI>dxDKqmin0C`{PA@=`PF^zq7-iHK?n}sOOAW_ z{R8kn<+Y@)*~P!e6?osdGhFTGC_l+ua7X8#R=)Ay1HQA$vgd~!cI@}?d<)Y-`R3u} zUz%Bi=o&!_n{la&8`5cd;Bo^%4s|_z6kin8g=J!p2AbW-3WNZmwme!hgAPHDVeAat z|Azs`zGDe&nfKxPy0Qc-*Ymd{EBM(rj&ib{cHa&GUuO>Z+~^Mev}X%_tGDwT=TGx5 zC(e<n#w7-(Pb=U0jL&!f9Cp0cVb2e{Bt(%57CRNQa6}VppdnB=9<@K20trj>)tP0b zjsPI#6W~!0B`-u=05mu4ab`XN3_I8X!?_c1=(7PQ9xOB2YRIB03=A^UJ;*2jYLfRI z{3NxtRhLFOdYB@IIXAkWcc)AI{MOz4SZ<DAA3Mcr39a^#pN2z!5%9=Y1Gc}`;fA+5 zc-;nb!D1(DP1{2n-$?thAQ20YbSOj{x17`w02GS^p2wUx%!#17u#RwX9!S(?y6Q~3 zE&JQ>2tc|GcEmux!&CPKJoIs&v7=glGcDs~)jG}&t>B|?{1~5D&c2M^jStL>ev419 z+RL9@`*L1hwA^v_8FrS63!_ji2aMnSQozG^25h{=aKl?2`nI^3$pAeYfKN-;APFeU z*5(0>1AEh*{g-e@0FZ~037B*YlLA(?JnEkwQ%tuIL`^#j=N9n5S?%DDIOHuncxS-l z-zbuw)<ySmJ#^$M9_%0DKfK{%{9`BjQlU|>9AaJ%@r@7e=ce_mS)1+WV^e2&^UNe( zC7)k{C%<WV@|%{Sjl%VBaoBWo0-3bPq$L<{u&$5=O$irH)cz)(O5jJo^}F2EnNh$Y zivG(OHx8$P0$RRLVWzy0w9_lt8Lc7T1!t8*X@$WVFdRE%dF-Ii@ds?3Ijm4Bjjr0j zKXk3+-9PvN9`8sW8jyf;H+kUb=nme#Vhep+uH%D~W4vSRx%!#A^G_*%`@25h%)*Y> zIqZC`OLjBpn1!>JV9eIJhEkVd>}XK+Y3U@hiyE-}{pk#S^ku9_c)W4N5WjMMimjFO zTi@-n<0e5Tl@g#mf;V7z_94q-e;07<+qQ10lrZ%UkY9NfUmRM=dtUZF#yi#arPAnr zTykXgzKoyF&GOzeCt2&4;`0uy6Sln4VcV^Su0EkOZZWyCA2X{RKKoMz@(V`)>NVW+ z)<0ocmxhf}#{h80Eu;g7>k=NfJ08DKScqk_L&CMUmgj-3597y`Gml#e4Zd{a7+84~ zCkBT3t=Hbc?{})b%R!@HdTt@PW;eSM@Y`pf;}x^hjbDAu4hbWhg|5D`7-q+nXYQ|A z{OdOJAAa!D%Nx@~sbc^b1(V&&fK774hBbWV*>kLIStvwZS^QNi`DS+yzi{gvJk+6P zc8X+l2VQoVo-I3g|I7v6aqcvOb_w3RznA%Ue!24xgE~6ZcmSLed+8q-X0Igp^29XB zX8BVo?C>B@b@%egnf-kH_S-Ld826%~(FLop;X3vh%jon4ZW$k^yBQc#N$j?byyezU z@|8}vuj6ygqZPBbhOKUb|28?zP5B}TwMfFl@5(Zf?czI`G=Da^pU+<jLT^H5bO-KD z-^Vqoyen5=|J)2$7k!d}k4YuSb!T~^zn`DI^%Iw!)q9CvqL=6;dWl}5m*_I0{|^ay VQeW-~U1tCQ002ovPDHLkV1l%VKLr2) literal 0 HcmV?d00001 diff --git a/resources/images/star.png b/resources/images/star.png deleted file mode 100644 index 
6eb1fb890f14077dba504f54c8f9affd15561496..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1737 zcmV;)1~&PLP)<h;3K|Lk000e1NJLTq000;O000*V1^@s65h0H)00004XF*Lt006O% z3;baP00006VoOIv0RI600RN!9r;`8x010qNS#tmY3ljhU3ljkVnw%H_000McNliru z*$E2)AQnSr`SSn(21ZFlK~zY`Rg`ONmUkJ4um5v;&u!nHTiVhChzta}v4Nt(0$T>t z7>(5NVKFgS788t#CY#1cx0oz5YE(>AGQ$==I3aON3EK#>;M~B3lM3TdXiI4+J-p|` zb9kTUx&6>(iTC$k?%yG~V4E0-BoS#qBqfL;NJNmxC}JQI*~!U>oS^^QGaSD09LMD@ z+w(Skh;+#g>g5H}@>iN=f_ACWB|?`7%S345o)AM0L=mY&BsD4+a*~K3rXV^yB_i`A z|N9cpo!ZZZbKmEh`)htE!ZJjfjw05`k`aZ7j_kIOoe^X-ib!0YF4H!Xz$k*%2Kpe# z3do~I3$RT)#KzdLT<kplQ+~U?hh=5}H}5d9{b}|d`<#@A{y>dKfKH$T1!#Z{Xp~S& z1hWKn_n|q2L=7r7%x+Rr1q?=%b)%2HlT}8ZdYW-O?01fke)KLsYI;rneh4aQ2!|j9 zK={AYP-3Q(6j#Aw2V~AdcJmPlo`)C^XA2F}^X%;E#-N=j?dUDtgf{}7V7*G*XP;SN z#2jOnCG0aqozX>IG(@3`LN|bVz(tAEuuT=dJ1Rmm#w$w(f|;{?BihXBwQHF|2NbOS z3{(HP&LiyN*%z<u_$BOyr38W^906UxD1tJ8U`d9~nO;K_f%+`kA@-@^w+^iuyTG3Q zZcMt!ps)&xVtqH+o<5e_i+ua%&d{ISC|(TQ1USQjgOcFmhe`>%+WFGS1~$<<t<ij< zOzLuxWZI;AV;dU=n%FqFiFsBsx9C%yb13yyapJdFQMu0Gn?+6<P)dZ%H!JS<2TZjk zc|bshB({3@Sze7d(&pB&x{{*VZ6NR0k!?xiZEQtvXe5PBlo*9co#~8AzBNO<yFd>v z?O~4kT%NdJ!3oObatlx?;3jL_Pn*medYa`lF|hq{vV5L#96H<pD-yx;r|3Xi>Na$d zKu41rks4U2rH!-}5>5SNnIS?B)!D~2?vS`WNn0X{qrd(W7OsB8=311VmH|Q>dJPSW z7>im3aB=altAbzg(ZnNaIYgr_mgxf)34{z)icXaody$1Rx6$Xy+*<b-N0e~1o`XXx zPtEaitedT0_&S#`FiO{HPxW$vSz@ALlyssMorv#aSuVsX1ThaON6-re#B-P)AE)%e zWvuxOSH7}^KkqugjIxowC@k=@T6wC(5866evwi~^8Y$oqdM>yM73ic*MUxQ_4?V>7 zal#t53NSMTyiA7dm7A0&rn&jdqx|u$7nmB^f1q7mL#%_)@H;5*J+<c8A}_7}6g_L7 zU<{39RuR}CwM310Lya^(b%dw_I61-YEmQYk0rU1feADHhn+7>T2iN%pwt5>~TO-Hg zzZdLI(D~K7H+S#h$4xMMew44#N~#iJnQPFfTy%=~<Zvkxh7PLYW8hMgJgJV5?4}L8 zPYVmY0XN=GnS$iCZ}HVOc)Wi0Fcv%+@($5@i)w!#)xcrEH)*YD)JcF&1)nlCD){1} zya0nm%zPdO;#^~Zan9rN>NfY&6pQN1vrS5NlT=3~1llOyE{j^ZZuTH=bhMGClPO%l zq>P5<xtM-{9)~18Z4_aq#s}#lYhpz_^(1?Sw#wOKLI;qS2dY`4AN^zS)kt~)b3_OT zu*jiNz+%p}@v}L!LJ6-}!pZ0Had1h2X;6(@6#W2j(AO+sCjf*fqOlqE38;$4aL%wS 
zUnP!%&m573dzcb%FYuTen?lAW2?CoNcwA+U>Rg6|=OI~}dd|~x28>Uj-GFCMB*81E zh39wFsFP|~Lr<B^5$6Fl9M`3I{vwUGgPF9r<vV!qP13$1hW~IU=VDPxR-9U$iG@vM z6g&kX$FG?5qZ_Qk#~H5y=6+t3iF-7NO{8lMYb=L+c%M2uWaf)~{Oh5;oX4Yj`2Y`( zPp~aFO=oR@<nktBIh%+c(r7JJB(!t|tiv@Z1H;2M5M6tk=zNwstwwx)5q+jWadRJk zS@jeb=%T`hz`rRC#(8t_A^yQtHXJSS%>|vtf=$x(seg${6M8kaG`p0r4DO(@1GtGc zydLlR->9H7(b~?r*2j3Ctz6<G98MtkR2cb<1KUk_Jxj~a-W&Q3!X!Il4ly&Nc$i-A fwF+<-xP<jTK4l8oonlKf00000NkvXXu0mjfX%{Ep diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index dd1b14a736..a44a18db95 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -8,16 +8,13 @@ __docformat__ = 'restructuredtext en' import sys from xml.sax.saxutils import escape -from itertools import repeat from lxml import etree from calibre import guess_type, strftime -from calibre.constants import __appname__, __version__ from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML from calibre.library.comments import comments_to_html -from calibre.utils.magick.draw import save_cover_data_to JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]' @@ -51,26 +48,24 @@ class Jacket(object): def insert_metadata(self, mi): self.log('Inserting metadata into book...') - fname = 'star.png' - img = I(fname, data=True) - - if self.opts.output_profile.short_name == 'kindle': - fname = 'star.jpg' - img = save_cover_data_to(img, fname, - return_data=True) - - - id, href = self.oeb.manifest.generate('calibre_jacket_star', fname) - self.oeb.manifest.add(id, href, guess_type(fname)[0], data=img) - try: tags = map(unicode, self.oeb.metadata.subject) except: tags = [] - root = render_jacket(mi, self.opts.output_profile, star_href=href, - alt_title=unicode(self.oeb.metadata.title[0]), alt_tags=tags, - alt_comments=unicode(self.oeb.metadata.description[0])) + try: + comments = 
unicode(self.oeb.metadata.description[0]) + except: + comments = '' + + try: + title = unicode(self.oeb.metadata.title[0]) + except: + title = _('Unknown') + + root = render_jacket(mi, self.opts.output_profile, + alt_title=title, alt_tags=tags, + alt_comments=comments) id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml') item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) @@ -98,7 +93,7 @@ class Jacket(object): # Render Jacket {{{ -def get_rating(rating, href): +def get_rating(rating): ans = '' try: num = float(rating)/2 @@ -109,16 +104,11 @@ def get_rating(rating, href): if num < 1: return ans - if href is not None: - ans = ' '.join(repeat( - '<img style="vertical-align:text-bottom" alt="star" src="%s" />'% - href, int(num))) - else: - ans = u' '.join(u'\u2605') + ans = u'\u2605' * int(num) return ans -def render_jacket(mi, output_profile, star_href=None, +def render_jacket(mi, output_profile, alt_title=_('Unknown'), alt_tags=[], alt_comments=''): css = P('jacket/stylesheet.css', data=True).decode('utf-8') @@ -139,7 +129,7 @@ def render_jacket(mi, output_profile, star_href=None, except: pubdate = '' - rating = get_rating(mi.rating, star_href) + rating = get_rating(mi.rating) tags = mi.tags if mi.tags else alt_tags if tags: @@ -153,8 +143,6 @@ def render_jacket(mi, output_profile, star_href=None, if comments: comments = comments_to_html(comments) - footer = 'B<span class="cbj_smallcaps">OOK JACKET GENERATED BY %s %s</span>' % (__appname__.upper(),__version__) - def generate_html(comments): args = dict(xmlns=XHTML_NS, title_str=title_str, @@ -165,7 +153,8 @@ def render_jacket(mi, output_profile, star_href=None, rating_label=_('Rating'), rating=rating, tags_label=_('Tags'), tags=tags, comments=comments, - footer = footer) + footer='' + ) generated_html = P('jacket/template.xhtml', data=True).decode('utf-8').format(**args) diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 519d533ff6..6c50a71b92 100644 
--- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -376,7 +376,7 @@ class TagsModel(QAbstractItemModel): # {{{ 'series' : QIcon(I('series.png')), 'formats' : QIcon(I('book.png')), 'publisher' : QIcon(I('publisher.png')), - 'rating' : QIcon(I('star.png')), + 'rating' : QIcon(I('rating.png')), 'news' : QIcon(I('news.png')), 'tags' : QIcon(I('tags.png')), ':custom' : QIcon(I('column.png')), From 95a02d69ad0948df76aafbad5eb33ae2a6a2a661 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 14 Sep 2010 21:03:40 -0600 Subject: [PATCH 33/43] ... --- src/calibre/library/database2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8a5ab75c3c..2df6b3bdc4 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -597,7 +597,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return identical_book_ids def has_cover(self, index, index_is_id=False): - id = index if index_is_id else self.id(index) + id = index if index_is_id else self.id(index) path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') return os.access(path, os.R_OK) From e0afe753ccaa45e2e7ca6da602132bf6d25ef0db Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Wed, 15 Sep 2010 04:55:08 -0600 Subject: [PATCH 34/43] GR wip --- src/calibre/customize/profiles.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index b9a159ee7d..97b84f1286 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -555,11 +555,9 @@ class KindleOutput(OutputProfile): periodical_date_in_title = False @classmethod -# def tags_to_string(cls, tags): -# return u'%s <br/><span style="color: white">%s</span>' % (', '.join(tags), -# 'ttt '.join(tags)+'ttt ') def tags_to_string(cls, tags): - 
return u'%s' % (', '.join(tags)) + return u'%s <br/><span style="color:white">%s</span>' % (', '.join(tags), + 'ttt '.join(tags)+'ttt ') class KindleDXOutput(OutputProfile): From d568fbb3d82c593ba2e496e905f6f400fd958764 Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Wed, 15 Sep 2010 06:56:44 -0600 Subject: [PATCH 35/43] GwR apple driver bug fix --- src/calibre/devices/apple/driver.py | 41 +++++++++++++++++------------ 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index c9bc04a242..5fe36faf75 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -207,8 +207,8 @@ class ITUNES(DriverBase): for (j,p_book) in enumerate(self.update_list): if False: if isosx: - self.log.info(" looking for %s" % - str(p_book['lib_book'])[-9:]) + self.log.info(" looking for '%s' by %s uuid:%s" % + (p_book['title'],p_book['author'], p_book['uuid'])) elif iswindows: self.log.info(" looking for '%s' by %s (%s)" % (p_book['title'],p_book['author'], p_book['uuid'])) @@ -303,7 +303,7 @@ class ITUNES(DriverBase): this_book.device_collections = [] this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None this_book.size = book.size() - this_book.uuid = book.album() + this_book.uuid = book.composer() # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, book) @@ -732,15 +732,15 @@ class ITUNES(DriverBase): for path in paths: if DEBUG: self._dump_cached_book(self.cached_books[path], indent=2) - self.log.info(" looking for '%s' by '%s' (%s)" % + self.log.info(" looking for '%s' by '%s' uuid:%s" % (self.cached_books[path]['title'], self.cached_books[path]['author'], self.cached_books[path]['uuid'])) # Purge the booklist, self.cached_books, thumb cache for i,bl_book in enumerate(booklists[0]): - if False: - 
self.log.info(" evaluating '%s' by '%s' (%s)" % + if DEBUG: + self.log.info(" evaluating '%s' by '%s' uuid:%s" % (bl_book.title, bl_book.author,bl_book.uuid)) found = False @@ -781,10 +781,10 @@ class ITUNES(DriverBase): zf.close() break -# else: -# if DEBUG: -# self.log.error(" unable to find '%s' by '%s' (%s)" % -# (bl_book.title, bl_book.author,bl_book.uuid)) + else: + if DEBUG: + self.log.error(" unable to find '%s' by '%s' (%s)" % + (bl_book.title, bl_book.author,bl_book.uuid)) if False: self._dump_booklist(booklists[0], indent = 2) @@ -905,7 +905,8 @@ class ITUNES(DriverBase): # Add new_book to self.cached_books if DEBUG: - self.log.info(" adding '%s' by '%s' ['%s'] to self.cached_books" % + self.log.info("ITUNES.upload_books()") + self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % ( metadata[i].title, metadata[i].author, metadata[i].uuid)) self.cached_books[this_book.path] = { 'author': metadata[i].author, @@ -943,7 +944,11 @@ class ITUNES(DriverBase): new_booklist.append(this_book) self._update_iTunes_metadata(metadata[i], db_added, lb_added, this_book) - # Add new_book to self.cached_paths + # Add new_book to self.cached_books + if DEBUG: + self.log.info("ITUNES.upload_books()") + self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % + ( metadata[i].title, metadata[i].author, metadata[i].uuid)) self.cached_books[this_book.path] = { 'author': metadata[i].author[0], 'dev_book': db_added, @@ -1406,8 +1411,8 @@ class ITUNES(DriverBase): for book in booklist: if isosx: - self.log.info("%s%-40.40s %-30.30s %-10.10s" % - (' '*indent,book.title, book.author, str(book.library_id)[-9:])) + self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % + (' '*indent,book.title, book.author, str(book.library_id)[-9:], book.uuid)) elif iswindows: self.log.info("%s%-40.40s %-30.30s" % (' '*indent,book.title, book.author)) @@ -1547,11 +1552,12 @@ class ITUNES(DriverBase): if isosx: for ub in self.update_list: - self.log.info("%s%-40.40s %-30.30s 
%-10.10s" % + self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % (' '*indent, ub['title'], ub['author'], - str(ub['lib_book'])[-9:])) + str(ub['lib_book'])[-9:], + ub['uuid'])) elif iswindows: for ub in self.update_list: self.log.info("%s%-40.40s %-30.30s" % @@ -2804,7 +2810,7 @@ class ITUNES_ASYNC(ITUNES): #this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None this_book.library_id = library_books[book] this_book.size = library_books[book].size() - this_book.uuid = library_books[book].album() + this_book.uuid = library_books[book].composer() # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book]) @@ -2844,6 +2850,7 @@ class ITUNES_ASYNC(ITUNES): this_book.device_collections = [] this_book.library_id = library_books[book] this_book.size = library_books[book].Size + this_book.uuid = library_books[book].Composer # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book]) From 8052d91e337345d9b0f59fa917ce2915c0bf931c Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 10:01:49 -0600 Subject: [PATCH 36/43] Use asterisk for ratings on most output profiles --- src/calibre/customize/profiles.py | 5 ++++ src/calibre/ebooks/oeb/transforms/jacket.py | 6 ++-- .../gui2/convert/structure_detection.ui | 28 +++++++++---------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 1563f764ca..a8d7eb2e0d 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -248,6 +248,9 @@ class OutputProfile(Plugin): #: If True, the date is appended to the title of downloaded news periodical_date_in_title = True + #: The character used to represent a star in ratings + 
ratings_char = u'*' + @classmethod def tags_to_string(cls, tags): return escape(', '.join(tags)) @@ -273,6 +276,7 @@ class iPadOutput(OutputProfile): 'macros': {'border-width': '{length}|medium|thick|thin'} } ] + ratings_char = u'\u2605' touchscreen = True # touchscreen_news_css {{{ touchscreen_news_css = u''' @@ -553,6 +557,7 @@ class KindleOutput(OutputProfile): fsizes = [12, 12, 14, 16, 18, 20, 22, 24] supports_mobi_indexing = True periodical_date_in_title = False + ratings_char = u'\u2605' @classmethod def tags_to_string(cls, tags): diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index a44a18db95..88c7a4ff0e 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -93,7 +93,7 @@ class Jacket(object): # Render Jacket {{{ -def get_rating(rating): +def get_rating(rating, rchar): ans = '' try: num = float(rating)/2 @@ -104,7 +104,7 @@ def get_rating(rating): if num < 1: return ans - ans = u'\u2605' * int(num) + ans = rchar * int(num) return ans @@ -129,7 +129,7 @@ def render_jacket(mi, output_profile, except: pubdate = '' - rating = get_rating(mi.rating) + rating = get_rating(mi.rating, output_profile.ratings_char) tags = mi.tags if mi.tags else alt_tags if tags: diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui index eb2892a07a..c0b3de3bd9 100644 --- a/src/calibre/gui2/convert/structure_detection.ui +++ b/src/calibre/gui2/convert/structure_detection.ui @@ -41,24 +41,17 @@ </property> </widget> </item> - <item row="3" column="0"> + <item row="4" column="0"> <widget class="QCheckBox" name="opt_insert_metadata"> <property name="text"> <string>Insert &metadata as page at start of book</string> </property> </widget> </item> - <item row="8" column="0" colspan="2"> - <widget class="QCheckBox" name="opt_preprocess_html"> - <property name="text"> - <string>&Preprocess input file to possibly improve 
structure detection</string> - </property> - </widget> - </item> - <item row="9" column="0" colspan="2"> + <item row="10" column="0" colspan="2"> <widget class="XPathEdit" name="opt_page_breaks_before" native="true"/> </item> - <item row="10" column="0" colspan="2"> + <item row="11" column="0" colspan="2"> <spacer name="verticalSpacer"> <property name="orientation"> <enum>Qt::Vertical</enum> @@ -71,26 +64,33 @@ </property> </spacer> </item> - <item row="6" column="0"> + <item row="7" column="0"> <widget class="QCheckBox" name="opt_remove_footer"> <property name="text"> <string>Remove F&ooter</string> </property> </widget> </item> - <item row="4" column="0"> + <item row="5" column="0"> <widget class="QCheckBox" name="opt_remove_header"> <property name="text"> <string>Remove H&eader</string> </property> </widget> </item> - <item row="5" column="0" colspan="2"> + <item row="6" column="0" colspan="2"> <widget class="RegexEdit" name="opt_header_regex" native="true"/> </item> - <item row="7" column="0" colspan="2"> + <item row="8" column="0" colspan="2"> <widget class="RegexEdit" name="opt_footer_regex" native="true"/> </item> + <item row="3" column="0"> + <widget class="QCheckBox" name="opt_preprocess_html"> + <property name="text"> + <string>&Preprocess input file to possibly improve structure detection</string> + </property> + </widget> + </item> </layout> </widget> <customwidgets> From 30fafed01710d563a6aaf67b12be0f7db189f4f2 Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Wed, 15 Sep 2010 10:12:04 -0600 Subject: [PATCH 37/43] GwR fix #6822 --- src/calibre/library/catalog.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index ef7569bd88..e14d092727 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -2590,7 +2590,7 @@ class EPUB_MOBI(CatalogPlugin): aTag = Tag(soup, 'a') aTag['name'] = "%s_series" % 
re.sub('\W','',book['series']).lower() pSeriesTag.insert(0,aTag) - pSeriesTag.insert(1,NavigableString(self.NOT_READ_SYMBOL + '%s' % book['series'])) + pSeriesTag.insert(1,NavigableString('%s' % book['series'])) divTag.insert(dtc,pSeriesTag) dtc += 1 @@ -2599,7 +2599,14 @@ class EPUB_MOBI(CatalogPlugin): ptc = 0 # book with read/reading/unread symbol - if 'read' in book and book['read']: + for tag in book['tags']: + if tag == self.opts.read_tag: + book['read'] = True + break + else: + book['read'] = False + + if book['read']: # check mark pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL)) pBookTag['class'] = "read_book" From a20015e1e7b656a47fd87d474c08bdaef61b0bae Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 11:09:40 -0600 Subject: [PATCH 38/43] Workaround for bug that affects some windows install causing white backgrounds on default covers to be rendered as yellow --- src/calibre/utils/magick/__init__.py | 2 +- src/calibre/utils/magick/draw.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calibre/utils/magick/__init__.py b/src/calibre/utils/magick/__init__.py index 073a030361..2707430c67 100644 --- a/src/calibre/utils/magick/__init__.py +++ b/src/calibre/utils/magick/__init__.py @@ -194,7 +194,7 @@ class Image(_magick.Image): # {{{ # }}} -def create_canvas(width, height, bgcolor='white'): +def create_canvas(width, height, bgcolor='#ffffff'): canvas = Image() canvas.create_canvas(int(width), int(height), str(bgcolor)) return canvas diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py index 301bf9912a..ed9e3d3d83 100644 --- a/src/calibre/utils/magick/draw.py +++ b/src/calibre/utils/magick/draw.py @@ -11,7 +11,7 @@ from calibre.utils.magick import Image, DrawingWand, create_canvas from calibre.constants import __appname__, __version__ from calibre import fit_image -def save_cover_data_to(data, path, bgcolor='white', resize_to=None, +def 
save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None, return_data=False): ''' Saves image in data to path, in the format specified by the path @@ -28,7 +28,7 @@ def save_cover_data_to(data, path, bgcolor='white', resize_to=None, return canvas.export(os.path.splitext(path)[1][1:]) canvas.save(path) -def thumbnail(data, width=120, height=120, bgcolor='white', fmt='jpg'): +def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg'): img = Image() img.load(data) owidth, oheight = img.size @@ -61,7 +61,7 @@ def identify(path): return identify_data(data) def add_borders_to_image(path_to_image, left=0, top=0, right=0, bottom=0, - border_color='white'): + border_color='#ffffff'): img = Image() img.open(path_to_image) lwidth, lheight = img.size @@ -80,7 +80,7 @@ def create_text_wand(font_size, font_path=None): ans.text_alias = True return ans -def create_text_arc(text, font_size, font=None, bgcolor='white'): +def create_text_arc(text, font_size, font=None, bgcolor='#ffffff'): if isinstance(text, unicode): text = text.encode('utf-8') @@ -148,7 +148,7 @@ class TextLine(object): def create_cover_page(top_lines, logo_path, width=590, height=750, - bgcolor='white', output_format='jpg'): + bgcolor='#ffffff', output_format='jpg'): ''' Create the standard calibre cover page and return it as a byte string in the specified output_format. From 57ca76e68efb7c3f615d948231ac741e60251dd1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 11:12:53 -0600 Subject: [PATCH 39/43] ... --- src/calibre/web/feeds/news.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 9ba9583c73..a140dfbf05 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -290,10 +290,12 @@ class BasicNewsRecipe(Recipe): #: the cover for the periodical. 
Overriding this in your recipe instructs #: calibre to render the downloaded cover into a frame whose width and height #: are expressed as a percentage of the downloaded cover. - #: cover_margins = (10,15,'white') pads the cover with a white margin + #: cover_margins = (10, 15, '#ffffff') pads the cover with a white margin #: 10px on the left and right, 15px on the top and bottom. - #: Colors name defined at http://www.imagemagick.org/script/color.php - cover_margins = (0,0,'white') + #: Color names defined at http://www.imagemagick.org/script/color.php + #: Note that for some reason, white does not always work on windows. Use + #: #ffffff instead + cover_margins = (0, 0, '#ffffff') #: Set to a non empty string to disable this recipe #: The string will be used as the disabled message From c006e2e14bebef07898a934bdb6225ea14b6280f Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 11:27:39 -0600 Subject: [PATCH 40/43] Database: Update has_cover cache when setting/removing covers so that the search returns correct results. Also fix an exception that could occur when adding books with a db that has been upgraded from very old SQL. 
--- src/calibre/library/database2.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 2df6b3bdc4..f5f0f724ba 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -598,7 +598,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def has_cover(self, index, index_is_id=False): id = index if index_is_id else self.id(index) - path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') + try: + path = os.path.join(self.abspath(id, index_is_id=True), 'cover.jpg') + except: + # Can happen if path has not yet been set + return False return os.access(path, os.R_OK) def remove_cover(self, id, notify=True): @@ -609,6 +613,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except (IOError, OSError): time.sleep(0.2) os.remove(path) + self.data.set(id, self.FIELD_MAP['cover'], False, row_is_id=True) if notify: self.notify('cover', [id]) @@ -629,6 +634,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except (IOError, OSError): time.sleep(0.2) save_cover_data_to(data, path) + self.data.set(id, self.FIELD_MAP['cover'], True, row_is_id=True) if notify: self.notify('cover', [id]) @@ -1087,8 +1093,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.set_path(id, True) self.notify('metadata', [id]) - # Given a book, return the list of author sort strings for the book's authors def authors_sort_strings(self, id, index_is_id=False): + ''' + Given a book, return the list of author sort strings + for the book's authors + ''' id = id if index_is_id else self.id(id) aut_strings = self.conn.get(''' SELECT sort @@ -1744,10 +1753,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): series_index = 1.0 if mi.series_index is None else mi.series_index aus = mi.author_sort if mi.author_sort else 
self.author_sort_from_authors(mi.authors) title = mi.title - if isinstance(aus, str): + if isbytestring(aus): aus = aus.decode(preferred_encoding, 'replace') - if isinstance(title, str): - title = title.decode(preferred_encoding) + if isbytestring(title): + title = title.decode(preferred_encoding, 'replace') obj = self.conn.execute('INSERT INTO books(title, series_index, author_sort) VALUES (?, ?, ?)', (title, series_index, aus)) id = obj.lastrowid From 6bbbb0a1f57635d8d5aae6398f27914e20d333e6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 12:19:26 -0600 Subject: [PATCH 41/43] Fix #6819 (Not recognising New sony PRS 650) --- src/calibre/devices/prs505/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index 4c14565c2d..094c12cf0c 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -35,7 +35,7 @@ class PRS505(USBMS): VENDOR_NAME = 'SONY' WINDOWS_MAIN_MEM = re.compile( - r'(PRS-(505|300|500))|' + r'(PRS-(505|500))|' r'(PRS-((700[#/])|((6|9|3)(0|5)0&)))' ) WINDOWS_CARD_A_MEM = re.compile( From 062d369b43b435991d0b140bdba6217c0b5b0ccf Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 13:22:05 -0600 Subject: [PATCH 42/43] ... 
--- src/calibre/gui2/device.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 45c78ce6da..f839e1d519 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -627,12 +627,11 @@ class DeviceMixin(object): # {{{ def connect_to_folder(self): dir = choose_dir(self, 'Select Device Folder', _('Select folder to open as device')) - kls = FOLDER_DEVICE - self.device_manager.mount_device(kls=kls, kind='folder', path=dir) + if dir is not None: + self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir) def connect_to_itunes(self): - kls = ITUNES_ASYNC - self.device_manager.mount_device(kls=kls, kind='itunes', path=None) + self.device_manager.mount_device(kls=ITUNES_ASYNC, kind='itunes', path=None) # disconnect from both folder and itunes devices def disconnect_mounted_device(self): From ff319ccc4daae0b1d01ccab78733412fb4edb53f Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 15 Sep 2010 14:05:47 -0600 Subject: [PATCH 43/43] Add an option to split the toolbar into two toolbars --- src/calibre/gui2/__init__.py | 1 + src/calibre/gui2/actions/__init__.py | 6 +++ src/calibre/gui2/actions/add.py | 1 + src/calibre/gui2/actions/add_to_library.py | 1 + src/calibre/gui2/actions/annotate.py | 1 + src/calibre/gui2/actions/convert.py | 1 + src/calibre/gui2/actions/copy_to_library.py | 1 + src/calibre/gui2/actions/delete.py | 1 + src/calibre/gui2/actions/edit_collections.py | 1 + src/calibre/gui2/actions/edit_metadata.py | 1 + src/calibre/gui2/actions/open.py | 1 + src/calibre/gui2/actions/save_to_disk.py | 1 + src/calibre/gui2/actions/show_book_details.py | 1 + src/calibre/gui2/actions/similar_books.py | 1 + src/calibre/gui2/actions/view.py | 1 + src/calibre/gui2/layout.py | 43 ++++++++++++++++--- src/calibre/gui2/preferences/look_feel.py | 1 + src/calibre/gui2/preferences/look_feel.ui | 7 +++ 18 files changed, 65 insertions(+), 6 
deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 1b61404589..e58dce5559 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = ( gprefs.defaults['show_splash_screen'] = True gprefs.defaults['toolbar_icon_size'] = 'medium' gprefs.defaults['toolbar_text'] = 'auto' +gprefs.defaults['show_child_bar'] = False # }}} diff --git a/src/calibre/gui2/actions/__init__.py b/src/calibre/gui2/actions/__init__.py index 57ad900fba..b2d1656367 100644 --- a/src/calibre/gui2/actions/__init__.py +++ b/src/calibre/gui2/actions/__init__.py @@ -71,6 +71,12 @@ class InterfaceAction(QObject): all_locations = frozenset(['toolbar', 'toolbar-device', 'context-menu', 'context-menu-device']) + #: Type of action + #: 'current' means acts on the current view + #: 'global' means an action that does not act on the current view, but rather + #: on calibre as a whole + action_type = 'global' + def __init__(self, parent, site_customization): QObject.__init__(self, parent) self.setObjectName(self.name) diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index f0ff794fab..add7bf1d5b 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -25,6 +25,7 @@ class AddAction(InterfaceAction): action_spec = (_('Add books'), 'add_book.png', _('Add books to the calibre library/device from files on your computer') , _('A')) + action_type = 'current' def genesis(self): self._add_filesystem_book = self.Dispatcher(self.__add_filesystem_book) diff --git a/src/calibre/gui2/actions/add_to_library.py b/src/calibre/gui2/actions/add_to_library.py index 6fc0d5fb1f..05aea8f1dd 100644 --- a/src/calibre/gui2/actions/add_to_library.py +++ b/src/calibre/gui2/actions/add_to_library.py @@ -13,6 +13,7 @@ class AddToLibraryAction(InterfaceAction): action_spec = (_('Add books to library'), 'add_book.png', _('Add books to your calibre library 
from the connected device'), None) dont_add_to = frozenset(['toolbar', 'context-menu']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.add_books_to_library) diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py index 5356d63e98..dfafcd1a39 100644 --- a/src/calibre/gui2/actions/annotate.py +++ b/src/calibre/gui2/actions/annotate.py @@ -18,6 +18,7 @@ class FetchAnnotationsAction(InterfaceAction): name = 'Fetch Annotations' action_spec = (_('Fetch annotations (experimental)'), None, None, None) + action_type = 'current' def genesis(self): pass diff --git a/src/calibre/gui2/actions/convert.py b/src/calibre/gui2/actions/convert.py index ee0f06ab71..29acfc52b1 100644 --- a/src/calibre/gui2/actions/convert.py +++ b/src/calibre/gui2/actions/convert.py @@ -21,6 +21,7 @@ class ConvertAction(InterfaceAction): name = 'Convert Books' action_spec = (_('Convert books'), 'convert.png', None, _('C')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): cm = QMenu() diff --git a/src/calibre/gui2/actions/copy_to_library.py b/src/calibre/gui2/actions/copy_to_library.py index 7127c91e8c..6b7654f644 100644 --- a/src/calibre/gui2/actions/copy_to_library.py +++ b/src/calibre/gui2/actions/copy_to_library.py @@ -80,6 +80,7 @@ class CopyToLibraryAction(InterfaceAction): _('Copy selected books to the specified library'), None) popup_type = QToolButton.InstantPopup dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.menu = QMenu(self.gui) diff --git a/src/calibre/gui2/actions/delete.py b/src/calibre/gui2/actions/delete.py index 0343c6df84..406860e4ec 100644 --- a/src/calibre/gui2/actions/delete.py +++ b/src/calibre/gui2/actions/delete.py @@ -16,6 +16,7 @@ class DeleteAction(InterfaceAction): name = 'Remove Books' action_spec = (_('Remove books'), 'trash.png', None, _('Del')) + action_type = 'current' def 
genesis(self): self.qaction.triggered.connect(self.delete_books) diff --git a/src/calibre/gui2/actions/edit_collections.py b/src/calibre/gui2/actions/edit_collections.py index e45d36fc62..7f5dd76538 100644 --- a/src/calibre/gui2/actions/edit_collections.py +++ b/src/calibre/gui2/actions/edit_collections.py @@ -13,6 +13,7 @@ class EditCollectionsAction(InterfaceAction): action_spec = (_('Manage collections'), None, _('Manage the collections on this device'), None) dont_add_to = frozenset(['toolbar', 'context-menu']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.edit_collections) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 878ba77a43..ac04652efa 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -22,6 +22,7 @@ class EditMetadataAction(InterfaceAction): name = 'Edit Metadata' action_spec = (_('Edit metadata'), 'edit_input.png', None, _('E')) + action_type = 'current' def genesis(self): self.create_action(spec=(_('Merge book records'), 'merge_books.png', diff --git a/src/calibre/gui2/actions/open.py b/src/calibre/gui2/actions/open.py index 106bfa24f6..141ff01a66 100644 --- a/src/calibre/gui2/actions/open.py +++ b/src/calibre/gui2/actions/open.py @@ -14,6 +14,7 @@ class OpenFolderAction(InterfaceAction): action_spec = (_('Open containing folder'), 'document_open.png', None, _('O')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.gui.iactions['View'].view_folder) diff --git a/src/calibre/gui2/actions/save_to_disk.py b/src/calibre/gui2/actions/save_to_disk.py index bfcc02e130..e9664b9980 100644 --- a/src/calibre/gui2/actions/save_to_disk.py +++ b/src/calibre/gui2/actions/save_to_disk.py @@ -38,6 +38,7 @@ class SaveToDiskAction(InterfaceAction): name = "Save To Disk" action_spec = (_('Save to disk'), 'save.png', None, _('S')) 
+ action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.save_to_disk) diff --git a/src/calibre/gui2/actions/show_book_details.py b/src/calibre/gui2/actions/show_book_details.py index d17d0998f1..18b0a694bf 100644 --- a/src/calibre/gui2/actions/show_book_details.py +++ b/src/calibre/gui2/actions/show_book_details.py @@ -16,6 +16,7 @@ class ShowBookDetailsAction(InterfaceAction): action_spec = (_('Show book details'), 'dialog_information.png', None, _('I')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.show_book_info) diff --git a/src/calibre/gui2/actions/similar_books.py b/src/calibre/gui2/actions/similar_books.py index 1a14869a9c..644cd3160a 100644 --- a/src/calibre/gui2/actions/similar_books.py +++ b/src/calibre/gui2/actions/similar_books.py @@ -16,6 +16,7 @@ class SimilarBooksAction(InterfaceAction): name = 'Similar Books' action_spec = (_('Similar books...'), None, None, None) popup_type = QToolButton.InstantPopup + action_type = 'current' def genesis(self): m = QMenu(self.gui) diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 2f6be24e5b..0fbf86c567 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -22,6 +22,7 @@ class ViewAction(InterfaceAction): name = 'View' action_spec = (_('View'), 'view.png', None, _('V')) + action_type = 'current' def genesis(self): self.persistent_files = [] diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index 58d5267c8e..ec7e023dc1 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -61,7 +61,7 @@ class LocationManager(QObject): # {{{ ac('library', _('Library'), 'lt.png', _('Show books in calibre library')) - ac('main', _('Reader'), 'reader.png', + ac('main', _('Device'), 'reader.png', _('Show books in the main memory of the device')) ac('carda', _('Card A'), 'sd.png', _('Show books in storage 
card A')) @@ -197,11 +197,21 @@ class SearchBar(QWidget): # {{{ # }}} +class Spacer(QWidget): + + def __init__(self, parent): + QWidget.__init__(self, parent) + self.l = QHBoxLayout() + self.setLayout(self.l) + self.l.addStretch(10) + + class ToolBar(QToolBar): # {{{ - def __init__(self, donate, location_manager, parent): + def __init__(self, donate, location_manager, child_bar, parent): QToolBar.__init__(self, parent) self.gui = parent + self.child_bar = child_bar self.setContextMenuPolicy(Qt.PreventContextMenu) self.setMovable(False) self.setFloatable(False) @@ -223,16 +233,19 @@ class ToolBar(QToolBar): # {{{ sz = gprefs['toolbar_icon_size'] sz = {'small':24, 'medium':48, 'large':64}[sz] self.setIconSize(QSize(sz, sz)) + self.child_bar.setIconSize(QSize(sz, sz)) style = Qt.ToolButtonTextUnderIcon if gprefs['toolbar_text'] == 'never': style = Qt.ToolButtonIconOnly self.setToolButtonStyle(style) + self.child_bar.setToolButtonStyle(style) self.donate_button.set_normal_icon_size(sz, sz) def contextMenuEvent(self, *args): pass def build_bar(self): + self.child_bar.setVisible(gprefs['show_child_bar']) self.showing_donate = False showing_device = self.location_manager.has_device actions = '-device' if showing_device else '' @@ -244,10 +257,16 @@ class ToolBar(QToolBar): # {{{ m.setVisible(False) self.clear() + self.child_bar.clear() self.added_actions = [] + self.spacers = [Spacer(self.child_bar), Spacer(self.child_bar), + Spacer(self), Spacer(self)] + self.child_bar.addWidget(self.spacers[0]) + if gprefs['show_child_bar']: + self.addWidget(self.spacers[2]) for what in actions: - if what is None: + if what is None and not gprefs['show_child_bar']: self.addSeparator() elif what == 'Location Manager': for ac in self.location_manager.available_actions: @@ -262,12 +281,21 @@ class ToolBar(QToolBar): # {{{ self.showing_donate = True elif what in self.gui.iactions: action = self.gui.iactions[what] - self.addAction(action.qaction) + bar = self + if action.action_type == 
'current' and gprefs['show_child_bar']: + bar = self.child_bar + bar.addAction(action.qaction) self.added_actions.append(action.qaction) self.setup_tool_button(action.qaction, action.popup_type) + self.child_bar.addWidget(self.spacers[1]) + if gprefs['show_child_bar']: + self.addWidget(self.spacers[3]) + def setup_tool_button(self, ac, menu_mode=None): ch = self.widgetForAction(ac) + if ch is None: + ch = self.child_bar.widgetForAction(ac) ch.setCursor(Qt.PointingHandCursor) ch.setAutoRaise(True) if ac.menu() is not None and menu_mode is not None: @@ -280,7 +308,8 @@ class ToolBar(QToolBar): # {{{ if p == 'never': style = Qt.ToolButtonIconOnly - if p == 'auto' and self.preferred_width > self.width()+35: + if p == 'auto' and self.preferred_width > self.width()+35 and \ + not gprefs['show_child_bar']: style = Qt.ToolButtonIconOnly self.setToolButtonStyle(style) @@ -309,9 +338,11 @@ class MainWindowMixin(object): # {{{ self.iactions['Fetch News'].init_scheduler(db) self.search_bar = SearchBar(self) + self.child_bar = QToolBar(self) self.tool_bar = ToolBar(self.donate_button, - self.location_manager, self) + self.location_manager, self.child_bar, self) self.addToolBar(Qt.TopToolBarArea, self.tool_bar) + self.addToolBar(Qt.BottomToolBarArea, self.child_bar) l = self.centralwidget.layout() l.addWidget(self.search_bar) diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index f30b2fddbb..10c2fcfe95 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -46,6 +46,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('use_roman_numerals_for_series_number', config) r('separate_cover_flow', config, restart_required=True) r('search_as_you_type', config) + r('show_child_bar', gprefs) choices = [(_('Small'), 'small'), (_('Medium'), 'medium'), (_('Large'), 'large')] diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index 
7c6c736b24..1de55d51ef 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -173,6 +173,13 @@ </property> </widget> </item> + <item row="2" column="0" colspan="2"> + <widget class="QCheckBox" name="opt_show_child_bar"> + <property name="text"> + <string>&Split the toolbar into two toolbars</string> + </property> + </widget> + </item> </layout> </widget> </item>