\n', res)
# Unwrap lines using punctuation if the median length of all lines is less than 150
length = line_length('html', res, 0.4)
print "*** Median length is " + str(length) + " ***\n"
- unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*\s*(
\n'
+
+ def chapter_link(match):
+ chap = match.group('sectionlink')
+ if not chap:
+ self.html_preprocess_sections = self.html_preprocess_sections + 1
+ self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links")
+ return ' '
+ else:
+ self.html_preprocess_sections = self.html_preprocess_sections + 1
+ self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap))
+ return ' \n
'+chap+'
'
+
+
+ def no_markup(raw, percent):
+ '''
+ Detects total marked up line endings in the file. raw is the text to
+ inspect. Percent is the minimum percent of line endings which should
+ be marked up to return true.
+ '''
+ htm_end_ere = re.compile('
', re.DOTALL)
+ line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
+ htm_end = htm_end_ere.findall(raw)
+ line_end = line_end_ere.findall(raw)
+ tot_htm_ends = len(htm_end)
+ tot_ln_fds = len(line_end)
+ self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***")
+
+ if percent > 1:
+ percent = 1
+ if percent < 0:
+ percent = 0
+
+ min_lns = tot_ln_fds * percent
+ self.log("There must be more than " + str(min_lns) + " unmarked lines to be true")
+ if min_lns > tot_htm_ends:
+ return True
+
self.log("********* Preprocessing HTML *********")
- # Detect Chapters to match the xpath in the GUI
- chapdetect = re.compile(r'(?=?(br|p|span))(?(br|p|span)[^>]*>)?\s*(?P(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?((i|b)>(i|b)>|(i|b)>)?)(?(p|br|span)[^>]*>)', re.IGNORECASE)
- html = chapdetect.sub('
'+'\g'+'
\n', html)
- # Unwrap lines using punctuation if the median length of all lines is less than 150
+ # remove non-breaking spaces
+ html = re.sub(ur'\u00a0', ' ', html)
+ # Get rid of empty tags to simplify other processing
+ html = re.sub(ur'\s*\s*', ' ', html)
+ # Get rid of empty span tags
+ html = re.sub(r"\s*]*>\s*", " ", html)
+
+ # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
+ linereg = re.compile('(?<=
)', re.IGNORECASE)
+ blankreg = re.compile(r'\s*
]*>\s*(<(b|i|u)>)?\s*((b|i|u)>)?\s*
', re.IGNORECASE)
+ blanklines = blankreg.findall(html)
+ lines = linereg.findall(html)
+ if len(lines) > 1:
+ self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank")
+ if float(len(blanklines)) / float(len(lines)) > 0.40:
+ self.log("deleting blank lines")
+ html = blankreg.sub('', html)
+ # Arrange line feeds and
tags so the line_length and no_markup functions work correctly
+ html = re.sub(r"\s*
", "
\n", html)
+
+ # some lit files don't have any
tags or equivalent, check and
+ # mark up line endings if required before proceeding
+ if no_markup(html, 0.1):
+ self.log("not enough paragraph markers, adding now")
+ add_markup = re.compile('(?)(\n)')
+ html = add_markup.sub('
\n
', html)
+
+ # detect chapters/sections to match xpath or splitting logic
#
- # Insert extra line feeds so the line length regex functions properly
- html = re.sub(r"
", "\n", html)
+ # Mark split points based on embedded links
+ chaplink = re.compile(r']*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P[^\s<]+(\s*[^\s<]+){0,4})?\s*()?\s*((i|b|u)>){0,2}\s*', re.IGNORECASE)
+ html = chaplink.sub(chapter_link, html)
+ # Continue with alternate patterns, start with most typical chapter headings
+ if self.html_preprocess_sections < 10:
+ chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}.?(\d+\.?|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\s*){0,4}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.IGNORECASE)
+ html = chapdetect.sub(chapter_head, html)
+ if self.html_preprocess_sections < 10:
+ self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern")
+ chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}\s*.?(([A-Z#]+\s*){1,9}|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.UNICODE)
+ html = chapdetect2.sub(chapter_head, html)
+
+ # search for places where a first or second level heading is immediately followed by another
+ # top level heading. demote the second heading to h3 to prevent splitting between chapter
+ # headings and titles, images, etc
+ doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
+ html = doubleheading.sub('\g'+'
'+'
', html)
+ #
+ # Unwrap lines using punctuation if the median length of all lines is less than 150
length = line_length('html', html, 0.4)
self.log("*** Median length is " + str(length) + " ***")
unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*(span|p|div)>\s*((p|span|div)>)?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*(span|p|div)>\s*)(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
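The gate above relies on line_length(), which picks a "typical" line length at a given percentile of the observed lengths. A minimal standalone sketch of that idea (the <p>-based line extraction and the helper name are mine, not calibre's):

    import re

    def typical_line_length(raw, percent=0.4):
        # Collect the length of the text between <p> and </p>, de-duplicate,
        # drop outliers longer than twice the average, then pick the value
        # `percent` of the way through the sorted list (0.5 would be the median).
        lines = re.findall(r'(?<=<p>).*?(?=</p>)', raw, re.DOTALL)
        lengths = sorted(set(len(line) for line in lines if line))
        if not lengths:
            return 0
        avg = sum(lengths) / float(len(lengths))
        lengths = [l for l in lengths if l <= avg * 2]
        percent = min(max(percent, 0), 1)
        return lengths[max(int(len(lengths) * percent) - 1, 0)]

The unwrap regex is then only applied when this value comes back under 150, i.e. when the source text was hard-wrapped at a short width.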
diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py
index 487e70c04f..b8dc7a9560 100644
--- a/src/calibre/ebooks/mobi/input.py
+++ b/src/calibre/ebooks/mobi/input.py
@@ -3,6 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+import re
from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin):
@@ -37,3 +38,12 @@ class MOBIInput(InputFormatPlugin):
include_meta_content_type=False))
accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
return mr.created_opf_path
+
+ def preprocess_html(self, html):
+ # search for places where a first or second level heading is immediately followed by another
+ # top level heading. demote the second heading to h3 to prevent splitting between chapter
+ # headings and titles, images, etc
+ doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
+ html = doubleheading.sub('\g'+'
'+'
', html)
+ return html
+
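The doubleheading substitution repeated in these hunks is hard to read once the tags are mangled; the idea, as a hedged sketch with a simpler pattern of my own:

    import re

    # Demote the second of two consecutive top-level headings to <h3>, so that
    # split-on-heading logic does not separate a chapter heading from the title
    # line that follows it.
    double_heading = re.compile(
        r'(?P<first><h[12][^>]*>.*?</h[12]>\s*)'            # first h1/h2 block
        r'<h[12](?P<attrs>[^>]*)>(?P<body>.*?)</h[12]>',    # h1/h2 right after it
        re.IGNORECASE | re.DOTALL)

    def demote_second_heading(html):
        return double_heading.sub(r'\g<first><h3\g<attrs>>\g<body></h3>', html)

For example, '<h2>Chapter 1</h2>\n<h2>The Title</h2>' becomes '<h2>Chapter 1</h2>\n<h3>The Title</h3>'. The pattern in the patch additionally tolerates non-heading tags between the two headings.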
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 584d631d0b..36848ddb8b 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -408,6 +408,10 @@ class Page(object):
# Fraction of text height that two strings' bottoms can differ by
# for them to be considered to be part of the same text fragment
LINE_FACTOR = 0.4
+
+ # Percentage of the page height which should be considered header
+ # or footer to be discarded from reflow considerations
+ HEAD_FOOTER_MARGIN
# Multiplies the average line height when determining row height
# of a particular element to detect columns.
From c9cb61a40e015059716478255ad67aa30716ea6f Mon Sep 17 00:00:00 2001
From: GRiker
Date: Fri, 10 Sep 2010 13:46:01 -0700
Subject: [PATCH 03/43] GwR jacket work
---
src/calibre/ebooks/oeb/transforms/jacket.py | 72 ++++++++++++++-------
1 file changed, 49 insertions(+), 23 deletions(-)
diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index fec4d230c3..030067850c 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -6,14 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import textwrap
+import os, textwrap
from xml.sax.saxutils import escape
from itertools import repeat
from lxml import etree
+from calibre import guess_type, strftime
from calibre.ebooks.oeb.base import XPath, XPNSMAP
-from calibre import guess_type
from calibre.library.comments import comments_to_html
class Jacket(object):
'''
@@ -24,22 +24,18 @@ class Jacket(object):
JACKET_TEMPLATE = textwrap.dedent(u'''\
- %(title)s
+ %(title_str)s
+
-
-
-
%(title)s
-
%(jacket)s
-
%(series)s
-
%(rating)s
-
%(tags)s
-
-
- %(comments)s
-
+
+
%(title)s
+
%(series)s
+
%(rating)s
+
%(tags)s
+
%(comments)s
''')
@@ -71,11 +67,18 @@ class Jacket(object):
return ans
id, href = self.oeb.manifest.generate('star', 'star.png')
self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True))
- ans = 'Rating: ' + ''.join(repeat(''%href, num))
+ ans = 'Rating: ' + ''.join(repeat(''%href, num))
return ans
def insert_metadata(self, mi):
self.log('Inserting metadata into book...')
+ jacket_resources = P("jacket")
+
+ if os.path.isdir(jacket_resources):
+ stylesheet = os.path.join(jacket_resources, 'stylesheet.css')
+ with open(stylesheet) as f:
+ css_data = f.read()
+
comments = mi.comments
if not comments:
try:
@@ -87,11 +90,13 @@ class Jacket(object):
orig_comments = comments
if comments:
comments = comments_to_html(comments)
- series = 'Series: ' + escape(mi.series if mi.series else '')
+
+ series = 'Series: %s' % escape(mi.series if mi.series else '')
if mi.series and mi.series_index is not None:
- series += escape(' [%s]'%mi.format_series_index())
+ series += '%s' % escape(' [%s]'%mi.format_series_index())
if not mi.series:
series = ''
+
tags = mi.tags
if not tags:
try:
@@ -99,23 +104,30 @@ class Jacket(object):
except:
tags = []
if tags:
- tags = 'Tags: ' + self.opts.dest.tags_to_string(tags)
+ tags = 'Tags:%s' % self.opts.dest.tags_to_string(tags)
else:
tags = ''
+
try:
- title = mi.title if mi.title else unicode(self.oeb.metadata.title[0])
+ title_str = mi.title if mi.title else unicode(self.oeb.metadata.title[0])
except:
- title = _('Unknown')
+ title_str = _('Unknown')
+ title = '%s (%s)' % (escape(title_str), strftime(u'%Y', mi.pubdate.timetuple()))
+
def generate_html(comments):
return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
- title=escape(title), comments=comments,
- jacket=escape(_('Book Jacket')), series=series,
- tags=tags, rating=self.get_rating(mi.rating))
+ title=title, comments=comments,
+ series=series,
+ tags=tags, rating=self.get_rating(mi.rating),
+ css=css_data, title_str=title_str)
+
id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath
try:
root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER)
+# print "root: %s" % etree.tostring(root, encoding='utf-8',
+# xml_declaration=True, pretty_print=True)
except:
root = etree.fromstring(generate_html(escape(orig_comments)),
parser=RECOVER_PARSER)
@@ -137,8 +149,22 @@ class Jacket(object):
def __call__(self, oeb, opts, metadata):
+ '''
+ Add metadata in jacket.xhtml if specified in opts
+ If not specified, remove previous jacket instance
+ '''
self.oeb, self.opts, self.log = oeb, opts, oeb.log
if opts.remove_first_image:
self.remove_first_image()
if opts.insert_metadata:
self.insert_metadata(metadata)
+ else:
+ jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]')
+ for item in list(self.oeb.spine)[:4]:
+ if jacket(item.data):
+ try:
+ self.log.info("Removing previous jacket instance")
+ self.oeb.manifest.remove(item)
+ break
+ except:
+ continue
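For reference, the spine scan added above keys off a single marker element; a small sketch of the same test outside calibre's OEB objects (the function name and the standalone lxml usage are mine):

    from lxml import etree

    def is_jacket_page(xhtml_bytes):
        # A generated jacket carries <meta name="calibre-content" content="jacket">,
        # which is what the XPath in the patch matches.
        root = etree.fromstring(xhtml_bytes)
        return bool(root.xpath(
            '//*[local-name()="meta"]'
            '[@name="calibre-content"][@content="jacket"]'))

A jacket page sits at the front of the book, which is why the patch only inspects the first four spine items before giving up.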
From 4c7373026b9ee8a618dccf8602740d6a7d578aa2 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sat, 11 Sep 2010 12:10:49 +1000
Subject: [PATCH 04/43] preprocessing changes for lit & pdf, added utils.py,
changed default unwrap_factor
---
src/calibre/ebooks/conversion/preprocess.py | 15 ++++++++---
src/calibre/ebooks/conversion/utils.py | 6 +++++
src/calibre/ebooks/lit/input.py | 29 +++++++++++++--------
src/calibre/ebooks/pdf/input.py | 4 +--
4 files changed, 37 insertions(+), 17 deletions(-)
create mode 100644 src/calibre/ebooks/conversion/utils.py
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 2954fd7c26..452a322d95 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -77,6 +77,7 @@ def line_length(format, raw, percent):
elif format == 'pdf':
linere = re.compile('(?<= ).*?(?= )', re.DOTALL)
lines = linere.findall(raw)
+ print "percent is " + str(percent)
lengths = []
for line in lines:
@@ -165,6 +166,11 @@ class HTMLPreProcessor(object):
(re.compile(u'`\s*()*\s*I', re.UNICODE), lambda match: u'Ì'),
(re.compile(u'`\s*()*\s*a', re.UNICODE), lambda match: u'à'),
(re.compile(u'`\s*()*\s*A', re.UNICODE), lambda match: u'À'),
+
+ #(re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'),
+ #(re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'),
+ #(re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'),
+ #(re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'),
(re.compile(u'´\s*()*\s*o', re.UNICODE), lambda match: u'ó'),
(re.compile(u'´\s*()*\s*O', re.UNICODE), lambda match: u'Ó'),
@@ -206,13 +212,13 @@ class HTMLPreProcessor(object):
# (re.compile(r' \s* ', re.IGNORECASE), lambda match: '\n
]+>'), lambda match : ''),
# Detect Chapters to match default XPATH in GUI
- (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+(\s\w+)?)?\s*((i|b)>((i|b)>)?)?)\s*(?(br|p)[^>]*>\s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?', re.IGNORECASE), chap_head),
+ (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*((i|b)>((i|b)>)?)?)\s*(?(br|p)[^>]*>\s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?', re.IGNORECASE), chap_head),
# Have paragraphs show better
(re.compile(r''), lambda match : '
'),
@@ -303,15 +309,16 @@ class HTMLPreProcessor(object):
if getattr(self.extra_opts, 'preprocess_html', None):
if is_pdftohtml:
end_rules.append(
- (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?)\s*((i|b)>((i|b)>)?)?\s*(?p[^>]*>| ]*>)\n?((?=()?\s*\w+(\s+\w+)?()?( ]*>|?p[^>]*>))((?P.*)( ]*>|?p[^>]*>)))?'), chap_head),
+ (re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*((i|b)>((i|b)>)?)?\s*(?p[^>]*>| ]*>)\n?((?=()?\s*\w+(\s+\w+)?()?( ]*>|?p[^>]*>))((?P.*)( ]*>|?p[^>]*>)))?'), chap_head),
)
if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'))
if length:
+ print "The pdf line length returned is " + str(length)
end_rules.append(
# Un wrap using punctuation
- (re.compile(r'(?<=.{%i}[a-z\.,;:)\-IA])\s*(?P(i|b|u)>)?\s*()\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+ (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P(i|b|u)>)?\s*()\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:
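The punctuation-based un-wrap rule appended in this hunk is built around the computed length; a hedged sketch of the same kind of rule, with a simplified pattern of my own that only handles <p> boundaries:

    import re

    def build_unwrap_rule(length):
        # Join a paragraph with the next one when it is already `length` characters
        # long and ends in something that usually continues a sentence (a lowercase
        # letter, comma, semicolon, ...). `.{%i}` keeps the lookbehind fixed-width,
        # which Python's re module requires.
        return re.compile(r'(?<=.{%i}[a-z,;:)\-])\s*</p>\s*<p[^>]*>' % length,
                          re.UNICODE)

Substituting a single space for each match leaves the first <p> open and the final </p> in place, so the two halves simply merge into one paragraph.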
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
new file mode 100644
index 0000000000..52be473372
--- /dev/null
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
\ No newline at end of file
diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py
index f7bb0fbfd9..35dad501be 100644
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@@ -102,7 +102,7 @@ class LITInput(InputFormatPlugin):
percent = 0
min_lns = tot_ln_fds * percent
- self.log("There must be more than " + str(min_lns) + " unmarked lines to be true")
+ self.log("There must be more than " + str(min_lns) + " unmarked lines to return true")
if min_lns > tot_htm_ends:
return True
@@ -141,24 +141,31 @@ class LITInput(InputFormatPlugin):
html = chaplink.sub(chapter_link, html)
# Continue with alternate patterns, start with most typical chapter headings
if self.html_preprocess_sections < 10:
- chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}.?(\d+\.?|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\s*){0,4}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.IGNORECASE)
+ chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}s*(]*>)?\s*.?(\d+\.?|Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*((i|b|u)>){0,2})\s*()?s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.IGNORECASE)
html = chapdetect.sub(chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern")
chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}\s*.?(([A-Z#]+\s*){1,9}|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.UNICODE)
- html = chapdetect2.sub(chapter_head, html)
-
+ html = chapdetect2.sub(chapter_head, html)
+ #
+ # Unwrap lines using punctuation if the median length of all lines is less than 150
+ length = line_length('html', html, 0.4)
+ self.log("*** Median line length is " + str(length) + " ***")
+ unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*(span|p|div)>\s*((p|span|div)>)?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*(span|p|div)>\s*)(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
+ if length < 150:
+ self.log("Unwrapping Lines")
+ html = unwrap.sub(' ', html)
+ # If still no sections after unwrapping lines break on lines with no punctuation
+ if self.html_preprocess_sections < 10:
+ self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", splitting based on punctuation")
+ #self.log(html)
+ chapdetect3 = re.compile(r'(
)(?P)?', re.IGNORECASE)
+ html = chapdetect3.sub(chapter_head, html)
# search for places where a first or second level heading is immediately followed by another
# top level heading. demote the second heading to h3 to prevent splitting between chapter
# headings and titles, images, etc
doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
html = doubleheading.sub('\g'+'
'+'
', html)
- #
- # Unwrap lines using punctuation if the median length of all lines is less than 150
- length = line_length('html', html, 0.4)
- self.log("*** Median length is " + str(length) + " ***")
- unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*(span|p|div)>\s*((p|span|div)>)?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*(span|p|div)>\s*)(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
- if length < 150:
- html = unwrap.sub(' ', html)
+
return html
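The control flow in this hunk (chapdetect, then chapdetect2, then chapdetect3) follows one idea: keep escalating to a more aggressive pattern until enough section markers exist. A compact sketch of that loop, with stand-in patterns and a substitution counter instead of the chapter_head side effects:

    import re

    MIN_SECTIONS = 10   # the same threshold the patch checks against

    def mark_chapters(html, patterns, mark):
        # `patterns` is ordered from strict (explicit "Chapter ..." headings) to
        # aggressive (bare lines with no closing punctuation); stop escalating as
        # soon as enough markers have been inserted.
        found = 0
        for pattern in patterns:
            html, n = re.subn(pattern, mark, html)
            found += n
            if found >= MIN_SECTIONS:
                break
        return html, found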
diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py
index 64a089281e..113c3d99d8 100644
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@@ -22,10 +22,10 @@ class PDFInput(InputFormatPlugin):
options = set([
OptionRecommendation(name='no_images', recommended_value=False,
help=_('Do not extract images from the document')),
- OptionRecommendation(name='unwrap_factor', recommended_value=0.5,
+ OptionRecommendation(name='unwrap_factor', recommended_value=0.45,
help=_('Scale used to determine the length at which a line should '
'be unwrapped. Valid values are a decimal between 0 and 1. The '
- 'default is 0.5, this is the median line length.')),
+ 'default is 0.45, this is the median line length.')),
OptionRecommendation(name='new_pdf_engine', recommended_value=False,
help=_('Use the new PDF conversion engine.'))
])
From faf15b2f3d611594352721d4d06407025fea1320 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sat, 11 Sep 2010 13:09:23 +1000
Subject: [PATCH 05/43] preprocess merge gone wrong, fixing
---
src/calibre/ebooks/conversion/preprocess.py | 25 ++++++---------------
1 file changed, 7 insertions(+), 18 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index e2364d961f..24a389e65c 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -168,7 +168,6 @@ class HTMLPreProcessor(object):
(re.compile(u'`\s*()*\s*O', re.UNICODE), lambda match: u'Ò'),
(re.compile(u'`\s*()*\s*u', re.UNICODE), lambda match: u'ù'),
(re.compile(u'`\s*()*\s*U', re.UNICODE), lambda match: u'Ù'),
-<<<<<<< TREE
(re.compile(u'`\s*()*\s*e', re.UNICODE), lambda match: u'è'),
(re.compile(u'`\s*()*\s*E', re.UNICODE), lambda match: u'È'),
(re.compile(u'`\s*()*\s*i', re.UNICODE), lambda match: u'ì'),
@@ -176,13 +175,6 @@ class HTMLPreProcessor(object):
(re.compile(u'`\s*()*\s*a', re.UNICODE), lambda match: u'à'),
(re.compile(u'`\s*()*\s*A', re.UNICODE), lambda match: u'À'),
- #(re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'),
- #(re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'),
- #(re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'),
- #(re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'),
-=======
->>>>>>> MERGE-SOURCE
-
# ´
(re.compile(u'´\s*()*\s*a', re.UNICODE), lambda match: u'á'),
(re.compile(u'´\s*()*\s*A', re.UNICODE), lambda match: u'Á'),
@@ -218,14 +210,7 @@ class HTMLPreProcessor(object):
# ¸
(re.compile(u'¸\s*()*\s*c', re.UNICODE), lambda match: u'ç'),
(re.compile(u'¸\s*()*\s*C', re.UNICODE), lambda match: u'Ç'),
-
-<<<<<<< TREE
- # If pdf printed from a browser then the header/footer has a reliable pattern
- (re.compile(r'((?<=)\s*file:////?[A-Z].* |file:////?[A-Z].* (?=\s*))', re.IGNORECASE), lambda match: ''),
-
- # Center separator lines
- (re.compile(u' \s*(?P([*#•]+\s*)+)\s* '), lambda match: '
\n
' + match.group(1) + '
'),
-=======
+
# ˛
(re.compile(u'˛\s*()*\s*a', re.UNICODE), lambda match: u'ą'),
(re.compile(u'˛\s*()*\s*A', re.UNICODE), lambda match: u'Ą'),
@@ -235,8 +220,12 @@ class HTMLPreProcessor(object):
# ˙
(re.compile(u'˙\s*()*\s*z', re.UNICODE), lambda match: u'ż'),
(re.compile(u'˙\s*()*\s*Z', re.UNICODE), lambda match: u'Ż'),
-
->>>>>>> MERGE-SOURCE
+
+ # If pdf printed from a browser then the header/footer has a reliable pattern
+ (re.compile(r'((?<=)\s*file:////?[A-Z].* |file:////?[A-Z].* (?=\s*))', re.IGNORECASE), lambda match: ''),
+
+ # Center separator lines
+ (re.compile(u' \s*(?P([*#•]+\s*)+)\s* '), lambda match: '
]+>'), lambda match : ''),
# Detect Chapters to match default XPATH in GUI
(re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*((i|b)>((i|b)>)?)?)\s*(?(br|p)[^>]*>\s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?', re.IGNORECASE), chap_head),
-
+ (re.compile(r' \s*(?P([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*( \s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?'), chap_head),
+
# Have paragraphs show better
(re.compile(r''), lambda match : '
'),
# Clean up spaces
@@ -322,21 +324,29 @@ class HTMLPreProcessor(object):
import traceback
print 'Failed to parse remove_footer regexp'
traceback.print_exc()
+
+ # unwrap hyphenation - moved here so it's executed after header/footer removal
+ if is_pdftohtml:
+ # unwrap visible dashes and hyphens - don't delete the hyphen, as 50% or more of the
+ # time these hyphens are for compound words, formatting, etc
+ end_rules.append((re.compile(u'(?<=[-–—])\s*
\n'
+
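The hyphen rule appended just above arrives mangled here; the intent described in its comment, as an illustrative sketch (the <br>-based pattern is mine, not the one from the patch):

    import re

    # Join a line that ends in a visible dash or hyphen with the next line, keeping
    # the hyphen itself, since it is usually part of a compound word rather than a
    # soft hyphenation break.
    hyphen_unwrap = re.compile(
        u'(?<=[-\u2013\u2014])\\s*<br[^>]*>\\s*(?=[a-zA-Z\\d])', re.UNICODE)
    # html = hyphen_unwrap.sub('', html)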
+ def chapter_link(self, match):
+ chap = match.group('sectionlink')
+ if not chap:
+ self.html_preprocess_sections = self.html_preprocess_sections + 1
+ self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links")
+ return ' '
+ else:
+ self.html_preprocess_sections = self.html_preprocess_sections + 1
+ self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap))
+ return ' \n
'+chap+'
'
+
+ def no_markup(self, raw, percent):
+ '''
+ Detects total marked up line endings in the file. raw is the text to
+ inspect. Percent is the minimum percent of line endings which should
+ be marked up to return true.
+ '''
+ htm_end_ere = re.compile('
', re.DOTALL)
+ line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
+ htm_end = htm_end_ere.findall(raw)
+ line_end = line_end_ere.findall(raw)
+ tot_htm_ends = len(htm_end)
+ tot_ln_fds = len(line_end)
+ self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***")
+
+ if percent > 1:
+ percent = 1
+ if percent < 0:
+ percent = 0
+
+ min_lns = tot_ln_fds * percent
+ self.log("There must be fewer than " + str(min_lns) + " unmarked lines to return true")
+ if min_lns > tot_htm_ends:
+ return True
+
+ def __call__(self, html):
+ self.log("********* Preprocessing HTML *********")
+ # remove non-breaking spaces
+ html = re.sub(ur'\u00a0', ' ', html)
+ # Get rid of empty tags to simplify other processing
+ html = re.sub(ur'\s*\s*', ' ', html)
+ # Get rid of empty span tags
+ html = re.sub(r"\s*]*>\s*", " ", html)
+
+ # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
+ linereg = re.compile('(?<=
)', re.IGNORECASE)
+ blankreg = re.compile(r'\s*
]*>\s*(<(b|i|u)>)?\s*((b|i|u)>)?\s*
', re.IGNORECASE)
+ blanklines = blankreg.findall(html)
+ lines = linereg.findall(html)
+ if len(lines) > 1:
+ self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank")
+ if float(len(blanklines)) / float(len(lines)) > 0.40:
+ self.log("deleting blank lines")
+ html = blankreg.sub('', html)
+ # Arrange line feeds and
tags so the line_length and no_markup functions work correctly
+ html = re.sub(r"\s*", "\n", html)
+ html = re.sub(r"\s*
\s*", "\n
", html)
+
+ # some lit files don't have any
tags or equivalent, check and
+ # mark up line endings if required before proceeding
+ if self.no_markup(html, 0.1):
+ self.log("not enough paragraph markers, adding now")
+ add_markup = re.compile('(?)(\n)')
+ html = add_markup.sub('
\n
', html)
+
+ # detect chapters/sections to match xpath or splitting logic
+ #
+ # Start with most typical chapter headings
+ chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}s*(]*>)?\s*.?(Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*((i|b|u)>){0,2})\s*()?s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.IGNORECASE)
+ html = chapdetect.sub(self.chapter_head, html)
+ if self.html_preprocess_sections < 10:
+ self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern")
+ chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}\s*.?(([A-Z#-]+\s*){1,9}|\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.UNICODE)
+ html = chapdetect2.sub(self.chapter_head, html)
+ #
+ # Unwrap lines using punctuation if the median length of all lines is less than 200
+ length = line_length('html', html, 0.4)
+ self.log("*** Median line length is " + str(length) + " ***")
+ unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*(span|p|div)>\s*((p|span|div)>)?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*(span|p|div)>\s*)(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
+ if length < 200:
+ self.log("Unwrapping Lines")
+ html = unwrap.sub(' ', html)
+ # If still no sections after unwrapping lines break on lines with no punctuation
+ if self.html_preprocess_sections < 10:
+ self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", splitting based on punctuation")
+ #self.log(html)
+ chapdetect3 = re.compile(r'(
)(?P)?', re.IGNORECASE)
+ html = chapdetect3.sub(self.chapter_head, html)
+ # search for places where a first or second level heading is immediately followed by another
+ # top level heading. demote the second heading to h3 to prevent splitting between chapter
+ # headings and titles, images, etc
+ doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
+ html = doubleheading.sub('\g'+'
'+'
', html)
+
+ return html
\ No newline at end of file
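One step of the __call__ pipeline above, the 40% blank-paragraph cleanup, spelled out as a self-contained sketch (regexes simplified relative to blankreg/linereg in the patch):

    import re

    def drop_blank_paragraphs(html, threshold=0.40):
        # If a large share of the paragraphs are empty (possibly wrapping only a
        # bare <b>/<i>/<u>), they are being used for spacing; delete them instead
        # of letting them survive as stray gaps.
        blank = re.compile(r'<p[^>]*>\s*(?:<[ibu]>)?\s*(?:</[ibu]>)?\s*</p>',
                           re.IGNORECASE)
        total = len(re.findall(r'</p>', html, re.IGNORECASE))
        blanks = len(blank.findall(html))
        if total and float(blanks) / total > threshold:
            html = blank.sub('', html)
        return html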
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index 35a8a1a9bc..e83216ae1f 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -24,7 +24,7 @@ from calibre.constants import islinux, isfreebsd, iswindows
from calibre import unicode_path
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
-from calibre.ebooks.conversion.preprocess import line_length
+from calibre.ebooks.conversion.utils import PreProcessor
class Link(object):
'''
@@ -491,20 +491,6 @@ class HTMLInput(InputFormatPlugin):
return (None, raw)
def preprocess_html(self, html):
- if not hasattr(self, 'log'):
- from calibre.utils.logging import default_log
- self.log = default_log
- self.log("********* Preprocessing HTML - HTML Input plugin *********")
- # Detect Chapters to match the xpath in the GUI
- chapdetect = re.compile(r'(?=?(br|p|span))(?(br|p|span)[^>]*>)?\s*(?P(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?((i|b)>(i|b)>|(i|b)>)?)(?(p|br|span)[^>]*>)', re.IGNORECASE)
- html = chapdetect.sub('
'+'\g'+'
\n', html)
- # Unwrap lines using punctuation if the median length of all lines is less than 150
- #
- # Insert extra line feeds so the line length regex functions properly
- html = re.sub(r"
", "\n", html)
- length = line_length('html', html, 0.4)
- self.log.debug("*** Median length is " + str(length) + " ***")
- unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*(span|p|div)>\s*((p|span|div)>)?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*(span|p|div)>\s*)(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
- if length < 150:
- html = unwrap.sub(' ', html)
+ preprocessor = PreProcessor(html)
+ html = preprocessor(html)
return html
diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py
index 35dad501be..58e7bc84bf 100644
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@@ -6,10 +6,8 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import re
-
from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.conversion.preprocess import line_length
+from calibre.ebooks.conversion.utils import PreProcessor
class LITInput(InputFormatPlugin):
@@ -18,7 +16,6 @@ class LITInput(InputFormatPlugin):
author = 'Marshall T. Vandegrift'
description = 'Convert LIT files to HTML'
file_types = set(['lit'])
- html_preprocess_sections = 0
def convert(self, stream, options, file_ext, log,
accelerators):
@@ -57,115 +54,7 @@ class LITInput(InputFormatPlugin):
def preprocess_html(self, html):
-
- def chapter_head(match):
- chap = match.group('chap')
- title = match.group('title')
- if not title:
- self.html_preprocess_sections = self.html_preprocess_sections + 1
- self.log("marked " + str(self.html_preprocess_sections) + " chapters. - " + str(chap))
- return '
\n'
-
- def chapter_link(match):
- chap = match.group('sectionlink')
- if not chap:
- self.html_preprocess_sections = self.html_preprocess_sections + 1
- self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links")
- return ' '
- else:
- self.html_preprocess_sections = self.html_preprocess_sections + 1
- self.log("marked " + str(self.html_preprocess_sections) + " section markers based on links. - " + str(chap))
- return ' \n
'+chap+'
'
-
-
- def no_markup(raw, percent):
- '''
- Detects total marked up line endings in the file. raw is the text to
- inspect. Percent is the minimum percent of line endings which should
- be marked up to return true.
- '''
- htm_end_ere = re.compile('', re.DOTALL)
- line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
- htm_end = htm_end_ere.findall(raw)
- line_end = line_end_ere.findall(raw)
- tot_htm_ends = len(htm_end)
- tot_ln_fds = len(line_end)
- self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***")
-
- if percent > 1:
- percent = 1
- if percent < 0:
- percent = 0
-
- min_lns = tot_ln_fds * percent
- self.log("There must be more than " + str(min_lns) + " unmarked lines to return true")
- if min_lns > tot_htm_ends:
- return True
-
- self.log("********* Preprocessing HTML *********")
- # remove non-breaking spaces
- html = re.sub(ur'\u00a0', ' ', html)
- # Get rid of empty tags to simplify other processing
- html = re.sub(ur'\s*\s*', ' ', html)
- # Get rid of empty span tags
- html = re.sub(r"\s*]*>\s*", " ", html)
-
- # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
- linereg = re.compile('(?<=
)', re.IGNORECASE)
- blankreg = re.compile(r'\s*
]*>\s*(<(b|i|u)>)?\s*((b|i|u)>)?\s*
', re.IGNORECASE)
- blanklines = blankreg.findall(html)
- lines = linereg.findall(html)
- if len(lines) > 1:
- self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank")
- if float(len(blanklines)) / float(len(lines)) > 0.40:
- self.log("deleting blank lines")
- html = blankreg.sub('', html)
- # Arrange line feeds and tags so the line_length and no_markup functions work correctly
- html = re.sub(r"\s*", "\n", html)
-
- # some lit files don't have any
tags or equivalent, check and
- # mark up line endings if required before proceeding
- if no_markup(html, 0.1):
- self.log("not enough paragraph markers, adding now")
- add_markup = re.compile('(?)(\n)')
- html = add_markup.sub('
\n
', html)
-
- # detect chapters/sections to match xpath or splitting logic
- #
- # Mark split points based on embedded links
- chaplink = re.compile(r']*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P[^\s<]+(\s*[^\s<]+){0,4})?\s*()?\s*((i|b|u)>){0,2}\s*', re.IGNORECASE)
- html = chaplink.sub(chapter_link, html)
- # Continue with alternate patterns, start with most typical chapter headings
- if self.html_preprocess_sections < 10:
- chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}s*(]*>)?\s*.?(\d+\.?|Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*((i|b|u)>){0,2})\s*()?s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.IGNORECASE)
- html = chapdetect.sub(chapter_head, html)
- if self.html_preprocess_sections < 10:
- self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying a more aggressive pattern")
- chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}\s*.?(([A-Z#]+\s*){1,9}|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.UNICODE)
- html = chapdetect2.sub(chapter_head, html)
- #
- # Unwrap lines using punctuation if the median length of all lines is less than 150
- length = line_length('html', html, 0.4)
- self.log("*** Median line length is " + str(length) + " ***")
- unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*(span|p|div)>\s*((p|span|div)>)?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*(span|p|div)>\s*)(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
- if length < 150:
- self.log("Unwrapping Lines")
- html = unwrap.sub(' ', html)
- # If still no sections after unwrapping lines break on lines with no punctuation
- if self.html_preprocess_sections < 10:
- self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", splitting based on punctuation")
- #self.log(html)
- chapdetect3 = re.compile(r'(
)(?P)?', re.IGNORECASE)
- html = chapdetect3.sub(chapter_head, html)
- # search for places where a first or second level heading is immediately followed by another
- # top level heading. demote the second heading to h3 to prevent splitting between chapter
- # headings and titles, images, etc
- doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
- html = doubleheading.sub('\g'+'
'+'
', html)
-
+ preprocessor = PreProcessor(html)
+ html = preprocessor(html)
return html
diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py
index 3ae9f8ccca..c151551866 100644
--- a/src/calibre/ebooks/pdb/pdf/reader.py
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@@ -21,7 +21,7 @@ class Reader(FormatReader):
self.options = options
setattr(self.options, 'new_pdf_engine', False)
setattr(self.options, 'no_images', False)
- setattr(self.options, 'unwrap_factor', 0.5)
+ setattr(self.options, 'unwrap_factor', 0.45)
def extract_content(self, output_dir):
self.log.info('Extracting PDF...')
diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py
index 113c3d99d8..14b3552b04 100644
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@@ -25,7 +25,7 @@ class PDFInput(InputFormatPlugin):
OptionRecommendation(name='unwrap_factor', recommended_value=0.45,
help=_('Scale used to determine the length at which a line should '
'be unwrapped. Valid values are a decimal between 0 and 1. The '
- 'default is 0.45, this is the median line length.')),
+ 'default is 0.45, just below the median line length.')),
OptionRecommendation(name='new_pdf_engine', recommended_value=False,
help=_('Use the new PDF conversion engine.'))
])
From f6de0bef13d7d1001b951d465cff3135aad616ed Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sat, 11 Sep 2010 22:15:09 +1000
Subject: [PATCH 09/43] replaced messed up rtf file
---
src/calibre/ebooks/rtf/preprocess.py | 624 +++++++++++++--------------
1 file changed, 289 insertions(+), 335 deletions(-)
diff --git a/src/calibre/ebooks/rtf/preprocess.py b/src/calibre/ebooks/rtf/preprocess.py
index ee45da697f..a3076651fd 100644
--- a/src/calibre/ebooks/rtf/preprocess.py
+++ b/src/calibre/ebooks/rtf/preprocess.py
@@ -1,390 +1,344 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal '
+__copyright__ = '2010, Gerendi Sandor Attila'
__docformat__ = 'restructuredtext en'
-import functools, re
+"""
+RTF tokenizer and token parser. v.1.0 (1/17/2010)
+Author: Gerendi Sandor Attila
-from calibre import entity_to_unicode
+At this point this will tokenize an RTF file and then rebuild it from the tokens.
+In the process the UTF8 tokens are altered to be supported by RTF2XML while remaining compliant with the RTF specification.
+"""
-XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
-SVG_NS = 'http://www.w3.org/2000/svg'
-XLINK_NS = 'http://www.w3.org/1999/xlink'
+class tokenDelimitatorStart():
+ def __init__(self):
+ pass
+ def toRTF(self):
+ return b'{'
+ def __repr__(self):
+ return '{'
-convert_entities = functools.partial(entity_to_unicode,
- result_exceptions = {
- u'<' : '<',
- u'>' : '>',
- u"'" : ''',
- u'"' : '"',
- u'&' : '&',
- })
-_span_pat = re.compile('', re.DOTALL|re.IGNORECASE)
+class tokenDelimitatorEnd():
+ def __init__(self):
+ pass
+ def toRTF(self):
+ return b'}'
+ def __repr__(self):
+ return '}'
-LIGATURES = {
-# u'\u00c6': u'AE',
-# u'\u00e6': u'ae',
-# u'\u0152': u'OE',
-# u'\u0153': u'oe',
-# u'\u0132': u'IJ',
-# u'\u0133': u'ij',
-# u'\u1D6B': u'ue',
- u'\uFB00': u'ff',
- u'\uFB01': u'fi',
- u'\uFB02': u'fl',
- u'\uFB03': u'ffi',
- u'\uFB04': u'ffl',
- u'\uFB05': u'ft',
- u'\uFB06': u'st',
- }
+class tokenControlWord():
+ def __init__(self, name, separator = ''):
+ self.name = name
+ self.separator = separator
+ def toRTF(self):
+ return self.name + self.separator
+ def __repr__(self):
+ return self.name + self.separator
-_ligpat = re.compile(u'|'.join(LIGATURES))
+class tokenControlWordWithNumericArgument():
+ def __init__(self, name, argument, separator = ''):
+ self.name = name
+ self.argument = argument
+ self.separator = separator
+ def toRTF(self):
+ return self.name + repr(self.argument) + self.separator
+ def __repr__(self):
+ return self.name + repr(self.argument) + self.separator
-def sanitize_head(match):
- x = match.group(1)
- x = _span_pat.sub('', x)
- return '\n%s\n' % x
+class tokenControlSymbol():
+ def __init__(self, name):
+ self.name = name
+ def toRTF(self):
+ return self.name
+ def __repr__(self):
+ return self.name
-def chap_head(match):
- chap = match.group('chap')
- title = match.group('title')
- if not title:
- return '
'+chap+'
\n'
- else:
- return '
'+chap+'
\n
'+title+'
\n'
+class tokenData():
+ def __init__(self, data):
+ self.data = data
+ def toRTF(self):
+ return self.data
+ def __repr__(self):
+ return self.data
-def wrap_lines(match):
- ital = match.group('ital')
- if not ital:
- return ' '
- else:
- return ital+' '
+class tokenBinN():
+ def __init__(self, data, separator = ''):
+ self.data = data
+ self.separator = separator
+ def toRTF(self):
+ return "\\bin" + repr(len(self.data)) + self.separator + self.data
+ def __repr__(self):
+ return "\\bin" + repr(len(self.data)) + self.separator + self.data
+
+class token8bitChar():
+ def __init__(self, data):
+ self.data = data
+ def toRTF(self):
+ return "\\'" + self.data
+ def __repr__(self):
+ return "\\'" + self.data
+
+class tokenUnicode():
+ def __init__(self, data, separator = '', current_ucn = 1, eqList = []):
+ self.data = data
+ self.separator = separator
+ self.current_ucn = current_ucn
+ self.eqList = eqList
+ def toRTF(self):
+ result = '\\u' + repr(self.data) + ' '
+ ucn = self.current_ucn
+ if len(self.eqList) < ucn:
+ ucn = len(self.eqList)
+ result = tokenControlWordWithNumericArgument('\\uc', ucn).toRTF() + result
+ i = 0
+ for eq in self.eqList:
+ if i >= ucn:
+ break
+ result = result + eq.toRTF()
+ return result
+ def __repr__(self):
+ return '\\u' + repr(self.data)
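A tiny usage illustration of the token classes defined above (the sample values are mine): each token serialises itself back to RTF, so a document round-trips by concatenating toRTF() over the token list.

    tokens = [tokenDelimitatorStart(),
              tokenControlWordWithNumericArgument('\\rtf', 1),
              tokenData('Hello'),
              tokenDelimitatorEnd()]
    rtf = ''.join(t.toRTF() for t in tokens)   # -> '{\\rtf1Hello}'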
-def line_length(format, raw, percent):
- '''
- raw is the raw text to find the line length to use for wrapping.
- percentage is a decimal number, 0 - 1 which is used to determine
- how far in the list of line lengths to use. The list of line lengths is
- ordered smallest to largest and does not include duplicates. 0.5 is the
- median value.
- '''
- raw = raw.replace(' ', ' ')
- if format == 'html':
- linere = re.compile('(?<=
)', re.DOTALL)
- elif format == 'pdf':
- linere = re.compile('(?<= ).*?(?= )', re.DOTALL)
- lines = linere.findall(raw)
- print "percent is " + str(percent)
+def isAsciiLetter(value):
+ return ((value >= 'a') and (value <= 'z')) or ((value >= 'A') and (value <= 'Z'))
- lengths = []
- for line in lines:
- if len(line) > 0:
- lengths.append(len(line))
+def isDigit(value):
+ return (value >= '0') and (value <= '9')
- if not lengths:
- return 0
+def isChar(value, char):
+ return value == char
- lengths = list(set(lengths))
- total = sum(lengths)
- avg = total / len(lengths)
- max_line = avg * 2
-
- lengths = sorted(lengths)
- for i in range(len(lengths) - 1, -1, -1):
- if lengths[i] > max_line:
- del lengths[i]
-
- if percent > 1:
- percent = 1
- if percent < 0:
- percent = 0
-
- index = int(len(lengths) * percent) - 1
-
- return lengths[index]
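A quick worked example of the selection the removed line_length() docstring describes (the numbers are invented):

    # lengths seen:        [12, 45, 45, 63, 70, 200]
    # de-duplicated:       [12, 45, 63, 70, 200]   -> average = 78
    # drop > 2 * average:  [12, 45, 63, 70]        (200 is discarded)
    # percent = 0.5:       index = int(4 * 0.5) - 1 = 1  -> 45, the "median" length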
+def isString(buffer, string):
+ return buffer == string
-class CSSPreProcessor(object):
+class RtfTokenParser():
+ def __init__(self, tokens):
+ self.tokens = tokens
+ self.process()
+ self.processUnicode()
- PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
+ def process(self):
+ i = 0
+ newTokens = []
+ while i < len(self.tokens):
+ if isinstance(self.tokens[i], tokenControlSymbol):
+ if isString(self.tokens[i].name, "\\'"):
+ i = i + 1
+ if not isinstance(self.tokens[i], tokenData):
+ raise Exception('Error: token8bitChar without data.')
+ if len(self.tokens[i].data) < 2:
+ raise Exception('Error: token8bitChar without data.')
+ newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
+ if len(self.tokens[i].data) > 2:
+ newTokens.append(tokenData(self.tokens[i].data[2:]))
+ i = i + 1
+ continue
- def __call__(self, data, add_namespace=False):
- from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
- data = self.PAGE_PAT.sub('', data)
- if not add_namespace:
- return data
- ans, namespaced = [], False
- for line in data.splitlines():
- ll = line.lstrip()
- if not (namespaced or ll.startswith('@import') or
- ll.startswith('@charset')):
- ans.append(XHTML_CSS_NAMESPACE.strip())
- namespaced = True
- ans.append(line)
+ newTokens.append(self.tokens[i])
+ i = i + 1
- return u'\n'.join(ans)
+ self.tokens = list(newTokens)
-class HTMLPreProcessor(object):
+ def processUnicode(self):
+ i = 0
+ newTokens = []
+ ucNbStack = [1]
+ while i < len(self.tokens):
+ if isinstance(self.tokens[i], tokenDelimitatorStart):
+ ucNbStack.append(ucNbStack[len(ucNbStack) - 1])
+ newTokens.append(self.tokens[i])
+ i = i + 1
+ continue
+ if isinstance(self.tokens[i], tokenDelimitatorEnd):
+ ucNbStack.pop()
+ newTokens.append(self.tokens[i])
+ i = i + 1
+ continue
+ if isinstance(self.tokens[i], tokenControlWordWithNumericArgument):
+ if isString(self.tokens[i].name, '\\uc'):
+ ucNbStack[len(ucNbStack) - 1] = self.tokens[i].argument
+ newTokens.append(self.tokens[i])
+ i = i + 1
+ continue
+ if isString(self.tokens[i].name, '\\u'):
+ x = i
+ j = 0
+ i = i + 1
+ replace = []
+ partialData = None
+ ucn = ucNbStack[len(ucNbStack) - 1]
+ while (i < len(self.tokens)) and (j < ucn):
+ if isinstance(self.tokens[i], tokenDelimitatorStart):
+ break
+ if isinstance(self.tokens[i], tokenDelimitatorEnd):
+ break
+ if isinstance(self.tokens[i], tokenData):
+ if len(self.tokens[i].data) >= ucn - j:
+ replace.append(tokenData(self.tokens[i].data[0 : ucn - j]))
+ if len(self.tokens[i].data) > ucn - j:
+ partialData = tokenData(self.tokens[i].data[ucn - j:])
+ i = i + 1
+ break
+ else:
+ replace.append(self.tokens[i])
+ j = j + len(self.tokens[i].data)
+ i = i + 1
+ continue
+ if isinstance(self.tokens[i], token8bitChar) or isinstance(self.tokens[i], tokenBinN):
+ replace.append(self.tokens[i])
+ i = i + 1
+ j = j + 1
+ continue
+ raise Exception('Error: incorrect utf replacement.')
- PREPROCESS = [
- # Some idiotic HTML generators (Frontpage I'm looking at you)
- # Put all sorts of crap into
'),
+ if isChar(self.rtfData[i], '{'):
+ if lastDataStart > -1:
+ self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
+ lastDataStart = -1
+ self.tokens.append(tokenDelimitatorStart())
+ i = i + 1
+ continue
- # unwrap hyphenation - don't delete the hyphen (often doesn't split words)
- (re.compile(u'(?<=[-–—])\s* \s*(?=[[a-z\d])'), lambda match: ''),
+ if isChar(self.rtfData[i], '}'):
+ if lastDataStart > -1:
+ self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
+ lastDataStart = -1
+ self.tokens.append(tokenDelimitatorEnd())
+ i = i + 1
+ continue
- # Remove gray background
- (re.compile(r'
]+>'), lambda match : ''),
+ if isChar(self.rtfData[i], '\\'):
+ if i + 1 >= len(self.rtfData):
+ raise Exception('Error: Control character found at the end of the document.')
- # Detect Chapters to match default XPATH in GUI
- (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*((i|b)>((i|b)>)?)?)\s*(?(br|p)[^>]*>\s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?', re.IGNORECASE), chap_head),
+ if lastDataStart > -1:
+ self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
+ lastDataStart = -1
- # Have paragraphs show better
- (re.compile(r''), lambda match : '
'),
- # Clean up spaces
- (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
- # Add space before and after italics
- (re.compile(u'(?'), lambda match: ' '),
- (re.compile(r'(?=\w)'), lambda match: ' '),
-
- ]
+ tokenStart = i
+ i = i + 1
- # Fix Book Designer markup
- BOOK_DESIGNER = [
- # HR
- (re.compile('
)(?P)?', re.IGNORECASE)
- html = chapdetect3.sub(self.chapter_head, html)
+ chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P(]*>)?\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*()?((i|b|u)>){0,2}\s*()?\s*((i|b|u)>){0,2}\s*()?\s*(p|div)>)', re.IGNORECASE)
+ html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another
# top level heading. demote the second heading to h3 to prevent splitting between chapter
# headings and titles, images, etc
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 36848ddb8b..584d631d0b 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -408,10 +408,6 @@ class Page(object):
# Fraction of text height that two strings' bottoms can differ by
# for them to be considered to be part of the same text fragment
LINE_FACTOR = 0.4
-
- # Percentage of the page height which should be considered header
- # or footer to be discarded from reflow considerations
- HEAD_FOOTER_MARGIN
# Multiplies the average line height when determining row height
# of a particular element to detect columns.
From cdb696f63bc39b9327abe809fa71e94baa6e0b86 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Mon, 13 Sep 2010 00:12:21 +1000
Subject: [PATCH 11/43] enhanced preprocessing class - looking pretty good
---
src/calibre/ebooks/conversion/preprocess.py | 18 ++--
src/calibre/ebooks/conversion/utils.py | 98 +++++++++++++++------
2 files changed, 82 insertions(+), 34 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 46308b2ea0..f6277956c8 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -62,7 +62,6 @@ def wrap_lines(match):
else:
return ital+' '
-
def line_length(format, raw, percent):
'''
raw is the raw text to find the line length to use for wrapping.
@@ -76,6 +75,8 @@ def line_length(format, raw, percent):
linere = re.compile('(?<=
)', re.DOTALL)
elif format == 'pdf':
linere = re.compile('(?<= ).*?(?= )', re.DOTALL)
+ elif format == 'spanned_html':
+ linere = re.compile('(?<=)', re.DOTALL)
lines = linere.findall(raw)
lengths = []
@@ -223,14 +224,15 @@ class HTMLPreProcessor(object):
# Remove page links
(re.compile(r'', re.IGNORECASE), lambda match: ''),
# Remove tags
- (re.compile(r'', re.IGNORECASE), lambda match: ' '),
+ (re.compile(r'', re.IGNORECASE), lambda match: ' '),
# Remove gray background
(re.compile(r']+>'), lambda match : ''),
# Detect Chapters to match default XPATH in GUI
- (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*((i|b)>((i|b)>)?)?)\s*(?(br|p)[^>]*>\s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?', re.IGNORECASE), chap_head),
- (re.compile(r' \s*(?P([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*( \s*){1,3}\s*(?P(<(i|b)>)?(\s*\w+){1,4}\s*((i|b)>)?\s*(?(br|p)[^>]*>))?'), chap_head),
+ (re.compile(r' \s*(?P(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*([ibu]>){0,2})\s*( \s*){1,3}\s*(?P(<[ibu]>){0,2}(\s*\w+){1,4}\s*([ibu]>){0,2}\s* )?', re.IGNORECASE), chap_head),
+ # Cover the case where every letter in a chapter title is separated by a space
+ (re.compile(r' \s*(?P([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*( \s*){1,3}\s*(?P(<[ibu]>){0,2}(\s*\w+){1,4}\s*([ibu]>){0,2}\s*( ))?'), chap_head),
# Have paragraphs show better
(re.compile(r''), lambda match : '
'),
@@ -238,8 +240,7 @@ class HTMLPreProcessor(object):
(re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
# Add space before and after italics
(re.compile(u'(?'), lambda match: ' '),
- (re.compile(r'(?=\w)'), lambda match: ' '),
-
+ (re.compile(r'(?=\w)'), lambda match: ' '),
]
# Fix Book Designer markup
@@ -327,10 +328,11 @@ class HTMLPreProcessor(object):
# unwrap/delete soft hyphens with formatting
end_rules.append((re.compile(u'[]\s*((i|u|b)>)+(\s*
)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
- # Make the more aggressive chapter marking regex optional with the preprocess option to reduce false positives
+ # Make the more aggressive chapter marking regex optional with the preprocess option to
+ # reduce false positives and move after header/footer removal
if getattr(self.extra_opts, 'preprocess_html', None):
if is_pdftohtml:
- end_rules.append((re.compile(r'(?=<(/?br|p|hr))(<(/?br|p|hr)[^>]*)?>\s*(<(i|b)>(<(i|b)>)?)?\s*(?P([A-Z-\'"!]{3,})\s*(\d+|[A-Z]+(\s*[A-Z]+)?)?|\d+\.?\s*([\d\w-]+\s*){0,4}\s*)\s*((i|b)>((i|b)>)?)?\s*(?p[^>]*>| ]*>)\n?((?=()?\s*\w+(\s+\w+)?()?( ]*>|?p[^>]*>))((?P.*)( ]*>|?p[^>]*>)))?'), chap_head))
+ end_rules.append((re.compile(r'
'+span
+
def no_markup(self, raw, percent):
'''
Detects total marked up line endings in the file. raw is the text to
@@ -48,7 +63,7 @@ class PreProcessor(object):
line_end = line_end_ere.findall(raw)
tot_htm_ends = len(htm_end)
tot_ln_fds = len(line_end)
- self.log("*** There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked endings***")
+ self.log("There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked up endings")
if percent > 1:
percent = 1
@@ -56,13 +71,18 @@ class PreProcessor(object):
percent = 0
min_lns = tot_ln_fds * percent
- self.log("There must be fewer than " + str(min_lns) + " unmarked lines to return true")
+ self.log("There must be fewer than " + str(min_lns) + " unmarked lines to add markup")
if min_lns > tot_htm_ends:
return True
def __call__(self, html):
self.log("********* Preprocessing HTML *********")
- # remove non-breaking spaces
+ # Replace series of non-breaking spaces with text-indent
+ txtindent = re.compile(ur'
[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE)
+ html = txtindent.sub(self.insert_indent, html)
+ if self.found_indents > 1:
+ self.log("replaced "+str(self.found_indents)+ " nbsp indents with inline styles")
+ # remove remaining non-breaking spaces
html = re.sub(ur'\u00a0', ' ', html)
# Get rid of empty tags to simplify other processing
html = re.sub(ur'\s*\s*', ' ', html)
@@ -83,41 +103,67 @@ class PreProcessor(object):
html = re.sub(r"\s*
", "
\n", html)
html = re.sub(r"\s*
\s*", "\n
", html)
- # some lit files don't have any
tags or equivalent, check and
- # mark up line endings if required before proceeding
+ # some lit files don't have any
tags or equivalent (generally just plain text between
+ #
tags), check and mark up line endings if required before proceeding
if self.no_markup(html, 0.1):
self.log("not enough paragraph markers, adding now")
add_markup = re.compile('(?)(\n)')
html = add_markup.sub('
\n
', html)
# detect chapters/sections to match xpath or splitting logic
+ heading = re.compile(']*>', re.IGNORECASE)
+ self.html_preprocess_sections = len(heading.findall(html))
+ self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings")
#
- # Start with most typical chapter headings
- chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}s*(]*>)?\s*.?(Introduction|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*((i|b|u)>){0,2})\s*()?s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.IGNORECASE)
- html = chapdetect.sub(self.chapter_head, html)
+ # Start with most typical chapter headings, get more aggressive until one works
+ if self.html_preprocess_sections < 10:
+ chapdetect = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}s*(]*>)?\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*([ibu]>){0,2})\s*()?s*()?\s*([ibu]>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*([ibu]>){0,2})\s*()?\s*([ibu]>){0,2}\s*((br|p)>))?', re.IGNORECASE)
+ html = chapdetect.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
- chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.UNICODE)
+ chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*([ibu]>){0,2})\s*()?\s*([ibu]>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*([ibu]>){0,2})\s*()?\s*([ibu]>){0,2}\s*((br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
- chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(?P(<(i|b|u)>){0,2}(\s*[\w\'\"-]+){1,5}\s*((i|b|u)>){0,2})\s*()?\s*((i|b|u)>){0,2}\s*((br|p)>))?', re.UNICODE)
- html = chapdetect2.sub(self.chapter_head, html)
- #
- # Unwrap lines using punctation if the median length of all lines is less than 200
- length = line_length('html', html, 0.4)
- self.log("*** Median line length is " + str(length) + " ***")
-        unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
- if length < 200:
- self.log("Unwrapping Lines")
- html = unwrap.sub(' ', html)
+ chapdetect2 = re.compile(r'(?=?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*([ibu]>){0,2})\s*()?\s*([ibu]>){0,2}\s*((p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*([ibu]>){0,2})\s*()?\s*([ibu]>){0,2}\s*((br|p)>))?', re.UNICODE)
+ html = chapdetect2.sub(self.chapter_head, html)
- # If still no sections after unwrapping lines break on lines with no punctuation
+ # Unwrap lines
+ #
+ self.log("Unwrapping Lines")
+ # Some OCR sourced files have line breaks in the html using a combination of span & p tags
+ # span are used for hard line breaks, p for new paragraphs. Determine which is used so
+ # that lines can be wrapped across page boundaries
+        paras_reg = re.compile('<p[^>]*>', re.IGNORECASE)
+        spans_reg = re.compile('<span[^>]*>', re.IGNORECASE)
+ paras = len(paras_reg.findall(html))
+ spans = len(spans_reg.findall(html))
+ if spans > 1:
+ if float(paras) / float(spans) < 0.75:
+ format = 'spanned_html'
+ else:
+ format = 'html'
+ else:
+ format = 'html'
+
+ # Calculate Length
+ length = line_length(format, html, 0.4)
+        self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
+ #
+ # Unwrap and/or delete soft-hyphens, hyphens
+        html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
+        html = re.sub(u'(?<=[-–—])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)
+
+ # Unwrap lines using punctation if the median length of all lines is less than 200
+        unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
+ html = unwrap.sub(' ', html)
+
+ # If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 10:
- self.log(str(self.html_preprocess_sections) + " split points marked, matching based on punctuation")
+ self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
#self.log(html)
- chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P(]*>)?\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*(<(i|b|u)>){0,2}\s*(]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*()?((i|b|u)>){0,2}\s*()?\s*((i|b|u)>){0,2}\s*()?\s*(p|div)>)', re.IGNORECASE)
+ chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P(]*>)?\s*(<[ibu]>){0,2}\s*(]*>)?\s*(<[ibu]>){0,2}\s*(]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*()?([ibu]>){0,2}\s*()?\s*([ibu]>){0,2}\s*()?\s*(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another
# top level heading. demote the second heading to h3 to prevent splitting between chapter
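The unwrap step above hinges on one number: the median length of the text inside each paragraph. If most paragraphs are shorter than the threshold, the file was almost certainly hard-wrapped, and adjacent paragraphs whose boundary falls after a lower-case letter or comma are joined. A rough, self-contained sketch of that heuristic (the 200-character threshold mirrors the comment above; the tag handling is simplified and is not calibre's line_length() implementation):

    import re

    def median_text_line_length(html):
        # measure only the text between <p>...</p> pairs, ignoring inline markup
        paras = re.findall(r'<p[^>]*>(.*?)</p>', html, re.DOTALL)
        lengths = sorted(len(re.sub(r'<[^>]*>', '', p).strip()) for p in paras)
        lengths = [l for l in lengths if l]
        return lengths[len(lengths) // 2] if lengths else 0

    def unwrap_short_lines(html, threshold=200):
        # join a paragraph break only where the previous fragment clearly
        # continues (ends in a lower-case letter, comma, semicolon or colon)
        if median_text_line_length(html) < threshold:
            html = re.sub(r'(?<=[a-z,;:])\s*</p>\s*<p[^>]*>\s*', ' ', html)
        return html
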
From 6cc332089a421e6100fa4937c5126309c483e132 Mon Sep 17 00:00:00 2001
From: Starson17
Date: Sun, 12 Sep 2010 11:28:24 -0400
Subject: [PATCH 12/43] Change Merge and Safe Merge warnings re ISBN
---
src/calibre/gui2/actions/edit_metadata.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index f0232d9859..878ba77a43 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -209,8 +209,9 @@ class EditMetadataAction(InterfaceAction):
dest_id, src_books, src_ids = self.books_to_merge(rows)
if safe_merge:
             if not confirm('<p>'+_(
-                    'All book formats and metadata from the selected books '
-                    'will be added to the first selected book.<br><br>'
+                    'Book formats and metadata from the selected books '
+                    'will be added to the first selected book. '
+                    'ISBN will not be merged.<br><br>'
                     'The second and subsequently selected books will not '
                     'be deleted or changed.<br><br>'
                     'Please confirm you want to proceed.')
@@ -220,8 +221,9 @@ class EditMetadataAction(InterfaceAction):
self.merge_metadata(dest_id, src_ids)
else:
             if not confirm('<p>'+_(
-                    'All book formats and metadata from the selected books will be merged '
-                    'into the first selected book.<br><br>'
+                    'Book formats and metadata from the selected books will be merged '
+                    'into the first selected book. '
+                    'ISBN will not be merged.<br><br>'
                     'After merger the second and '
                     'subsequently selected books will be deleted.<br><br>'
                     'All book formats of the first selected book will be kept '
From 78874a9117941de749f3b09934be8588181dd4b7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 12 Sep 2010 09:32:16 -0600
Subject: [PATCH 13/43] Use the new sorting code in the content server as well.
---
src/calibre/library/caches.py | 153 +-------------------------
src/calibre/library/server/content.py | 38 +++----
2 files changed, 18 insertions(+), 173 deletions(-)
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index dfd7086076..4f795ab733 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import re, itertools, functools
+import re, itertools
from itertools import repeat
from datetime import timedelta
from threading import Thread, RLock
@@ -584,39 +584,7 @@ class ResultCache(SearchQueryParser):
# Sorting functions {{{
- def seriescmp(self, sidx, siidx, x, y, library_order=None):
- try:
- if library_order:
- ans = cmp(title_sort(self._data[x][sidx].lower()),
- title_sort(self._data[y][sidx].lower()))
- else:
- ans = cmp(self._data[x][sidx].lower(),
- self._data[y][sidx].lower())
- except AttributeError: # Some entries may be None
- ans = cmp(self._data[x][sidx], self._data[y][sidx])
- if ans != 0: return ans
- return cmp(self._data[x][siidx], self._data[y][siidx])
-
- def cmp(self, loc, x, y, asstr=True, subsort=False):
- try:
- ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \
- asstr else cmp(self._data[x][loc], self._data[y][loc])
- except AttributeError: # Some entries may be None
- ans = cmp(self._data[x][loc], self._data[y][loc])
- except TypeError: ## raised when a datetime is None
- x = self._data[x][loc]
- if x is None:
- x = UNDEFINED_DATE
- y = self._data[y][loc]
- if y is None:
- y = UNDEFINED_DATE
- return cmp(x, y)
- if subsort and ans == 0:
- idx = self.FIELD_MAP['sort']
- return cmp(self._data[x][idx].lower(), self._data[y][idx].lower())
- return ans
-
- def sanitize_field_name(self, field):
+ def sanitize_sort_field_name(self, field):
field = field.lower().strip()
if field not in self.field_metadata.iterkeys():
if field in ('author', 'tag', 'comment'):
@@ -627,38 +595,10 @@ class ResultCache(SearchQueryParser):
return field
def sort(self, field, ascending, subsort=False):
- field = self.sanitize_field_name(field)
- as_string = field not in ('size', 'rating', 'timestamp')
-
- if self.first_sort:
- subsort = True
- self.first_sort = False
- if self.field_metadata[field]['is_custom']:
- if self.field_metadata[field]['datatype'] == 'series':
- fcmp = functools.partial(self.seriescmp,
- self.field_metadata[field]['rec_index'],
- self.field_metadata.cc_series_index_column_for(field),
- library_order=tweaks['title_series_sorting'] == 'library_order')
- else:
- as_string = self.field_metadata[field]['datatype'] in ('comments', 'text')
- field = self.field_metadata[field]['colnum']
- fcmp = functools.partial(self.cmp, self.FIELD_MAP[field],
- subsort=subsort, asstr=as_string)
- elif field == 'series':
- fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'],
- self.FIELD_MAP['series_index'],
- library_order=tweaks['title_series_sorting'] == 'library_order')
- else:
- fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'],
- subsort=subsort, asstr=as_string)
- self._map.sort(cmp=fcmp, reverse=not ascending)
- tmap = list(itertools.repeat(False, len(self._data)))
- for x in self._map_filtered:
- tmap[x] = True
- self._map_filtered = [x for x in self._map if tmap[x]]
+ self.multisort([(field, ascending)])
def multisort(self, fields=[], subsort=False):
- fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields]
+ fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields]
keys = self.field_metadata.field_keys()
fields = [x for x in fields if x[0] in keys]
if subsort and 'sort' not in [x[0] for x in fields]:
@@ -671,6 +611,7 @@ class ResultCache(SearchQueryParser):
self._map.sort(key=keyg, reverse=not fields[0][1])
else:
self._map.sort(key=keyg)
+
tmap = list(itertools.repeat(False, len(self._data)))
for x in self._map_filtered:
tmap[x] = True
@@ -733,87 +674,3 @@ class SortKeyGenerator(object):
# }}}
-if __name__ == '__main__':
- # Testing.timing for new multi-sort {{{
- import time
-
- from calibre.library import db
- db = db()
-
- db.refresh()
-
- fields = db.field_metadata.field_keys()
-
- print fields
-
-
- def do_single_sort(meth, field, order):
- if meth == 'old':
- db.data.sort(field, order)
- else:
- db.data.multisort([(field, order)])
-
- def test_single_sort(field):
- for meth in ('old', 'new'):
- ttime = 0
- NUM = 10
- asc = desc = None
- for i in range(NUM):
- db.data.sort('id', False)
- st = time.time()
- do_single_sort(meth, field, True)
- asc = db.data._map
- do_single_sort(meth, field, False)
- desc = db.data._map
- ttime += time.time() - st
- yield (ttime/NUM, asc, desc)
-
-
- print 'Running single sort differentials'
- for field in fields:
- if field in ('search', 'id', 'news', 'flags'): continue
- print '\t', field, db.field_metadata[field]['datatype']
- old, new = test_single_sort(field)
- if old[1] != new[1] or old[2] != new[2]:
- print '\t\t', 'Sort failure!'
- raise SystemExit(1)
- print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0])
-
- def do_multi_sort(meth, ms):
- if meth == 'new':
- db.data.multisort(ms)
- else:
- for s in reversed(ms):
- db.data.sort(*s)
-
- def test_multi_sort(ms):
- for meth in ('old', 'new'):
- ttime = 0
- NUM = 10
- for i in range(NUM):
- db.data.sort('id', False)
- st = time.time()
- do_multi_sort(meth, ms)
- ttime += time.time() - st
- yield (ttime/NUM, db.data._map)
-
- print 'Running multi-sort differentials'
-
- for ms in [
- [('timestamp', False), ('author', True), ('title', False)],
- [('size', True), ('tags', True), ('author', False)],
- [('series', False), ('title', True)],
- [('size', True), ('tags', True), ('author', False), ('pubdate',
- True), ('tags', False), ('formats', False), ('uuid', True)],
-
- ]:
- print '\t', ms
- db.data.sort('id', False)
- old, new = test_multi_sort(ms)
- if old[1] != new[1]:
- print '\t\t', 'Sort failure!'
- raise SystemExit()
- print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0])
-
- # }}}
-
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 6784abd8f4..ecb467b4c2 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import re, os, cStringIO, operator
+import re, os, cStringIO
import cherrypy
try:
@@ -16,7 +16,15 @@ except ImportError:
from calibre import fit_image, guess_type
from calibre.utils.date import fromtimestamp
-from calibre.ebooks.metadata import title_sort
+from calibre.library.caches import SortKeyGenerator
+
+class CSSortKeyGenerator(SortKeyGenerator):
+
+ def __init__(self, fields, fm):
+ SortKeyGenerator.__init__(self, fields, fm, None)
+
+ def __call__(self, record):
+ return self.itervals(record).next()
class ContentServer(object):
@@ -47,32 +55,12 @@ class ContentServer(object):
def sort(self, items, field, order):
- field = field.lower().strip()
- if field == 'author':
- field = 'authors'
- if field == 'date':
- field = 'timestamp'
+ field = self.db.data.sanitize_sort_field_name(field)
if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'):
raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
- cmpf = cmp if field in ('rating', 'size', 'timestamp') else \
- lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '')
- if field == 'series':
- items.sort(cmp=self.seriescmp, reverse=not order)
- else:
- lookup = 'sort' if field == 'title' else field
- lookup = 'author_sort' if field == 'authors' else field
- field = self.db.FIELD_MAP[lookup]
- getter = operator.itemgetter(field)
- items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order)
+ keyg = CSSortKeyGenerator([(field, order)], self.db.field_metadata)
+ items.sort(key=keyg, reverse=not order)
- def seriescmp(self, x, y):
- si = self.db.FIELD_MAP['series']
- try:
- ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower()))
- except AttributeError: # Some entries may be None
- ans = cmp(x[si], y[si])
- if ans != 0: return ans
- return cmp(x[self.db.FIELD_MAP['series_index']], y[self.db.FIELD_MAP['series_index']])
# }}}
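The patch above drops the cmp-based comparators in favour of key-based sorting, with sort() now delegating to multisort(). The essential trick for multi-field sorting with mixed ascending/descending directions is either a composite sort key (the SortKeyGenerator approach) or, equivalently, repeated stable sorts applied from the lowest-priority field to the highest. A small illustration of the stable-sort form (field access is simplified to dictionary lookup; this is not calibre's implementation):

    def multisort(records, fields):
        # fields: list of (name, ascending) pairs, highest priority first.
        # list.sort() is stable, so sorting by the least significant field first
        # and the most significant field last produces the combined ordering.
        for name, ascending in reversed(fields):
            records.sort(key=lambda r: r[name], reverse=not ascending)
        return records

    books = [{'title': 'B', 'size': 10}, {'title': 'A', 'size': 10}, {'title': 'C', 'size': 5}]
    multisort(books, [('size', False), ('title', True)])
    # -> A (10), B (10), C (5): size descending, ties broken by title ascending
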
From 80c976e0f24f05a5ee7a9bfce50bf7745215e339 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 12 Sep 2010 11:11:00 -0600
Subject: [PATCH 14/43] Fix #6794 (Updated recipes for Infobae and NSPM)
---
resources/recipes/infobae.recipe | 82 ++++++++------------------------
resources/recipes/nspm.recipe | 11 ++++-
2 files changed, 30 insertions(+), 63 deletions(-)
diff --git a/resources/recipes/infobae.recipe b/resources/recipes/infobae.recipe
index cda9bf83d2..b7f9cd3c6c 100644
--- a/resources/recipes/infobae.recipe
+++ b/resources/recipes/infobae.recipe
@@ -1,12 +1,8 @@
-#!/usr/bin/env python
-
__license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic '
+__copyright__ = '2008-2010, Darko Miletic '
'''
infobae.com
'''
-import re
-import urllib, urlparse
from calibre.web.feeds.news import BasicNewsRecipe
@@ -20,35 +16,24 @@ class Infobae(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
- language = 'es'
- lang = 'es-AR'
-
+ language = 'es'
encoding = 'cp1252'
- cover_url = 'http://www.infobae.com/imgs/header/header.gif'
+ masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
- preprocess_regexps = [(re.compile(
- r''), lambda m:'')]
-
-
- html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
-
- extra_css = '''
- .col-center{font-family:Arial,Helvetica,sans-serif;}
- h1{font-family:Arial,Helvetica,sans-serif; color:#0D4261;}
- .fuenteIntNota{font-family:Arial,Helvetica,sans-serif; color:#1D1D1D; font-size:x-small;}
- '''
-
- keep_only_tags = [dict(name='div', attrs={'class':['content']})]
-
-
- remove_tags = [
- dict(name='div', attrs={'class':['options','col-right','controles', 'bannerLibre','tiulo-masleidas','masleidas-h']}),
- dict(name='a', attrs={'name' : 'comentario',}),
- dict(name='iframe'),
- dict(name='img', alt = "Ver galerias de imagenes"),
-
- ]
-
+ remove_empty_feeds = True
+ extra_css = '''
+ body{font-family:Arial,Helvetica,sans-serif;}
+ .popUpTitulo{color:#0D4261; font-size: xx-large}
+ '''
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ , 'linearize_tables' : True
+ }
+
feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
@@ -57,39 +42,14 @@ class Infobae(BasicNewsRecipe):
,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' )
]
-# def print_version(self, url):
-# main, sep, article_part = url.partition('contenidos/')
-# article_id, rsep, rrest = article_part.partition('-')
-# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
-
- def get_article_url(self, article):
- ans = article.get('link').encode('utf-8')
- parts = list(urlparse.urlparse(ans))
- parts[2] = urllib.quote(parts[2])
- ans = urlparse.urlunparse(parts)
- return ans.decode('utf-8')
-
-
- def preprocess_html(self, soup):
-
- for tag in soup.head.findAll('strong'):
- tag.extract()
- for tag in soup.findAll('meta'):
- del tag['content']
- tag.extract()
-
- mtag = '\n\n'
- soup.head.insert(0,mtag)
- for item in soup.findAll(style=True):
- del item['style']
-
- return soup
+ def print_version(self, url):
+ article_part = url.rpartition('/')[2]
+ article_id= article_part.partition('-')[0]
+ return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
def postprocess_html(self, soup, first):
-
for tag in soup.findAll(name='strong'):
tag.name = 'b'
-
return soup
diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe
index 13ff42b277..29f2cfc5e3 100644
--- a/resources/recipes/nspm.recipe
+++ b/resources/recipes/nspm.recipe
@@ -6,6 +6,7 @@ nspm.rs
import re
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString
class Nspm(BasicNewsRecipe):
title = 'Nova srpska politicka misao'
@@ -21,6 +22,7 @@ class Nspm(BasicNewsRecipe):
encoding = 'utf-8'
language = 'sr'
delay = 2
+ remove_empty_feeds = True
publication_type = 'magazine'
masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@@ -45,8 +47,9 @@ class Nspm(BasicNewsRecipe):
dict(name=['link','object','embed','script','meta','base','iframe'])
,dict(attrs={'class':'buttonheading'})
]
- remove_tags_after = dict(attrs={'class':'article_separator'})
- remove_attributes = ['width','height']
+ remove_tags_before = dict(attrs={'class':'contentheading'})
+ remove_tags_after = dict(attrs={'class':'article_separator'})
+ remove_attributes = ['width','height']
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@@ -67,4 +70,8 @@ class Nspm(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.body.findAll(style=True):
del item['style']
+ for item in soup.body.findAll('h1'):
+ nh = NavigableString(item.a.string)
+ item.a.extract()
+ item.insert(0,nh)
return self.adeify_images(soup)
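The reworked Infobae recipe derives the printer-friendly page from the article slug instead of fetching and re-encoding the feed link. The transformation performed by print_version() boils down to this (the URL below is invented for illustration; real links come from the RSS feed):

    url = 'http://www.infobae.com/notas/541234-Titular-de-ejemplo'
    article_part = url.rpartition('/')[2]        # '541234-Titular-de-ejemplo'
    article_id = article_part.partition('-')[0]  # '541234'
    print 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
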
From 548417ea6b6157faf1688b3b082f3eac5476636f Mon Sep 17 00:00:00 2001
From: ldolse
Date: Mon, 13 Sep 2010 09:18:45 +1000
Subject: [PATCH 15/43] comments and minor tweak
---
src/calibre/ebooks/conversion/utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index abfa43e7ed..ecf030b27d 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -111,7 +111,7 @@ class PreProcessor(object):
             html = add_markup.sub('</p>\n<p>', html)
# detect chapters/sections to match xpath or splitting logic
- heading = re.compile(']*>', re.IGNORECASE)
+ heading = re.compile(']*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings")
#
@@ -134,7 +134,7 @@ class PreProcessor(object):
self.log("Unwrapping Lines")
# Some OCR sourced files have line breaks in the html using a combination of span & p tags
# span are used for hard line breaks, p for new paragraphs. Determine which is used so
- # that lines can be wrapped across page boundaries
+ # that lines can be un-wrapped across page boundaries
         paras_reg = re.compile('<p[^>]*>', re.IGNORECASE)
         spans_reg = re.compile('<span[^>]*>', re.IGNORECASE)
paras = len(paras_reg.findall(html))
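The comment corrected above describes the span-versus-paragraph heuristic: OCR-derived HTML often uses <span> for hard line breaks and <p> only for real paragraphs, so when spans heavily outnumber paragraphs the unwrapper should operate on the spans. A condensed sketch of that ratio test (the 0.75 cutoff is the one used in the code above):

    import re

    def guess_line_break_format(html):
        paras = len(re.findall(r'<p[^>]*>', html, re.IGNORECASE))
        spans = len(re.findall(r'<span[^>]*>', html, re.IGNORECASE))
        if spans > 1 and float(paras) / float(spans) < 0.75:
            return 'spanned_html'   # spans carry the hard line breaks
        return 'html'
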
From de6aadee76d4dafe9b84133dc3af43ddef22fd0a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 10:15:35 -0600
Subject: [PATCH 16/43] News download: Fix bug that could break some downloads
in non ASCII locales
---
resources/recipes/xkcd.recipe | 6 +++---
src/calibre/web/feeds/__init__.py | 4 +++-
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/resources/recipes/xkcd.recipe b/resources/recipes/xkcd.recipe
index 312027004e..ad0d420deb 100644
--- a/resources/recipes/xkcd.recipe
+++ b/resources/recipes/xkcd.recipe
@@ -24,18 +24,18 @@ class XkcdCom(BasicNewsRecipe):
(re.compile(r'()'),
lambda m: '%s%s
%s
' % (m.group(1), m.group(3), m.group(2)))
]
-
+
def parse_index(self):
INDEX = 'http://xkcd.com/archive/'
- soup = self.index_to_soup(INDEX)
+ soup = self.index_to_soup(INDEX)
articles = []
for item in soup.findAll('a', title=True):
articles.append({
'date': item['title'],
'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1,
'url': 'http://xkcd.com' + item['href'],
- 'title': self.tag_to_string(item).encode('UTF-8'),
+ 'title': self.tag_to_string(item),
'description': '',
'content': '',
})
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index a70cf8b664..8aef350498 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -165,7 +165,9 @@ class Feed(object):
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
self.articles.append(article)
else:
- self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title))
+ t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple())
+ self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%
+ (title, t, self.title))
d = item.get('date', '')
article.formatted_date = d
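The underlying problem was formatting the article date with datetime.strftime and interpolating the result in a non-ASCII locale, where the formatted value can be undecodable bytes; the fix routes the value through calibre's locale-aware strftime on the timetuple. A rough standard-library sketch of the same defensive idea (this is only an illustration, not calibre's strftime):

    import time, locale

    def safe_timestamp(timetuple, fmt='%a, %d %b, %Y %H:%M'):
        # time.strftime returns locale-encoded bytes on Python 2; decode them
        # explicitly so later unicode interpolation cannot raise UnicodeDecodeError
        s = time.strftime(fmt, timetuple)
        if isinstance(s, bytes):
            s = s.decode(locale.getpreferredencoding() or 'utf-8', 'replace')
        return s

    print safe_timestamp(time.localtime())
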
From b73e1b3da50810e151d10a5d62251754a077e605 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Tue, 14 Sep 2010 02:56:56 +1000
Subject: [PATCH 17/43] tweaked preprocess for $, added rtf to new preprocess
logic, changed last pdf default
---
src/calibre/ebooks/conversion/preprocess.py | 2 +-
src/calibre/ebooks/rtf/input.py | 13 +++----------
src/calibre/gui2/convert/pdf_input.ui | 2 +-
3 files changed, 5 insertions(+), 12 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index f6277956c8..9464be1210 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -340,7 +340,7 @@ class HTMLPreProcessor(object):
# print "The pdf line length returned is " + str(length)
end_rules.append(
# Un wrap using punctuation
- (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P(i|b|u)>)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+ (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P(i|b|u)>)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 216ccf591d..d229b80c16 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -8,6 +8,7 @@ from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.preprocess import line_length
+from calibre.ebooks.conversion.utils import PreProcessor
class InlineClass(etree.XSLTExtension):
@@ -229,16 +230,8 @@ class RTFInput(InputFormatPlugin):
res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
if self.options.preprocess_html:
- self.log("********* Preprocessing HTML *********")
- # Detect Chapters to match the xpath in the GUI
- chapdetect = re.compile(r'
\n', res)
- # Unwrap lines using punctation if the median length of all lines is less than 150
- length = line_length('html', res, 0.4)
- self.log("*** Median length is " + str(length) + " ***")
-            unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</span>\s*</p>\s*(?P<up2threeblanks><p[^>]*>\s*(<span[^>]*>\s*</span>\s*)</p>\s*){0,3}\s*<p[^>]*>\s*<span[^>]*>\s*" % length, re.UNICODE)
- if length < 150:
- res = unwrap.sub(' ', res)
+ preprocessor = PreProcessor(res)
+ res = preprocessor(res)
f.write(res)
self.write_inline_css(inline_class)
stream.seek(0)
diff --git a/src/calibre/gui2/convert/pdf_input.ui b/src/calibre/gui2/convert/pdf_input.ui
index 626c68ea63..b2ee421922 100644
--- a/src/calibre/gui2/convert/pdf_input.ui
+++ b/src/calibre/gui2/convert/pdf_input.ui
@@ -46,7 +46,7 @@
        <double>0.010000000000000</double>
-       <double>0.500000000000000</double>
+       <double>0.450000000000000</double>
From 8b73bb52e8d551538d0c0e55e7b91b6b16f69977 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 16:42:22 -0600
Subject: [PATCH 18/43] Fix #6802 (Sovos E Reader Not Recognised / Floppy Drive
Activation)
---
src/calibre/customize/builtins.py | 3 ++-
src/calibre/devices/teclast/driver.py | 11 +++++++++++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 4c87236e71..68df832048 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -459,7 +459,7 @@ from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE
-from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS
+from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
@@ -557,6 +557,7 @@ plugins += [
TECLAST_K3,
NEWSMY,
IPAPYRUS,
+ SOVOS,
EDGE,
SNE,
ALEX,
diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py
index 0c60a367cf..2055ff9306 100644
--- a/src/calibre/devices/teclast/driver.py
+++ b/src/calibre/devices/teclast/driver.py
@@ -52,3 +52,14 @@ class IPAPYRUS(TECLAST_K3):
VENDOR_NAME = 'E_READER'
WINDOWS_MAIN_MEM = ''
+class SOVOS(TECLAST_K3):
+
+ name = 'Sovos device interface'
+ gui_name = 'Sovos'
+ description = _('Communicate with the Sovos reader.')
+
+ FORMATS = ['epub', 'fb2', 'pdf', 'txt']
+
+ VENDOR_NAME = 'RK28XX'
+ WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB-MSC'
+
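Supporting the Sovos reader amounts to subclassing an existing Teclast driver and registering the new class, as the two hunks above show. A hypothetical driver following the same pattern would look like this (the vendor and memory strings are placeholders, not a real device; _() is the translation helper that calibre makes available in driver modules):

    from calibre.devices.teclast.driver import TECLAST_K3

    class EXAMPLE_READER(TECLAST_K3):

        name        = 'Example device interface'
        gui_name    = 'Example'
        description = _('Communicate with the Example reader.')

        FORMATS     = ['epub', 'fb2', 'pdf', 'txt']

        # strings reported by the device over USB mass storage (placeholders)
        VENDOR_NAME      = 'EXAMPLE'
        WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB-MSC'

The class must also be imported and appended to the plugins list in src/calibre/customize/builtins.py, exactly as done for SOVOS above.
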
From fb053fe3f37d531a170bb2a1d67ccf70ea030351 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 16:58:09 -0600
Subject: [PATCH 19/43] Fix #6773 (Slightly broken CHM file)
---
src/calibre/ebooks/chm/reader.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py
index 67a2d36607..831c16bf6a 100644
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@@ -132,7 +132,11 @@ class CHMReader(CHMFile):
for path in self.Contents():
lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath)
- data = self.GetFile(path)
+ try:
+ data = self.GetFile(path)
+ except:
+ self.log.exception('Failed to extract %s from CHM, ignoring'%path)
+ continue
if lpath.find(';') != -1:
# fix file names with ";" at the end, see _reformat()
lpath = lpath.split(';')[0]
From ba5de1c92d797abc1f82782c7e15bd61dfa387c5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 18:18:32 -0600
Subject: [PATCH 20/43] Conversion pipeline: When setting margins on <body>,
 explicitly set padding to 0 to override any existing padding in the input
 document
---
src/calibre/ebooks/oeb/transforms/flatcss.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py
index f48bdb9934..ffdc641d1e 100644
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -138,6 +138,7 @@ class CSSFlattener(object):
float(self.context.margin_left))
bs.append('margin-right : %fpt'%\
float(self.context.margin_right))
+ bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.context.change_justification != 'original':
bs.append('text-align: '+ self.context.change_justification)
body.set('style', '; '.join(bs))
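Because the margins are written into an inline style attribute on <body>, and inline styles take precedence over rules in the input stylesheet, appending explicit zero padding neutralises any horizontal padding the source CSS declares. The resulting style string looks roughly like this (margin values are illustrative):

    bs = []
    bs.append('margin-left : %fpt' % 72.0)
    bs.append('margin-right : %fpt' % 72.0)
    bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
    print '; '.join(bs)
    # margin-left : 72.000000pt; margin-right : 72.000000pt; padding-left: 0pt; padding-right: 0pt
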
From c5063b8633506f3b661d3e3dcc84d7ec68e74345 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 18:26:51 -0600
Subject: [PATCH 21/43] Fix #6804 (Timeout error when browsing content server
via browser)
---
resources/content_server/gui.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js
index 631fb8b617..d0fb49cc8e 100644
--- a/resources/content_server/gui.js
+++ b/resources/content_server/gui.js
@@ -26,7 +26,7 @@ var current_library_request = null;
////////////////////////////// GET BOOK LIST //////////////////////////////
-var LIBRARY_FETCH_TIMEOUT = 30000; // milliseconds
+var LIBRARY_FETCH_TIMEOUT = 5*60000; // milliseconds
function create_table_headers() {
var thead = $('table#book_list thead tr');
From c5415bbe8012179b405f2c3ca3b5258e83a863b3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 19:11:38 -0600
Subject: [PATCH 22/43] Fix #6806 (--start-in-tray switch displays hidden
windows in metacity, xfwm4 and compiz)
---
src/calibre/gui2/cover_flow.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py
index 88bbae6c41..cb951b09be 100644
--- a/src/calibre/gui2/cover_flow.py
+++ b/src/calibre/gui2/cover_flow.py
@@ -155,6 +155,7 @@ class CoverFlowMixin(object):
self.cb_splitter.action_toggle.triggered.connect(self.toggle_cover_browser)
if CoverFlow is not None:
self.cover_flow.stop.connect(self.hide_cover_browser)
+ self.cover_flow.setVisible(False)
else:
self.cb_splitter.insertWidget(self.cb_splitter.side_index, self.cover_flow)
if CoverFlow is not None:
From ba67e47c9260a1f813048ab0239ed78d5324e89a Mon Sep 17 00:00:00 2001
From: GRiker
Date: Mon, 13 Sep 2010 19:12:49 -0600
Subject: [PATCH 23/43] GwR wip book jacket
---
src/calibre/devices/apple/driver.py | 6 +-
src/calibre/ebooks/oeb/transforms/flatcss.py | 16 +-
src/calibre/ebooks/oeb/transforms/jacket.py | 147 +++++++++++++------
src/calibre/library/catalog.py | 4 +
4 files changed, 120 insertions(+), 53 deletions(-)
diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py
index e318d368ff..c9bc04a242 100644
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -2342,8 +2342,10 @@ class ITUNES(DriverBase):
if isosx:
if DEBUG:
self.log.info(" deleting '%s' from iDevice" % cached_book['title'])
- cached_book['dev_book'].delete()
-
+ try:
+ cached_book['dev_book'].delete()
+ except:
+ self.log.error(" error deleting '%s'" % cached_book['title'])
elif iswindows:
hit = self._find_device_book(cached_book)
if hit:
diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py
index f48bdb9934..030c271362 100644
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -146,7 +146,6 @@ class CSSFlattener(object):
extra_css=css)
self.stylizers[item] = stylizer
-
def baseline_node(self, node, stylizer, sizes, csize):
csize = stylizer.style(node)['font-size']
if node.text:
@@ -194,7 +193,7 @@ class CSSFlattener(object):
value = 0.0
cssdict[property] = "%0.5fem" % (value / fsize)
- def flatten_node(self, node, stylizer, names, styles, psize, left=0):
+ def flatten_node(self, node, stylizer, names, styles, psize, item_id, left=0):
if not isinstance(node.tag, basestring) \
or namespace(node.tag) != XHTML_NS:
return
@@ -286,15 +285,18 @@ class CSSFlattener(object):
if self.lineh and 'line-height' not in cssdict:
lineh = self.lineh / psize
cssdict['line-height'] = "%0.5fem" % lineh
+
if (self.context.remove_paragraph_spacing or
self.context.insert_blank_line) and tag in ('p', 'div'):
- for prop in ('margin', 'padding', 'border'):
- for edge in ('top', 'bottom'):
- cssdict['%s-%s'%(prop, edge)] = '0pt'
+ if item_id != 'jacket' or self.context.output_profile.name == 'Kindle':
+ for prop in ('margin', 'padding', 'border'):
+ for edge in ('top', 'bottom'):
+ cssdict['%s-%s'%(prop, edge)] = '0pt'
if self.context.insert_blank_line:
cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em'
if self.context.remove_paragraph_spacing:
cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
+
if cssdict:
items = cssdict.items()
items.sort()
@@ -313,7 +315,7 @@ class CSSFlattener(object):
if 'style' in node.attrib:
del node.attrib['style']
for child in node:
- self.flatten_node(child, stylizer, names, styles, psize, left)
+ self.flatten_node(child, stylizer, names, styles, psize, item_id, left)
def flatten_head(self, item, stylizer, href):
html = item.data
@@ -360,7 +362,7 @@ class CSSFlattener(object):
stylizer = self.stylizers[item]
body = html.find(XHTML('body'))
fsize = self.context.dest.fbase
- self.flatten_node(body, stylizer, names, styles, fsize)
+ self.flatten_node(body, stylizer, names, styles, fsize, item.id)
items = [(key, val) for (val, key) in styles.items()]
items.sort()
css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index 030067850c..309a7fd7b6 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -13,6 +13,9 @@ from itertools import repeat
from lxml import etree
from calibre import guess_type, strftime
+from calibre.constants import __appname__, __version__
+from calibre.utils.date import now
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.oeb.base import XPath, XPNSMAP
from calibre.library.comments import comments_to_html
class Jacket(object):
@@ -29,13 +32,30 @@ class Jacket(object):
-
-
%(title)s
-
%(series)s
-
%(rating)s
-
%(tags)s
+
+
%(title)s
+
+
+
Series:
+
%(series)s
+
+
+
Published:
+
%(pubdate)s
+
+
+
Rating:
+
%(rating)s
+
+
+
Tags:
+
%(tags)s
+
+
+
-
%(comments)s
+
+
%(comments)s
''')
@@ -56,7 +76,7 @@ class Jacket(object):
def get_rating(self, rating):
ans = ''
if rating is None:
- return
+ return ans
try:
num = float(rating)/2
except:
@@ -65,19 +85,54 @@ class Jacket(object):
num = min(num, 5)
if num < 1:
return ans
- id, href = self.oeb.manifest.generate('star', 'star.png')
- self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True))
- ans = 'Rating: ' + ''.join(repeat(''%href, num))
+ if self.opts.output_profile.name == 'Kindle':
+ ans = '%s' % ''.join(repeat('★', num))
+ else:
+ id, href = self.oeb.manifest.generate('star', 'star.png')
+ self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True))
+ ans = '%s' % ''.join(repeat(''%href, num))
return ans
def insert_metadata(self, mi):
self.log('Inserting metadata into book...')
jacket_resources = P("jacket")
- if os.path.isdir(jacket_resources):
- stylesheet = os.path.join(jacket_resources, 'stylesheet.css')
- with open(stylesheet) as f:
- css_data = f.read()
+ css_data = ''
+ stylesheet = os.path.join(jacket_resources, 'stylesheet.css')
+ with open(stylesheet) as f:
+ css = f.read()
+
+ try:
+ title_str = mi.title if mi.title else unicode(self.oeb.metadata.title[0])
+ except:
+ title_str = _('Unknown')
+ title = '%s' % (escape(title_str))
+
+ series = escape(mi.series if mi.series else '')
+ if mi.series and mi.series_index is not None:
+ series += escape(' [%s]'%mi.format_series_index())
+ if not mi.series:
+ series = ''
+
+ try:
+ pubdate = strftime(u'%Y', mi.pubdate.timetuple())
+ except:
+ #pubdate = strftime(u'%Y', now())
+ pubdate = ''
+
+ rating = self.get_rating(mi.rating)
+
+ tags = mi.tags
+ if not tags:
+ try:
+ tags = map(unicode, self.oeb.metadata.subject)
+ except:
+ tags = []
+ if tags:
+ #tags = self.opts.dest.tags_to_string(tags)
+ tags = ', '.join(tags)
+ else:
+ tags = ''
comments = mi.comments
if not comments:
@@ -91,46 +146,50 @@ class Jacket(object):
if comments:
comments = comments_to_html(comments)
- series = 'Series: %s' % escape(mi.series if mi.series else '')
- if mi.series and mi.series_index is not None:
- series += '%s' % escape(' [%s]'%mi.format_series_index())
- if not mi.series:
- series = ''
-
- tags = mi.tags
- if not tags:
- try:
- tags = map(unicode, self.oeb.metadata.subject)
- except:
- tags = []
- if tags:
- tags = 'Tags:%s' % self.opts.dest.tags_to_string(tags)
- else:
- tags = ''
-
- try:
- title_str = mi.title if mi.title else unicode(self.oeb.metadata.title[0])
- except:
- title_str = _('Unknown')
- title = '%s (%s)' % (escape(title_str), strftime(u'%Y', mi.pubdate.timetuple()))
-
+ footer = 'BOOK JACKET GENERATED BY %s %s' % (__appname__.upper(),__version__)
def generate_html(comments):
- return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
- title=title, comments=comments,
- series=series,
- tags=tags, rating=self.get_rating(mi.rating),
- css=css_data, title_str=title_str)
+ args = dict(xmlns=XPNSMAP['h'],
+ title_str=title_str,
+ css=css,
+ title=title,
+ pubdate=pubdate,
+ series=series,
+ rating=rating,
+ tags=tags,
+ comments=comments,
+ footer = footer)
+
+ # Post-process the generated html to strip out empty header items
+ generated_html = self.JACKET_TEMPLATE % args
+ soup = BeautifulSoup(generated_html)
+ if not series:
+ series_tag = soup.find('tr', attrs={'class':'cbj_series'})
+ series_tag.extract()
+ if not rating:
+ rating_tag = soup.find('tr', attrs={'class':'cbj_rating'})
+ rating_tag.extract()
+ if not tags:
+ tags_tag = soup.find('tr', attrs={'class':'cbj_tags'})
+ tags_tag.extract()
+ if not pubdate:
+ pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'})
+ pubdate_tag.extract()
+ if self.opts.output_profile.name != 'Kindle':
+ hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'})
+ hr_tag.extract()
+
+ return soup.renderContents()
id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath
+
try:
root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER)
-# print "root: %s" % etree.tostring(root, encoding='utf-8',
-# xml_declaration=True, pretty_print=True)
except:
root = etree.fromstring(generate_html(escape(orig_comments)),
parser=RECOVER_PARSER)
+
jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]')
found = None
for item in list(self.oeb.spine)[:4]:
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index bd2160aff1..ef7569bd88 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -2523,6 +2523,10 @@ class EPUB_MOBI(CatalogPlugin):
# Fetch the database as a dictionary
self.booksBySeries = self.plugin.search_sort_db(self.db, self.opts)
+ if not self.booksBySeries:
+ self.opts.generate_series = False
+ self.opts.log(" no series found in selected books, cancelling series generation")
+ return
friendly_name = "Series"
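The rewritten jacket generator renders the full template first and then prunes the rows for missing metadata by parsing the result with BeautifulSoup, rather than assembling conditional snippets up front. A reduced sketch of that post-processing step (the markup below is a stand-in for the real jacket template; only the pruning pattern matters):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    generated_html = '''<table>
    <tr class="cbj_series"><td>Series:</td><td></td></tr>
    <tr class="cbj_rating"><td>Rating:</td><td>***</td></tr>
    </table>'''

    soup = BeautifulSoup(generated_html)
    series = ''                       # empty metadata, so drop its row
    if not series:
        soup.find('tr', attrs={'class':'cbj_series'}).extract()
    print soup.renderContents()       # only the rating row remains
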
From 6a3609f031bb9400630cd6418b278903a4883c8a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 13 Sep 2010 19:58:22 -0600
Subject: [PATCH 24/43] Implement #6808 (Feature request: ability to convert
all single/double quotes to "smart quotes")
---
src/calibre/ebooks/conversion/cli.py | 2 +-
src/calibre/ebooks/conversion/plumber.py | 8 +
src/calibre/ebooks/conversion/preprocess.py | 23 +-
src/calibre/gui2/convert/look_and_feel.py | 2 +-
src/calibre/gui2/convert/look_and_feel.ui | 9 +-
src/calibre/utils/smartypants.py | 899 ++++++++++++++++++++
6 files changed, 933 insertions(+), 10 deletions(-)
create mode 100755 src/calibre/utils/smartypants.py
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 7439718cf6..2ef633d0bb 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -122,7 +122,7 @@ def add_pipeline_options(parser, plumber):
'font_size_mapping',
'line_height',
'linearize_tables',
- 'extra_css',
+ 'extra_css', 'smarten_punctuation',
'margin_top', 'margin_left', 'margin_right',
'margin_bottom', 'change_justification',
'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size',
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 24b35f804f..16282dd28d 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -362,6 +362,14 @@ OptionRecommendation(name='preprocess_html',
)
),
+OptionRecommendation(name='smarten_punctuation',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Convert plain quotes, dashes and ellipsis to their '
+ 'typographically correct equivalents. For details, see '
+ 'http://daringfireball.net/projects/smartypants'
+ )
+ ),
+
OptionRecommendation(name='remove_header',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Use a regular expression to try and remove the header.'
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 7742a20a21..4538af96c4 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -221,7 +221,7 @@ class HTMLPreProcessor(object):
(re.compile(u'˛\s*()*\s*A', re.UNICODE), lambda match: u'Ą'),
(re.compile(u'˛\s*()*\s*e', re.UNICODE), lambda match: u'ę'),
(re.compile(u'˛\s*()*\s*E', re.UNICODE), lambda match: u'Ę'),
-
+
# ˙
(re.compile(u'˙\s*()*\s*z', re.UNICODE), lambda match: u'ż'),
(re.compile(u'˙\s*()*\s*Z', re.UNICODE), lambda match: u'Ż'),
@@ -244,14 +244,14 @@ class HTMLPreProcessor(object):
(re.compile(r' \s*(?P(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*([ibu]>){0,2})\s*( \s*){1,3}\s*(?P(<[ibu]>){0,2}(\s*\w+){1,4}\s*([ibu]>){0,2}\s* )?', re.IGNORECASE), chap_head),
# Cover the case where every letter in a chapter title is separated by a space
(re.compile(r' \s*(?P([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*( \s*){1,3}\s*(?P(<[ibu]>){0,2}(\s*\w+){1,4}\s*([ibu]>){0,2}\s*( ))?'), chap_head),
-
+
# Have paragraphs show better
                  (re.compile(r'<br.*?>'), lambda match : '<p>'),
# Clean up spaces
(re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
# Add space before and after italics
(re.compile(u'(?'), lambda match: ' '),
- (re.compile(r'(?=\w)'), lambda match: ' '),
+ (re.compile(r'(?=\w)'), lambda match: ' '),
]
# Fix Book Designer markup
@@ -328,7 +328,7 @@ class HTMLPreProcessor(object):
import traceback
print 'Failed to parse remove_footer regexp'
traceback.print_exc()
-
+
# unwrap hyphenation - moved here so it's executed after header/footer removal
if is_pdftohtml:
# unwrap visible dashes and hyphens - don't delete they are often hyphens for
@@ -338,13 +338,13 @@ class HTMLPreProcessor(object):
end_rules.append((re.compile(u'[](\s*
)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
-
- # Make the more aggressive chapter marking regex optional with the preprocess option to
+
+ # Make the more aggressive chapter marking regex optional with the preprocess option to
# reduce false positives and move after header/footer removal
if getattr(self.extra_opts, 'preprocess_html', None):
if is_pdftohtml:
end_rules.append((re.compile(r'
)?'), chap_head),)
-
+
if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'))
if length:
@@ -401,5 +401,14 @@ class HTMLPreProcessor(object):
if self.plugin_preprocess:
html = self.input_plugin_preprocess(html)
+ if getattr(self.extra_opts, 'smarten_punctuation', False):
+ html = self.smarten_punctuation(html)
+
return html
+ def smarten_punctuation(self, html):
+ from calibre.utils.smartypants import smartyPants
+ from calibre.ebooks.chardet import substitute_entites
+ html = smartyPants(html)
+ return substitute_entites(html)
+
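smarten_punctuation() is a thin wrapper: run the HTML through the bundled smartypants module, then normalise the entities it emits. A hedged usage sketch of the two calls introduced above (the sample string is made up; exact output depends on substitute_entites):

    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites

    html = '<p>"Hello" -- it\'s a test...</p>'
    html = smartyPants(html)          # straight quotes, dashes, ellipsis -> HTML entities
    html = substitute_entites(html)   # entities -> their unicode characters
    print html
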
diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py
index b0403bf1dd..ec3f0b944d 100644
--- a/src/calibre/gui2/convert/look_and_feel.py
+++ b/src/calibre/gui2/convert/look_and_feel.py
@@ -22,7 +22,7 @@ class LookAndFeelWidget(Widget, Ui_Form):
Widget.__init__(self, parent,
['change_justification', 'extra_css', 'base_font_size',
'font_size_mapping', 'line_height',
- 'linearize_tables',
+ 'linearize_tables', 'smarten_punctuation',
'disable_font_rescaling', 'insert_blank_line',
'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding',
'asciiize', 'keep_ligatures']
diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui
index de48e7caf9..c683300854 100644
--- a/src/calibre/gui2/convert/look_and_feel.ui
+++ b/src/calibre/gui2/convert/look_and_feel.ui
@@ -178,7 +178,7 @@
-
+ Extra &CSS
@@ -214,6 +214,13 @@
+
+
+
+ Smarten &punctuation
+
+
+
diff --git a/src/calibre/utils/smartypants.py b/src/calibre/utils/smartypants.py
new file mode 100755
index 0000000000..44aac4de8c
--- /dev/null
+++ b/src/calibre/utils/smartypants.py
@@ -0,0 +1,899 @@
+#!/usr/bin/python
+
+r"""
+==============
+smartypants.py
+==============
+
+----------------------------
+SmartyPants ported to Python
+----------------------------
+
+Ported by `Chad Miller`_
+Copyright (c) 2004, 2007 Chad Miller
+
+original `SmartyPants`_ by `John Gruber`_
+Copyright (c) 2003 John Gruber
+
+
+Synopsis
+========
+
+A smart-quotes plugin for Pyblosxom_.
+
+The original "SmartyPants" is a free web publishing plug-in for Movable Type,
+Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
+into "smart" typographic punctuation HTML entities.
+
+This software, *smartypants.py*, endeavours to be a functional port of
+SmartyPants to Python, for use with Pyblosxom_.
+
+
+Description
+===========
+
+SmartyPants can perform the following transformations:
+
+- Straight quotes ( " and ' ) into "curly" quote HTML entities
+- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
+- Dashes (``--`` and ``---``) into en- and em-dash entities
+- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
+
+This means you can write, edit, and save your posts using plain old
+ASCII straight quotes, plain dashes, and plain dots, but your published
+posts (and final HTML output) will appear with smart quotes, em-dashes,
+and proper ellipses.
+
+SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
+``<script>``, or ``<math>`` tag blocks.
-
+ Insert &metadata as page at start of book
-
-
-
- &Preprocess input file to possibly improve structure detection
-
-
-
-
+
-
+ Qt::Vertical
@@ -71,26 +64,33 @@
-
+ Remove F&ooter
-
+ Remove H&eader
-
+
-
+
+
+
+
+ &Preprocess input file to possibly improve structure detection
+
+
+
From 30fafed01710d563a6aaf67b12be0f7db189f4f2 Mon Sep 17 00:00:00 2001
From: GRiker
Date: Wed, 15 Sep 2010 10:12:04 -0600
Subject: [PATCH 37/43] GwR fix #6822
---
src/calibre/library/catalog.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index ef7569bd88..e14d092727 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -2590,7 +2590,7 @@ class EPUB_MOBI(CatalogPlugin):
aTag = Tag(soup, 'a')
aTag['name'] = "%s_series" % re.sub('\W','',book['series']).lower()
pSeriesTag.insert(0,aTag)
- pSeriesTag.insert(1,NavigableString(self.NOT_READ_SYMBOL + '%s' % book['series']))
+ pSeriesTag.insert(1,NavigableString('%s' % book['series']))
divTag.insert(dtc,pSeriesTag)
dtc += 1
@@ -2599,7 +2599,14 @@ class EPUB_MOBI(CatalogPlugin):
ptc = 0
# book with read/reading/unread symbol
- if 'read' in book and book['read']:
+ for tag in book['tags']:
+ if tag == self.opts.read_tag:
+ book['read'] = True
+ break
+ else:
+ book['read'] = False
+
+ if book['read']:
# check mark
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
pBookTag['class'] = "read_book"
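The for/else loop above sets book['read'] to True only when one of the book's tags equals the configured read tag. An equivalent, more compact formulation (shown only to clarify the logic, not as a proposed change):

    book['read'] = any(tag == self.opts.read_tag for tag in book['tags'])
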
From a20015e1e7b656a47fd87d474c08bdaef61b0bae Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 15 Sep 2010 11:09:40 -0600
Subject: [PATCH 38/43] Workaround for bug that affects some windows installs,
 causing white backgrounds on default covers to be rendered as yellow
---
src/calibre/utils/magick/__init__.py | 2 +-
src/calibre/utils/magick/draw.py | 10 +++++-----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/calibre/utils/magick/__init__.py b/src/calibre/utils/magick/__init__.py
index 073a030361..2707430c67 100644
--- a/src/calibre/utils/magick/__init__.py
+++ b/src/calibre/utils/magick/__init__.py
@@ -194,7 +194,7 @@ class Image(_magick.Image): # {{{
# }}}
-def create_canvas(width, height, bgcolor='white'):
+def create_canvas(width, height, bgcolor='#ffffff'):
canvas = Image()
canvas.create_canvas(int(width), int(height), str(bgcolor))
return canvas
diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py
index 301bf9912a..ed9e3d3d83 100644
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@@ -11,7 +11,7 @@ from calibre.utils.magick import Image, DrawingWand, create_canvas
from calibre.constants import __appname__, __version__
from calibre import fit_image
-def save_cover_data_to(data, path, bgcolor='white', resize_to=None,
+def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
return_data=False):
'''
Saves image in data to path, in the format specified by the path
@@ -28,7 +28,7 @@ def save_cover_data_to(data, path, bgcolor='white', resize_to=None,
return canvas.export(os.path.splitext(path)[1][1:])
canvas.save(path)
-def thumbnail(data, width=120, height=120, bgcolor='white', fmt='jpg'):
+def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg'):
img = Image()
img.load(data)
owidth, oheight = img.size
@@ -61,7 +61,7 @@ def identify(path):
return identify_data(data)
def add_borders_to_image(path_to_image, left=0, top=0, right=0, bottom=0,
- border_color='white'):
+ border_color='#ffffff'):
img = Image()
img.open(path_to_image)
lwidth, lheight = img.size
@@ -80,7 +80,7 @@ def create_text_wand(font_size, font_path=None):
ans.text_alias = True
return ans
-def create_text_arc(text, font_size, font=None, bgcolor='white'):
+def create_text_arc(text, font_size, font=None, bgcolor='#ffffff'):
if isinstance(text, unicode):
text = text.encode('utf-8')
@@ -148,7 +148,7 @@ class TextLine(object):
def create_cover_page(top_lines, logo_path, width=590, height=750,
- bgcolor='white', output_format='jpg'):
+ bgcolor='#ffffff', output_format='jpg'):
'''
Create the standard calibre cover page and return it as a byte string in
the specified output_format.
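Every canvas-creating helper now defaults to the explicit '#ffffff' instead of the named color 'white', which some Windows ImageMagick builds resolved incorrectly. Callers that pass a background color explicitly should do the same, e.g. (file names here are placeholders):

    from calibre.utils.magick.draw import save_cover_data_to

    data = open('input_cover.png', 'rb').read()
    # pad/convert to JPEG on an explicit white background, not the named color
    save_cover_data_to(data, 'cover.jpg', bgcolor='#ffffff')
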
From 57ca76e68efb7c3f615d948231ac741e60251dd1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 15 Sep 2010 11:12:53 -0600
Subject: [PATCH 39/43] ...
---
src/calibre/web/feeds/news.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 9ba9583c73..a140dfbf05 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -290,10 +290,12 @@ class BasicNewsRecipe(Recipe):
#: the cover for the periodical. Overriding this in your recipe instructs
#: calibre to render the downloaded cover into a frame whose width and height
#: are expressed as a percentage of the downloaded cover.
- #: cover_margins = (10,15,'white') pads the cover with a white margin
+ #: cover_margins = (10, 15, '#ffffff') pads the cover with a white margin
#: 10px on the left and right, 15px on the top and bottom.
- #: Colors name defined at http://www.imagemagick.org/script/color.php
- cover_margins = (0,0,'white')
+ #: Color names defined at http://www.imagemagick.org/script/color.php
+ #: Note that for some reason, white does not always work on windows. Use
+ #: #ffffff instead
+ cover_margins = (0, 0, '#ffffff')
#: Set to a non empty string to disable this recipe
#: The string will be used as the disabled message
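In a recipe, the documented setting looks like this (the recipe class is hypothetical; the values are the ones from the docstring example):

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'
        # pad the downloaded cover: 10px left/right, 15px top/bottom,
        # on a white background; use '#ffffff' rather than 'white' on windows
        cover_margins = (10, 15, '#ffffff')
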
From c006e2e14bebef07898a934bdb6225ea14b6280f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 15 Sep 2010 11:27:39 -0600
Subject: [PATCH 40/43] Database: Update has_cover cache when setting/removing
covers so that the search returns correct results. Also fix an exception that
could occur when adding books with a db that has been upgraded from very old
SQL.
---
src/calibre/library/database2.py | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 2df6b3bdc4..f5f0f724ba 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -598,7 +598,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def has_cover(self, index, index_is_id=False):
id = index if index_is_id else self.id(index)
- path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
+ try:
+ path = os.path.join(self.abspath(id, index_is_id=True), 'cover.jpg')
+ except:
+ # Can happen if path has not yet been set
+ return False
return os.access(path, os.R_OK)
def remove_cover(self, id, notify=True):
@@ -609,6 +613,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
except (IOError, OSError):
time.sleep(0.2)
os.remove(path)
+ self.data.set(id, self.FIELD_MAP['cover'], False, row_is_id=True)
if notify:
self.notify('cover', [id])
@@ -629,6 +634,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
except (IOError, OSError):
time.sleep(0.2)
save_cover_data_to(data, path)
+ self.data.set(id, self.FIELD_MAP['cover'], True, row_is_id=True)
if notify:
self.notify('cover', [id])
@@ -1087,8 +1093,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.set_path(id, True)
self.notify('metadata', [id])
- # Given a book, return the list of author sort strings for the book's authors
def authors_sort_strings(self, id, index_is_id=False):
+ '''
+ Given a book, return the list of author sort strings
+ for the book's authors
+ '''
id = id if index_is_id else self.id(id)
aut_strings = self.conn.get('''
SELECT sort
@@ -1744,10 +1753,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
series_index = 1.0 if mi.series_index is None else mi.series_index
aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
title = mi.title
- if isinstance(aus, str):
+ if isbytestring(aus):
aus = aus.decode(preferred_encoding, 'replace')
- if isinstance(title, str):
- title = title.decode(preferred_encoding)
+ if isbytestring(title):
+ title = title.decode(preferred_encoding, 'replace')
obj = self.conn.execute('INSERT INTO books(title, series_index, author_sort) VALUES (?, ?, ?)',
(title, series_index, aus))
id = obj.lastrowid
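Two small robustness fixes share this hunk: has_cover() now tolerates books whose path is not set yet, and byte-string titles or author sorts coming out of an old database are detected with isbytestring() and decoded with 'replace', so the INSERT cannot die on a UnicodeDecodeError. The decoding pattern in isolation (preferred_encoding is calibre's locale-derived default, hard-coded here for the sketch):

    preferred_encoding = 'utf-8'   # calibre derives this from the user's locale

    def to_unicode(val):
        # never let undecodable bytes abort the insert; replace bad characters
        if isinstance(val, bytes):
            return val.decode(preferred_encoding, 'replace')
        return val
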
From 6bbbb0a1f57635d8d5aae6398f27914e20d333e6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 15 Sep 2010 12:19:26 -0600
Subject: [PATCH 41/43] Fix #6819 (Not recognising New sony PRS 650)
---
src/calibre/devices/prs505/driver.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 4c14565c2d..094c12cf0c 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -35,7 +35,7 @@ class PRS505(USBMS):
VENDOR_NAME = 'SONY'
WINDOWS_MAIN_MEM = re.compile(
- r'(PRS-(505|300|500))|'
+ r'(PRS-(505|500))|'
r'(PRS-((700[#/])|((6|9|3)(0|5)0&)))'
)
WINDOWS_CARD_A_MEM = re.compile(
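A quick way to see what PATCH 41 changes is to run the new pattern against a few device-ID fragments; the strings below are illustrative, not real Windows PnP identifiers:

    import re

    # Same pattern as the patched WINDOWS_MAIN_MEM above
    pat = re.compile(
        r'(PRS-(505|500))|'
        r'(PRS-((700[#/])|((6|9|3)(0|5)0&)))'
    )

    for frag in ('PRS-505', 'PRS-650&', 'PRS-950&', 'PRS-350&', 'PRS-700#'):
        print(frag + ' -> ' + str(bool(pat.search(frag))))
    # All five fragments match; the 650 case is what closes #6819.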
From 062d369b43b435991d0b140bdba6217c0b5b0ccf Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 15 Sep 2010 13:22:05 -0600
Subject: [PATCH 42/43] ...
---
src/calibre/gui2/device.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 45c78ce6da..f839e1d519 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -627,12 +627,11 @@ class DeviceMixin(object): # {{{
def connect_to_folder(self):
dir = choose_dir(self, 'Select Device Folder',
_('Select folder to open as device'))
- kls = FOLDER_DEVICE
- self.device_manager.mount_device(kls=kls, kind='folder', path=dir)
+ if dir is not None:
+ self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir)
def connect_to_itunes(self):
- kls = ITUNES_ASYNC
- self.device_manager.mount_device(kls=kls, kind='itunes', path=None)
+ self.device_manager.mount_device(kls=ITUNES_ASYNC, kind='itunes', path=None)
# disconnect from both folder and itunes devices
def disconnect_mounted_device(self):
From ff319ccc4daae0b1d01ccab78733412fb4edb53f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 15 Sep 2010 14:05:47 -0600
Subject: [PATCH 43/43] Add an option to split the toolbar into two toolbars
---
src/calibre/gui2/__init__.py | 1 +
src/calibre/gui2/actions/__init__.py | 6 +++
src/calibre/gui2/actions/add.py | 1 +
src/calibre/gui2/actions/add_to_library.py | 1 +
src/calibre/gui2/actions/annotate.py | 1 +
src/calibre/gui2/actions/convert.py | 1 +
src/calibre/gui2/actions/copy_to_library.py | 1 +
src/calibre/gui2/actions/delete.py | 1 +
src/calibre/gui2/actions/edit_collections.py | 1 +
src/calibre/gui2/actions/edit_metadata.py | 1 +
src/calibre/gui2/actions/open.py | 1 +
src/calibre/gui2/actions/save_to_disk.py | 1 +
src/calibre/gui2/actions/show_book_details.py | 1 +
src/calibre/gui2/actions/similar_books.py | 1 +
src/calibre/gui2/actions/view.py | 1 +
src/calibre/gui2/layout.py | 43 ++++++++++++++++---
src/calibre/gui2/preferences/look_feel.py | 1 +
src/calibre/gui2/preferences/look_feel.ui | 7 +++
18 files changed, 65 insertions(+), 6 deletions(-)
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 1b61404589..e58dce5559 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = (
gprefs.defaults['show_splash_screen'] = True
gprefs.defaults['toolbar_icon_size'] = 'medium'
gprefs.defaults['toolbar_text'] = 'auto'
+gprefs.defaults['show_child_bar'] = False
# }}}
diff --git a/src/calibre/gui2/actions/__init__.py b/src/calibre/gui2/actions/__init__.py
index 57ad900fba..b2d1656367 100644
--- a/src/calibre/gui2/actions/__init__.py
+++ b/src/calibre/gui2/actions/__init__.py
@@ -71,6 +71,12 @@ class InterfaceAction(QObject):
all_locations = frozenset(['toolbar', 'toolbar-device', 'context-menu',
'context-menu-device'])
+ #: Type of action
+ #: 'current' means acts on the current view
+ #: 'global' means an action that does not act on the current view, but rather
+ #: on calibre as a whole
+ action_type = 'global'
+
def __init__(self, parent, site_customization):
QObject.__init__(self, parent)
self.setObjectName(self.name)
diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py
index f0ff794fab..add7bf1d5b 100644
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@@ -25,6 +25,7 @@ class AddAction(InterfaceAction):
action_spec = (_('Add books'), 'add_book.png',
_('Add books to the calibre library/device from files on your computer')
, _('A'))
+ action_type = 'current'
def genesis(self):
self._add_filesystem_book = self.Dispatcher(self.__add_filesystem_book)
diff --git a/src/calibre/gui2/actions/add_to_library.py b/src/calibre/gui2/actions/add_to_library.py
index 6fc0d5fb1f..05aea8f1dd 100644
--- a/src/calibre/gui2/actions/add_to_library.py
+++ b/src/calibre/gui2/actions/add_to_library.py
@@ -13,6 +13,7 @@ class AddToLibraryAction(InterfaceAction):
action_spec = (_('Add books to library'), 'add_book.png',
_('Add books to your calibre library from the connected device'), None)
dont_add_to = frozenset(['toolbar', 'context-menu'])
+ action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self.add_books_to_library)
diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py
index 5356d63e98..dfafcd1a39 100644
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@@ -18,6 +18,7 @@ class FetchAnnotationsAction(InterfaceAction):
name = 'Fetch Annotations'
action_spec = (_('Fetch annotations (experimental)'), None, None, None)
+ action_type = 'current'
def genesis(self):
pass
diff --git a/src/calibre/gui2/actions/convert.py b/src/calibre/gui2/actions/convert.py
index ee0f06ab71..29acfc52b1 100644
--- a/src/calibre/gui2/actions/convert.py
+++ b/src/calibre/gui2/actions/convert.py
@@ -21,6 +21,7 @@ class ConvertAction(InterfaceAction):
name = 'Convert Books'
action_spec = (_('Convert books'), 'convert.png', None, _('C'))
dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+ action_type = 'current'
def genesis(self):
cm = QMenu()
diff --git a/src/calibre/gui2/actions/copy_to_library.py b/src/calibre/gui2/actions/copy_to_library.py
index 7127c91e8c..6b7654f644 100644
--- a/src/calibre/gui2/actions/copy_to_library.py
+++ b/src/calibre/gui2/actions/copy_to_library.py
@@ -80,6 +80,7 @@ class CopyToLibraryAction(InterfaceAction):
_('Copy selected books to the specified library'), None)
popup_type = QToolButton.InstantPopup
dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+ action_type = 'current'
def genesis(self):
self.menu = QMenu(self.gui)
diff --git a/src/calibre/gui2/actions/delete.py b/src/calibre/gui2/actions/delete.py
index 0343c6df84..406860e4ec 100644
--- a/src/calibre/gui2/actions/delete.py
+++ b/src/calibre/gui2/actions/delete.py
@@ -16,6 +16,7 @@ class DeleteAction(InterfaceAction):
name = 'Remove Books'
action_spec = (_('Remove books'), 'trash.png', None, _('Del'))
+ action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self.delete_books)
diff --git a/src/calibre/gui2/actions/edit_collections.py b/src/calibre/gui2/actions/edit_collections.py
index e45d36fc62..7f5dd76538 100644
--- a/src/calibre/gui2/actions/edit_collections.py
+++ b/src/calibre/gui2/actions/edit_collections.py
@@ -13,6 +13,7 @@ class EditCollectionsAction(InterfaceAction):
action_spec = (_('Manage collections'), None,
_('Manage the collections on this device'), None)
dont_add_to = frozenset(['toolbar', 'context-menu'])
+ action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self.edit_collections)
diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index 878ba77a43..ac04652efa 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -22,6 +22,7 @@ class EditMetadataAction(InterfaceAction):
name = 'Edit Metadata'
action_spec = (_('Edit metadata'), 'edit_input.png', None, _('E'))
+ action_type = 'current'
def genesis(self):
self.create_action(spec=(_('Merge book records'), 'merge_books.png',
diff --git a/src/calibre/gui2/actions/open.py b/src/calibre/gui2/actions/open.py
index 106bfa24f6..141ff01a66 100644
--- a/src/calibre/gui2/actions/open.py
+++ b/src/calibre/gui2/actions/open.py
@@ -14,6 +14,7 @@ class OpenFolderAction(InterfaceAction):
action_spec = (_('Open containing folder'), 'document_open.png', None,
_('O'))
dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+ action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self.gui.iactions['View'].view_folder)
diff --git a/src/calibre/gui2/actions/save_to_disk.py b/src/calibre/gui2/actions/save_to_disk.py
index bfcc02e130..e9664b9980 100644
--- a/src/calibre/gui2/actions/save_to_disk.py
+++ b/src/calibre/gui2/actions/save_to_disk.py
@@ -38,6 +38,7 @@ class SaveToDiskAction(InterfaceAction):
name = "Save To Disk"
action_spec = (_('Save to disk'), 'save.png', None, _('S'))
+ action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self.save_to_disk)
diff --git a/src/calibre/gui2/actions/show_book_details.py b/src/calibre/gui2/actions/show_book_details.py
index d17d0998f1..18b0a694bf 100644
--- a/src/calibre/gui2/actions/show_book_details.py
+++ b/src/calibre/gui2/actions/show_book_details.py
@@ -16,6 +16,7 @@ class ShowBookDetailsAction(InterfaceAction):
action_spec = (_('Show book details'), 'dialog_information.png', None,
_('I'))
dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+ action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self.show_book_info)
diff --git a/src/calibre/gui2/actions/similar_books.py b/src/calibre/gui2/actions/similar_books.py
index 1a14869a9c..644cd3160a 100644
--- a/src/calibre/gui2/actions/similar_books.py
+++ b/src/calibre/gui2/actions/similar_books.py
@@ -16,6 +16,7 @@ class SimilarBooksAction(InterfaceAction):
name = 'Similar Books'
action_spec = (_('Similar books...'), None, None, None)
popup_type = QToolButton.InstantPopup
+ action_type = 'current'
def genesis(self):
m = QMenu(self.gui)
diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py
index 2f6be24e5b..0fbf86c567 100644
--- a/src/calibre/gui2/actions/view.py
+++ b/src/calibre/gui2/actions/view.py
@@ -22,6 +22,7 @@ class ViewAction(InterfaceAction):
name = 'View'
action_spec = (_('View'), 'view.png', None, _('V'))
+ action_type = 'current'
def genesis(self):
self.persistent_files = []
diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py
index 58d5267c8e..ec7e023dc1 100644
--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@@ -61,7 +61,7 @@ class LocationManager(QObject): # {{{
ac('library', _('Library'), 'lt.png',
_('Show books in calibre library'))
- ac('main', _('Reader'), 'reader.png',
+ ac('main', _('Device'), 'reader.png',
_('Show books in the main memory of the device'))
ac('carda', _('Card A'), 'sd.png',
_('Show books in storage card A'))
@@ -197,11 +197,21 @@ class SearchBar(QWidget): # {{{
# }}}
+class Spacer(QWidget):
+
+ def __init__(self, parent):
+ QWidget.__init__(self, parent)
+ self.l = QHBoxLayout()
+ self.setLayout(self.l)
+ self.l.addStretch(10)
+
+
class ToolBar(QToolBar): # {{{
- def __init__(self, donate, location_manager, parent):
+ def __init__(self, donate, location_manager, child_bar, parent):
QToolBar.__init__(self, parent)
self.gui = parent
+ self.child_bar = child_bar
self.setContextMenuPolicy(Qt.PreventContextMenu)
self.setMovable(False)
self.setFloatable(False)
@@ -223,16 +233,19 @@ class ToolBar(QToolBar): # {{{
sz = gprefs['toolbar_icon_size']
sz = {'small':24, 'medium':48, 'large':64}[sz]
self.setIconSize(QSize(sz, sz))
+ self.child_bar.setIconSize(QSize(sz, sz))
style = Qt.ToolButtonTextUnderIcon
if gprefs['toolbar_text'] == 'never':
style = Qt.ToolButtonIconOnly
self.setToolButtonStyle(style)
+ self.child_bar.setToolButtonStyle(style)
self.donate_button.set_normal_icon_size(sz, sz)
def contextMenuEvent(self, *args):
pass
def build_bar(self):
+ self.child_bar.setVisible(gprefs['show_child_bar'])
self.showing_donate = False
showing_device = self.location_manager.has_device
actions = '-device' if showing_device else ''
@@ -244,10 +257,16 @@ class ToolBar(QToolBar): # {{{
m.setVisible(False)
self.clear()
+ self.child_bar.clear()
self.added_actions = []
+ self.spacers = [Spacer(self.child_bar), Spacer(self.child_bar),
+ Spacer(self), Spacer(self)]
+ self.child_bar.addWidget(self.spacers[0])
+ if gprefs['show_child_bar']:
+ self.addWidget(self.spacers[2])
for what in actions:
- if what is None:
+ if what is None and not gprefs['show_child_bar']:
self.addSeparator()
elif what == 'Location Manager':
for ac in self.location_manager.available_actions:
@@ -262,12 +281,21 @@ class ToolBar(QToolBar): # {{{
self.showing_donate = True
elif what in self.gui.iactions:
action = self.gui.iactions[what]
- self.addAction(action.qaction)
+ bar = self
+ if action.action_type == 'current' and gprefs['show_child_bar']:
+ bar = self.child_bar
+ bar.addAction(action.qaction)
self.added_actions.append(action.qaction)
self.setup_tool_button(action.qaction, action.popup_type)
+ self.child_bar.addWidget(self.spacers[1])
+ if gprefs['show_child_bar']:
+ self.addWidget(self.spacers[3])
+
def setup_tool_button(self, ac, menu_mode=None):
ch = self.widgetForAction(ac)
+ if ch is None:
+ ch = self.child_bar.widgetForAction(ac)
ch.setCursor(Qt.PointingHandCursor)
ch.setAutoRaise(True)
if ac.menu() is not None and menu_mode is not None:
@@ -280,7 +308,8 @@ class ToolBar(QToolBar): # {{{
if p == 'never':
style = Qt.ToolButtonIconOnly
- if p == 'auto' and self.preferred_width > self.width()+35:
+ if p == 'auto' and self.preferred_width > self.width()+35 and \
+ not gprefs['show_child_bar']:
style = Qt.ToolButtonIconOnly
self.setToolButtonStyle(style)
@@ -309,9 +338,11 @@ class MainWindowMixin(object): # {{{
self.iactions['Fetch News'].init_scheduler(db)
self.search_bar = SearchBar(self)
+ self.child_bar = QToolBar(self)
self.tool_bar = ToolBar(self.donate_button,
- self.location_manager, self)
+ self.location_manager, self.child_bar, self)
self.addToolBar(Qt.TopToolBarArea, self.tool_bar)
+ self.addToolBar(Qt.BottomToolBarArea, self.child_bar)
l = self.centralwidget.layout()
l.addWidget(self.search_bar)
diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py
index f30b2fddbb..10c2fcfe95 100644
--- a/src/calibre/gui2/preferences/look_feel.py
+++ b/src/calibre/gui2/preferences/look_feel.py
@@ -46,6 +46,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('use_roman_numerals_for_series_number', config)
r('separate_cover_flow', config, restart_required=True)
r('search_as_you_type', config)
+ r('show_child_bar', gprefs)
choices = [(_('Small'), 'small'), (_('Medium'), 'medium'),
(_('Large'), 'large')]
diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui
index 7c6c736b24..1de55d51ef 100644
--- a/src/calibre/gui2/preferences/look_feel.ui
+++ b/src/calibre/gui2/preferences/look_feel.ui
@@ -173,6 +173,13 @@
+   <item>
+    <widget class="QCheckBox" name="opt_show_child_bar">
+     <property name="text">
+      <string>&amp;Split the toolbar into two toolbars</string>
+     </property>
+    </widget>
+   </item>
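Taken together, PATCH 43 adds the show_child_bar preference, binds it to the new checkbox through the existing r('show_child_bar', gprefs) registration, and routes actions between the two bars in ToolBar.build_bar(). A standalone sketch of that routing rule, with a plain dict standing in for gprefs and made-up action names:

    gprefs = {'show_child_bar': True}

    # (name, action_type) pairs; the names are illustrative
    actions = [('Add Books', 'current'), ('Convert Books', 'current'),
               ('Fetch News', 'global'), ('Preferences', 'global')]

    main_bar, child_bar = [], []
    for name, action_type in actions:
        # Same rule as build_bar(): actions that act on the current view go
        # to the bottom child bar, but only when the preference is enabled.
        if action_type == 'current' and gprefs['show_child_bar']:
            child_bar.append(name)
        else:
            main_bar.append(name)

    print('main bar:  ' + ', '.join(main_bar))
    print('child bar: ' + ', '.join(child_bar))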