From 6b421643705fb3e575bdda1225171485ba01965a Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Sun, 30 Jan 2011 18:11:15 +0800
Subject: [PATCH 01/55] adjusted margins for scene break heuristics

---
 src/calibre/ebooks/conversion/utils.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 5beefb5bd9..a115e584b6 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -26,7 +26,7 @@ class HeuristicProcessor(object):
         self.blanks_deleted = False
         self.blanks_between_paragraphs = False
         self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
-        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
+        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|whitespace)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
 
@@ -423,21 +423,21 @@ class HeuristicProcessor(object):
         blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
         blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
         
-        def markup_spacers(match):
+        def markup_whitespaces(match):
            blanks = match.group(0)
-           blanks = self.blankreg.sub('\n<p class="spacer"> </p>', blanks)
+           blanks = self.blankreg.sub('\n<p class="whitespace"> </p>', blanks)
            return blanks
-        html = blanks_before_headings.sub(markup_spacers, html)
-        html = blanks_after_headings.sub(markup_spacers, html)
+        html = blanks_before_headings.sub(markup_whitespaces, html)
+        html = blanks_after_headings.sub(markup_whitespaces, html)
         if self.html_preprocess_sections > self.min_chapters:
-            html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html)
+            html = re.sub('(?si)^.*?(?=<h\d)', markup_whitespaces, html)
         return html
 
     def detect_soft_breaks(self, html):
         if not self.blanks_deleted and self.blanks_between_paragraphs:
-            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
+            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; margin-bottom:.5em; page-break-before:avoid"> </p>', html)
         else:
-            html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
+            html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; margin-bottom:.5em; page-break-before:avoid"> </p>', html)
         return html
 
 
@@ -489,6 +489,7 @@ class HeuristicProcessor(object):
 
         if getattr(self.extra_opts, 'markup_chapter_headings', False):
             html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
+        self.dump(html, 'after_chapter_markup')
 
         if getattr(self.extra_opts, 'italicize_common_cases', False):
             html = self.markup_italicis(html)
@@ -498,7 +499,7 @@ class HeuristicProcessor(object):
         if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
             self.log.debug("deleting blank lines")
             self.blanks_deleted = True
-            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
+            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; margin-bottom:.5em; page-break-before:avoid"> </p>', html)
             html = self.blankreg.sub('', html)
 
         # Determine line ending type
@@ -553,7 +554,7 @@ class HeuristicProcessor(object):
             html = self.detect_blank_formatting(html)
             html = self.detect_soft_breaks(html)
             # Center separator lines
-            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
+            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:.5em; margin-bottom:.5em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
             #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
 
         if self.deleted_nbsps:

From e8153d5e6900df625125900c6bab539533acc502 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Mon, 31 Jan 2011 01:36:08 +0800
Subject: [PATCH 02/55] merge multiple blank paragraphs

---
 src/calibre/ebooks/conversion/utils.py | 44 ++++++++++++++++++++------
 src/calibre/ebooks/txt/txtml.py        |  2 ++
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index a115e584b6..b37cd4b869 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -29,6 +29,7 @@ class HeuristicProcessor(object):
         self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|whitespace)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
+        self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
 
     def is_pdftohtml(self, src):
         return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
@@ -418,14 +419,32 @@ class HeuristicProcessor(object):
             if getattr(self.extra_opts, option, False):
                 return True
         return False
+        
+    def merge_blanks(self, html, blanks_count=None):
+        single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
+        base_em = .5 # Baseline is 1.5em per blank line, 1st line is .5 em css and 1em for the nbsp
+        em_per_line = 1.5 # Add another 1.5 em for each additional blank
+        
+        def merge_matches(match):
+            to_merge = match.group(0)
+            lines = float(len(single_blank.findall(to_merge))) - 1.
+            em = base_em + (em_per_line * lines)
+            if to_merge.find('whitespace'):
+                newline = self.any_multi_blank.sub('\n<p class="whitespace'+str(int(em * 10))+'" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
+            else:
+                newline = self.any_multi_blank.sub('\n<p class="softbreak'+str(int(em * 10))+'" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
+            return newline
+            
+        html = self.any_multi_blank.sub(merge_matches, html)
+        return html
 
-    def detect_blank_formatting(self, html):
+    def detect_whitespace(self, html):
         blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
         blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
         
         def markup_whitespaces(match):
            blanks = match.group(0)
-           blanks = self.blankreg.sub('\n<p class="whitespace"> </p>', blanks)
+           blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:.5em"> </p>', blanks)
            return blanks
         html = blanks_before_headings.sub(markup_whitespaces, html)
         html = blanks_after_headings.sub(markup_whitespaces, html)
@@ -435,9 +454,9 @@ class HeuristicProcessor(object):
 
     def detect_soft_breaks(self, html):
         if not self.blanks_deleted and self.blanks_between_paragraphs:
-            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; margin-bottom:.5em; page-break-before:avoid"> </p>', html)
+            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html)
         else:
-            html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; margin-bottom:.5em; page-break-before:avoid"> </p>', html)
+            html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
         return html
 
 
@@ -499,7 +518,7 @@ class HeuristicProcessor(object):
         if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
             self.log.debug("deleting blank lines")
             self.blanks_deleted = True
-            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; margin-bottom:.5em; page-break-before:avoid"> </p>', html)
+            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
             html = self.blankreg.sub('', html)
 
         # Determine line ending type
@@ -550,14 +569,21 @@ class HeuristicProcessor(object):
             doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
             html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
 
+        # If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks,
+        # style it with the 'whitespace' class.  All remaining blank lines are styled as softbreaks.
+        # Multiple sequential blank paragraphs are merged with appropriate margins
+        # If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
         if getattr(self.extra_opts, 'format_scene_breaks', False):
-            html = self.detect_blank_formatting(html)
+            html = self.detect_whitespace(html)
             html = self.detect_soft_breaks(html)
-            # Center separator lines
-            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:.5em; margin-bottom:.5em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
+            blanks_count = len(self.any_multi_blank.findall(html))
+            if blanks_count >= 1:
+                html = self.merge_blanks(html, blanks_count)
+            # Center separator lines, use a bit larger margin in this case
+            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
             #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
 
         if self.deleted_nbsps:
-            # put back non-breaking spaces in empty paragraphs to preserve original formatting
+            # put back non-breaking spaces in empty paragraphs so they render correctly
             html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
         return html
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 00992a8612..bf33e5540a 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -222,6 +222,8 @@ class TXTMLizer(object):
         # Scene breaks.
         if tag == 'hr':
             text.append('\n\n* * *\n\n')
+        elif style['margin-top']:
+            text.append('\n\n' + '\n' * round(style['margin-top']))
 
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:

From 31c277880e6fce5b2d99e8fdfdede943804b6917 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Mon, 31 Jan 2011 09:39:28 +0800
Subject: [PATCH 03/55] scene break detection to detect any repeating non-word
 character

---
 src/calibre/ebooks/conversion/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index b37cd4b869..d0dc81405b 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -580,10 +580,10 @@ class HeuristicProcessor(object):
             if blanks_count >= 1:
                 html = self.merge_blanks(html, blanks_count)
             # Center separator lines, use a bit larger margin in this case
-            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
+            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?![\w\'\"])(?P<break>((?P<breakchar>(?!\s)\W)\s*(?P=breakchar)?)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
             #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
 
         if self.deleted_nbsps:
             # put back non-breaking spaces in empty paragraphs so they render correctly
             html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
-        return html
+        return html
\ No newline at end of file

From a96c73480d6a014e0b446c5003d773c8c48bb022 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Mon, 31 Jan 2011 16:19:47 +0800
Subject: [PATCH 04/55] fixed overmatching/substitution issue in italicize
 function

---
 src/calibre/ebooks/conversion/utils.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index d0dc81405b..74afbe7a42 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -159,7 +159,7 @@ class HeuristicProcessor(object):
         ]
 
         for word in ITALICIZE_WORDS:
-            html = re.sub(r'(?<=\s|>)' + word + r'(?=\s|<)', '<i>%s</i>' % word, html)
+            html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
 
         for pat in ITALICIZE_STYLE_PATS:
             html = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), html)
@@ -375,8 +375,8 @@ class HeuristicProcessor(object):
         html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
         # Delete microsoft 'smart' tags
         html = re.sub('(?i)</?st1:\w+>', '', html)
-        # Delete self closing paragraph tags
-        html = re.sub('<p\s?/>', '', html)
+        # Re-open self closing paragraph tags
+        html = re.sub('<p[^>/]*/>', '<p> </p>', html)
         # Get rid of empty span, bold, font, em, & italics tags
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
         html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
@@ -463,7 +463,6 @@ class HeuristicProcessor(object):
 
     def __call__(self, html):
         self.log.debug("*********  Heuristic processing HTML  *********")
-
         # Count the words in the document to estimate how many chapters to look for and whether
         # other types of processing are attempted
         try:
@@ -477,7 +476,7 @@ class HeuristicProcessor(object):
 
         # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
         html = self.arrange_htm_line_endings(html)
-
+        self.dump(html, 'after_arrange_line_endings')
         if self.cleanup_required():
             ###### Check Markup ######
             #
@@ -580,7 +579,9 @@ class HeuristicProcessor(object):
             if blanks_count >= 1:
                 html = self.merge_blanks(html, blanks_count)
             # Center separator lines, use a bit larger margin in this case
-            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?![\w\'\"])(?P<break>((?P<breakchar>(?!\s)\W)\s*(?P=breakchar)?)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
+            scene_break = re.compile(r'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', re.IGNORECASE|re.UNICODE)
+            print "found "+str(len(scene_break.findall(html)))+" scene breaks"
+            html = scene_break.sub('<p class="scenebreak" style="text-align:center; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
             #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
 
         if self.deleted_nbsps:

From 5596f506a7a511eea83f3dad86e93ac87fb9f757 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Tue, 1 Feb 2011 01:51:22 +0800
Subject: [PATCH 05/55] improved scene break/whitespace formatting

---
 src/calibre/ebooks/conversion/utils.py | 60 +++++++++++++++++++-------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 74afbe7a42..77086efd97 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -30,6 +30,9 @@ class HeuristicProcessor(object):
         self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
         self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
+        self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
+        self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
+        self.single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
 
     def is_pdftohtml(self, src):
         return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
@@ -188,19 +191,17 @@ class HeuristicProcessor(object):
 
         # Build the Regular Expressions in pieces
         init_lookahead = "(?=<(p|div))"
-        chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
+        chapter_line_open = self.line_open
         title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
         chapter_header_open = r"(?P<chap>"
         title_header_open = r"(?P<title>"
         chapter_header_close = ")\s*"
         title_header_close = ")"
-        chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
+        chapter_line_close = self.line_close
         title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
 
         is_pdftohtml = self.is_pdftohtml(html)
         if is_pdftohtml:
-            chapter_line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
-            chapter_line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
             title_line_open = "<(?P<outer2>p)[^>]*>\s*"
             title_line_close = "\s*</(?P=outer2)>"
 
@@ -382,6 +383,8 @@ class HeuristicProcessor(object):
         html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
         html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
+        # Empty heading tags
+        html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html)
         self.deleted_nbsps = True
         return html
 
@@ -421,13 +424,12 @@ class HeuristicProcessor(object):
         return False
         
     def merge_blanks(self, html, blanks_count=None):
-        single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
         base_em = .5 # Baseline is 1.5em per blank line, 1st line is .5 em css and 1em for the nbsp
         em_per_line = 1.5 # Add another 1.5 em for each additional blank
         
         def merge_matches(match):
             to_merge = match.group(0)
-            lines = float(len(single_blank.findall(to_merge))) - 1.
+            lines = float(len(self.single_blank.findall(to_merge))) - 1.
             em = base_em + (em_per_line * lines)
             if to_merge.find('whitespace'):
                 newline = self.any_multi_blank.sub('\n<p class="whitespace'+str(int(em * 10))+'" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
@@ -439,17 +441,37 @@ class HeuristicProcessor(object):
         return html
 
     def detect_whitespace(self, html):
-        blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
-        blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
+        blanks_around_headings = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)                                     
+        blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
         
+        def merge_header_whitespace(match):
+            initblanks = match.group('initparas')
+            endblanks = match.group('initparas') 
+            heading = match.group('heading')
+            top_margin = ''
+            bottom_margin = ''
+            if initblanks is not None:
+                top_margin = 'margin=top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
+            if endblanks is not None:
+                bottom_margin = 'margin=top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
+
+            if initblanks == None and endblanks == None:
+                return heading
+            else:
+                heading = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', heading)
+            return heading
+
+        html = blanks_around_headings.sub(merge_header_whitespace, html)
+
         def markup_whitespaces(match):
-           blanks = match.group(0)
-           blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:.5em"> </p>', blanks)
-           return blanks
-        html = blanks_before_headings.sub(markup_whitespaces, html)
-        html = blanks_after_headings.sub(markup_whitespaces, html)
+            blanks = match.group(0)
+            blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:.5em"> </p>', blanks)
+            return blanks
+
+        html = blanks_n_nopunct.sub(markup_whitespaces, html)
         if self.html_preprocess_sections > self.min_chapters:
             html = re.sub('(?si)^.*?(?=<h\d)', markup_whitespaces, html)
+
         return html
 
     def detect_soft_breaks(self, html):
@@ -496,6 +518,11 @@ class HeuristicProcessor(object):
             # fix indents must run before this step, as it removes non-breaking spaces
             html = self.cleanup_markup(html)
 
+        is_pdftohtml = self.is_pdftohtml(html)
+        if is_pdftohtml:
+            self.line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
+            self.line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
+
         # ADE doesn't render <br />, change to empty paragraphs
         #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
 
@@ -558,7 +585,7 @@ class HeuristicProcessor(object):
         if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
             self.log.debug("Looking for more split points based on punctuation,"
                     " currently have " + unicode(self.html_preprocess_sections))
-            chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
+            chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
             html = chapdetect3.sub(self.chapter_break, html)
 
         if getattr(self.extra_opts, 'renumber_headings', False):
@@ -579,9 +606,10 @@ class HeuristicProcessor(object):
             if blanks_count >= 1:
                 html = self.merge_blanks(html, blanks_count)
             # Center separator lines, use a bit larger margin in this case
-            scene_break = re.compile(r'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', re.IGNORECASE|re.UNICODE)
+            scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
+            scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
             print "found "+str(len(scene_break.findall(html)))+" scene breaks"
-            html = scene_break.sub('<p class="scenebreak" style="text-align:center; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
+            html = scene_break.sub('<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
             #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
 
         if self.deleted_nbsps:

From d34451b6d1bcd27d89c6dcbccadaa3c5bb0c079f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 15:45:17 -0700
Subject: [PATCH 06/55] EPUB Input: Filter made media types from the spine,
 currently only filter Adobe page templates

---
 src/calibre/ebooks/epub/input.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py
index ec2004d81c..e22ed27371 100644
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
                 raise ValueError(
                     'EPUB files with DTBook markup are not supported')
 
+        for x in list(opf.iterspine()):
+            ref = x.get('idref', None)
+            if ref is None:
+                x.getparent().remove(x)
+                continue
+            for y in opf.itermanifest():
+                if y.get('id', None) == ref and y.get('media-type', None) in \
+                    ('application/vnd.adobe-page-template+xml',):
+                        p = x.getparent()
+                        if p is not None:
+                            p.remove(x)
+                        break
+
         with open('content.opf', 'wb') as nopf:
             nopf.write(opf.render())
 

From 5849b45d11203a8172ff6c24dca21a9f651bb54c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 16:25:32 -0700
Subject: [PATCH 07/55] Fix #8688 (Calibre fails to convert some buggy chm
 ebooks which doesn't have .hhc file.)

---
 src/calibre/ebooks/chm/reader.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py
index 025e252005..04ce6d5efe 100644
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@@ -139,6 +139,13 @@ class CHMReader(CHMFile):
         if self.hhc_path not in files and files:
             self.hhc_path = files[0]
 
+        if self.hhc_path == '.hhc' and self.hhc_path not in files:
+            from calibre import walk
+            for x in walk(output_dir):
+                if os.path.basename(x).lower() in ('index.htm', 'index.html'):
+                    self.hhc_path = os.path.relpath(x, output_dir)
+                    break
+
     def _reformat(self, data, htmlpath):
         try:
             data = xml_to_unicode(data, strip_encoding_pats=True)[0]

From c47bacb016eabdd6870d2a3409b1d2f2ba29f8eb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 17:12:22 -0700
Subject: [PATCH 08/55] Update 20 Minutos

---
 resources/recipes/20_minutos.recipe | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/resources/recipes/20_minutos.recipe b/resources/recipes/20_minutos.recipe
index cb3002a76c..106c0dcffa 100644
--- a/resources/recipes/20_minutos.recipe
+++ b/resources/recipes/20_minutos.recipe
@@ -1,25 +1,25 @@
-# -*- coding: utf-8
 __license__   = 'GPL v3'
 __author__    = 'Luis Hernandez'
 __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
-description   = 'Periódico gratuito en español - v0.8 - 27 Jan 2011'
+__version__     = 'v0.85'
+__date__        = '31 January 2011'
 
 '''
 www.20minutos.es
 '''
-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1294946868(BasicNewsRecipe):
 
-    title          = u'20 Minutos'
+    title          = u'20 Minutos new'
     publisher      = u'Grupo 20 Minutos'
 
-    __author__            = 'Luis Hernández'
-    description           = 'Periódico gratuito en español'
+    __author__            = 'Luis Hernandez'
+    description           = 'Free spanish newspaper'
     cover_url     = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
 
-    oldest_article = 5
+    oldest_article = 2
     max_articles_per_feed = 100
 
     remove_javascript = True
@@ -29,6 +29,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     encoding              = 'ISO-8859-1'
     language              = 'es'
     timefmt        = '[%a, %d %b, %Y]'
+    remove_empty_feeds    = True
 
     keep_only_tags     = [
                                    dict(name='div', attrs={'id':['content','vinetas',]})
@@ -43,13 +44,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     remove_tags = [
                      dict(name='ol', attrs={'class':['navigation',]})
                     ,dict(name='span', attrs={'class':['action']})
-                    ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
+                    ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
                     ,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
                     ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
                     ,dict(name='ul', attrs={'id':['site-links']})
                     ,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
                        ]
 
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+                                 """
+
+    preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
+
     feeds = [
               (u'Portada'              , u'http://www.20minutos.es/rss/')
              ,(u'Nacional'             , u'http://www.20minutos.es/rss/nacional/')
@@ -65,6 +74,6 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
              ,(u'Empleo'              , u'http://www.20minutos.es/rss/empleo/')
              ,(u'Cine'                  , u'http://www.20minutos.es/rss/cine/')
              ,(u'Musica'               , u'http://www.20minutos.es/rss/musica/')
-             ,(u'Vinetas'              , u'http://www.20minutos.es/rss/vinetas/')
+             ,(u'Vinetas'          , u'http://www.20minutos.es/rss/vinetas/')
              ,(u'Comunidad20'     , u'http://www.20minutos.es/rss/zona20/')
             ]

From 527f11e32e5d42b2cdda5a3439189dad9dc154ff Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 17:15:02 -0700
Subject: [PATCH 09/55] Cinco Dias by Luis Hernandez

---
 resources/images/news/latimes.png   | Bin 0 -> 358 bytes
 resources/recipes/cinco_dias.recipe |  71 ++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 resources/images/news/latimes.png
 create mode 100644 resources/recipes/cinco_dias.recipe

diff --git a/resources/images/news/latimes.png b/resources/images/news/latimes.png
new file mode 100644
index 0000000000000000000000000000000000000000..62bb4d0b8a2586c4884c4ccbac5b481bff096309
GIT binary patch
literal 358
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!60wlNoGJgf6n3BBRT^JZv^(q?yd7K3vk;OpT
z1B~5HX4`=T%L*LRfizezL(H+Yhk=~Qo-U3d8t11@4CHM#5OL)XZfNmb!6K6*q{XV9
z!gat=b%N#%84>n$d4~)!g@@msFMYq~`s~|WmREDNqm`6p1$K&ee13VRF63-l_VwSa
z99j7WD{t=D8onTOa)<9t5pMp*UpJnJuc(w`W)}Wn!KTJ$ahu`&(}j~4JlHxrRq)D-
zH7WbWLsmYMf6?2<ntQr(HTV9V>*WJZb$#?VG66YWwZt`|BqgyV)hf9t6-Y4{85kPs
z8W`#t8-^GfS{WKxnVRbwm|GbbFwC#DN70a*pOTqYiK4;C%FxKlzyhM7ER_8(Py>Uf
LtDnm{r-UW|G&pfi

literal 0
HcmV?d00001

diff --git a/resources/recipes/cinco_dias.recipe b/resources/recipes/cinco_dias.recipe
new file mode 100644
index 0000000000..40241aff5c
--- /dev/null
+++ b/resources/recipes/cinco_dias.recipe
@@ -0,0 +1,71 @@
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+__version__     = 'v1.2'
+__date__        = '31 January 2011'
+
+'''
+http://www.cincodias.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title          = u'Cinco Dias'
+    publisher      = u'Grupo Prisa'
+
+    __author__            = 'Luis Hernandez'
+    description           = 'spanish web about money and bussiness, free edition'
+
+    cover_url     = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif'
+    oldest_article = 2
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    language              = 'es'
+    remove_empty_feeds    = True
+    encoding               = 'ISO-8859-1'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    keep_only_tags     = [
+                                    dict(name='div', attrs={'class':['cab_articulo cab_noticia','pos_3','txt_noticia','mod_despiece']})
+                                   ,dict(name='p', attrs={'class':['cintillo']})
+                                ]
+
+    remove_tags_before = dict(name='div' , attrs={'class':['publi_h']})
+    remove_tags_after = dict(name='div' , attrs={'class':['tab_util util_estadisticas']})
+
+    remove_tags = [
+                             dict(name='div', attrs={'class':['util-1','util-2','util-3','inner estirar','inner1','inner2','inner3','cont','tab_util util_estadisticas','tab_util util_enviar','mod_list_inf','mod_similares','mod_divisas','mod_sectores','mod_termometro','mod post','mod_img','mod_txt','nivel estirar','barra estirar','info_brujula btnBrujula','utilidad_brujula estirar']})
+                            ,dict(name='li', attrs={'class':['lnk-fcbook','lnk-retweet','lnk-meneame','desplegable','comentarios','list-options','estirar']})
+                            ,dict(name='ul', attrs={'class':['lista-izquierda','list-options','estirar']})
+                            ,dict(name='p', attrs={'class':['autor']})
+                         ]
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
+                                 """
+
+    feeds = [
+                  (u'Ultima Hora'              , u'http://www.cincodias.com/rss/feed.html?feedId=17029')
+                 ,(u'Empresas'                 , u'http://www.cincodias.com/rss/feed.html?feedId=19')
+                 ,(u'Mercados'                 , u'http://www.cincodias.com/rss/feed.html?feedId=20')
+                 ,(u'Economia'                 , u'http://www.cincodias.com/rss/feed.html?feedId=21')
+                 ,(u'Tecnorama'               , u'http://www.cincodias.com/rss/feed.html?feedId=17230')
+                 ,(u'Tecnologia'                , u'http://www.cincodias.com/rss/feed.html?feedId=17106')
+                 ,(u'Finanzas Personales'  , u'http://www.cincodias.com/rss/feed.html?feedId=22')
+                 ,(u'Fiscalidad'                 , u'http://www.cincodias.com/rss/feed.html?feedId=17107')
+                 ,(u'Vivienda'                   , u'http://www.cincodias.com/rss/feed.html?feedId=17108')
+                 ,(u'Tendencias'               , u'http://www.cincodias.com/rss/feed.html?feedId=17109')
+                 ,(u'Empleo'                    , u'http://www.cincodias.com/rss/feed.html?feedId=17110')
+                 ,(u'IBEX 35'                    , u'http://www.cincodias.com/rss/feed.html?feedId=17125')
+                 ,(u'Sectores'                  , u'http://www.cincodias.com/rss/feed.html?feedId=17126')
+                 ,(u'Opinion'                    , u'http://www.cincodias.com/rss/feed.html?feedId=17105')
+              ]

From 241ef0b6e1135c33e2b61c6a75d4a65a6eaeb5a4 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 31 Jan 2011 19:57:43 -0500
Subject: [PATCH 10/55] Heuristics: Add replace soft scene break option.

---
 src/calibre/ebooks/conversion/cli.py     |  3 +-
 src/calibre/ebooks/conversion/plumber.py |  4 +++
 src/calibre/gui2/convert/heuristics.py   | 45 +++++++++++++++++++++++-
 src/calibre/gui2/convert/heuristics.ui   | 36 +++++++++++++++++++
 4 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 33ae61f16a..25179d48a7 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -45,7 +45,8 @@ For full documentation of the conversion system see
 HEURISTIC_OPTIONS = ['markup_chapter_headings',
                       'italicize_common_cases', 'fix_indents',
                       'html_unwrap_factor', 'unwrap_lines',
-                      'delete_blank_paragraphs', 'format_scene_breaks',
+                      'delete_blank_paragraphs',
+                      'format_scene_breaks', 'replace_soft_scene_breaks',
                       'dehyphenate', 'renumber_headings']
 
 def print_help(parser, log):
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 5807ba5f8f..2c37053759 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -530,6 +530,10 @@ OptionRecommendation(name='format_scene_breaks',
     help=_('Left aligned scene break markers are center aligned. '
            'Replace soft scene breaks that use multiple blank lines with'
            'horizontal rules.')),
+           
+OptionRecommendation(name='replace_soft_scene_breaks',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Replace soft scene breaks with the specified text.')),
 
 OptionRecommendation(name='dehyphenate',
     recommended_value=True, level=OptionRecommendation.LOW,
diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py
index e788888257..73b4622246 100644
--- a/src/calibre/gui2/convert/heuristics.py
+++ b/src/calibre/gui2/convert/heuristics.py
@@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
 
 from PyQt4.Qt import Qt
 
+from calibre.gui2 import gprefs
 from calibre.gui2.convert.heuristics_ui import Ui_Form
 from calibre.gui2.convert import Widget
 
@@ -21,17 +22,35 @@ class HeuristicsWidget(Widget, Ui_Form):
                 ['enable_heuristics', 'markup_chapter_headings',
                  'italicize_common_cases', 'fix_indents',
                  'html_unwrap_factor', 'unwrap_lines',
-                 'delete_blank_paragraphs', 'format_scene_breaks',
+                 'delete_blank_paragraphs',
+                 'format_scene_breaks', 'replace_soft_scene_breaks',
                  'dehyphenate', 'renumber_headings']
                 )
         self.db, self.book_id = db, book_id
+        self.rssb_defaults = ['', '<hr />', '* * *']
         self.initialize_options(get_option, get_help, db, book_id)
 
+        self.load_histories()
+
         self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
         self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
 
         self.enable_heuristics(self.opt_enable_heuristics.checkState())
 
+    def restore_defaults(self, get_option):
+        Widget.restore_defaults(self, get_option)
+        
+        rssb_hist = gprefs['replace_soft_scene_breaks_history']
+        for x in self.rssb_defaults:
+            if x in rssb_hist:
+                del rssb_hist[rssb_hist.index(x)]
+        gprefs['replace_soft_scene_breaks_history'] = self.rssb_defaults + gprefs['replace_soft_scene_breaks_history']
+
+    def commit_options(self, save_defaults=False):
+        Widget.commit_options(self, save_defaults)
+        
+        self.save_histories()
+
     def break_cycles(self):
         Widget.break_cycles(self)
 
@@ -45,6 +64,30 @@ class HeuristicsWidget(Widget, Ui_Form):
         if val is None and g is self.opt_html_unwrap_factor:
             g.setValue(0.0)
             return True
+        if not val and g is self.opt_replace_soft_scene_breaks:
+            g.lineEdit().setText('')
+            return True
+
+    def load_histories(self):
+        val = unicode(self.opt_replace_soft_scene_breaks.currentText())
+        rssb_hist = gprefs.get('replace_soft_scene_breaks_history', self.rssb_defaults)
+        if val in rssb_hist:
+            del rssb_hist[rssb_hist.index(val)]
+        rssb_hist.insert(0, val)
+        for v in rssb_hist:
+            # Ensure we don't have duplicate items.
+            if self.opt_replace_soft_scene_breaks.findText(v) == -1:
+                self.opt_replace_soft_scene_breaks.addItem(v)
+        self.opt_replace_soft_scene_breaks.setCurrentIndex(0)
+
+    def save_histories(self):
+        rssb_history = []
+        history_pats = [unicode(self.opt_replace_soft_scene_breaks.lineEdit().text())] + [unicode(self.opt_replace_soft_scene_breaks.itemText(i)) for i in xrange(self.opt_replace_soft_scene_breaks.count())]
+        for p in history_pats[:10]:
+            # Ensure we don't have duplicate items.
+            if p not in rssb_history:
+                rssb_history.append(p)
+        gprefs['replace_soft_scene_breaks_history'] = rssb_history
 
     def enable_heuristics(self, state):
         state = state == Qt.Checked
diff --git a/src/calibre/gui2/convert/heuristics.ui b/src/calibre/gui2/convert/heuristics.ui
index 6863fcf8e6..c047957d4d 100644
--- a/src/calibre/gui2/convert/heuristics.ui
+++ b/src/calibre/gui2/convert/heuristics.ui
@@ -150,6 +150,42 @@
         </property>
        </widget>
       </item>
+      <item>
+       <layout class="QHBoxLayout" name="horizontalLayout_2">
+        <property name="sizeConstraint">
+         <enum>QLayout::SetDefaultConstraint</enum>
+        </property>
+        <item>
+         <widget class="QLabel" name="label_2">
+          <property name="sizePolicy">
+           <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+            <horstretch>0</horstretch>
+            <verstretch>0</verstretch>
+           </sizepolicy>
+          </property>
+          <property name="text">
+           <string>Replace soft scene breaks:</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QComboBox" name="opt_replace_soft_scene_breaks">
+          <property name="sizePolicy">
+           <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
+            <horstretch>0</horstretch>
+            <verstretch>0</verstretch>
+           </sizepolicy>
+          </property>
+          <property name="editable">
+           <bool>true</bool>
+          </property>
+          <property name="insertPolicy">
+           <enum>QComboBox::InsertAtTop</enum>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
       <item>
        <widget class="QCheckBox" name="opt_dehyphenate">
         <property name="text">

From 360fd374c77d5d3f13c1b98b341a37a809154b72 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 18:01:13 -0700
Subject: [PATCH 11/55] FAQ: Add a note on how calibre is pronounced

---
 src/calibre/manual/faq.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 849ded82c9..59f6a9b88d 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -391,6 +391,8 @@ Take your pick:
   * A tribute to the SONY Librie which was the first e-ink based e-book reader
   * My wife chose it ;-)
 
+|app| is pronounced as cal-i-ber, *not* ca-libre. If you're wondering, |app| is the British/Commonwealth spelling of caliber. Being Indian, I find that the natural spelling.
+
 Why does |app| show only some of my fonts on OS X?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 |app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.

From 5d4c7388629914e40c122f17a0106a363de3f810 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 18:58:04 -0700
Subject: [PATCH 12/55] Fix #8672 (Converted format disappears while adding a
 new format)

---
 src/calibre/gui2/dialogs/metadata_single.py | 8 ++++++--
 src/calibre/gui2/metadata/basic_widgets.py  | 9 ++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 7a8e4ea8d0..fa20658c12 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -429,10 +429,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 old_extensions.add(ext)
         for ext in new_extensions:
             self.db.add_format(self.row, ext, open(paths[ext], 'rb'), notify=False)
-        db_extensions = set([f.lower() for f in self.db.formats(self.row).split(',')])
+        dbfmts = self.db.formats(self.row)
+        db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
+            else [])])
         extensions = new_extensions.union(old_extensions)
         for ext in db_extensions:
-            if ext not in extensions:
+            if ext not in extensions and ext in self.original_formats:
                 self.db.remove_format(self.row, ext, notify=False)
 
     def show_format(self, item, *args):
@@ -576,6 +578,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.orig_date = qt_to_dt(self.date.date())
 
         exts = self.db.formats(row)
+        self.original_formats = []
         if exts:
             exts = exts.split(',')
             for ext in exts:
@@ -586,6 +589,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 if size is None:
                     continue
                 Format(self.formats, ext, size, timestamp=timestamp)
+                self.original_formats.append(ext.lower())
 
 
         self.initialize_combos()
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index 590a8be3bb..d3fa5958ab 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -472,6 +472,7 @@ class FormatsManager(QWidget): # {{{
     def initialize(self, db, id_):
         self.changed = False
         exts = db.formats(id_, index_is_id=True)
+        self.original_val = set([])
         if exts:
             exts = exts.split(',')
             for ext in exts:
@@ -482,6 +483,7 @@ class FormatsManager(QWidget): # {{{
                 if size is None:
                     continue
                 Format(self.formats, ext, size, timestamp=timestamp)
+                self.original_val.add(ext.lower())
 
     def commit(self, db, id_):
         if not self.changed:
@@ -500,11 +502,12 @@ class FormatsManager(QWidget): # {{{
         for ext in new_extensions:
             db.add_format(id_, ext, open(paths[ext], 'rb'), notify=False,
                     index_is_id=True)
-        db_extensions = set([f.lower() for f in db.formats(id_,
-            index_is_id=True).split(',')])
+        dbfmts = db.formats(id_, index_is_id=True)
+        db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
+            else [])])
         extensions = new_extensions.union(old_extensions)
         for ext in db_extensions:
-            if ext not in extensions:
+            if ext not in extensions and ext in self.original_val:
                 db.remove_format(id_, ext, notify=False, index_is_id=True)
 
         self.changed = False

From d2ba1812bb0b0d9c95acd6c0e22287ce47502bc9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 31 Jan 2011 20:09:26 -0700
Subject: [PATCH 13/55] Initial import of new metadata download framework

---
 src/calibre/ebooks/metadata/sources/base.py   |  61 +++++
 src/calibre/ebooks/metadata/sources/google.py | 215 ++++++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 src/calibre/ebooks/metadata/sources/base.py
 create mode 100644 src/calibre/ebooks/metadata/sources/google.py

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
new file mode 100644
index 0000000000..89ad8a7956
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from calibre.customize import Plugin
+
+class Source(Plugin):
+
+    type = _('Metadata source')
+    author = 'Kovid Goyal'
+
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    result_of_identify_is_complete = True
+
+    def get_author_tokens(self, authors):
+        'Take a list of authors and return a list of tokens useful for a '
+        'AND search query'
+        # Leave ' in there for Irish names
+        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
+        for au in authors:
+            for tok in au.split():
+                yield pat.sub('', tok)
+
+    def split_jobs(self, jobs, num):
+        'Split a list of jobs into at most num groups, as evenly as possible'
+        groups = [[] for i in range(num)]
+        jobs = list(jobs)
+        while jobs:
+            for gr in groups:
+                try:
+                    job = jobs.pop()
+                except IndexError:
+                    break
+                gr.append(job)
+        return [g for g in groups if g]
+
+    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
+        '''
+        Identify a book by its title/author/isbn/etc.
+
+        :param log: A log object, use it to output debugging information/errors
+        :param result_queue: A result Queue, results should be put into it.
+                            Each result is a Metadata object
+        :param abort: If abort.is_set() returns True, abort further processing
+                      and return as soon as possible
+        :param title: The title of the book, can be None
+        :param authors: A list of authors of the book, can be None
+        :param identifiers: A dictionary of other identifiers, most commonly
+                            {'isbn':'1234...'}
+        :return: None if no errors occurred, otherwise a unicode representation
+                 of the error suitable for showing to the user
+
+        '''
+        return None
+
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
new file mode 100644
index 0000000000..1a3bf6d516
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import time
+from urllib import urlencode
+from functools import partial
+from threading import Thread
+
+from lxml import etree
+
+from calibre.ebooks.metadata.sources import Source
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.utils.date import parse_date, utcnow
+from calibre import browser, as_unicode
+
+NAMESPACES = {
+              'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
+              'atom' : 'http://www.w3.org/2005/Atom',
+              'dc': 'http://purl.org/dc/terms'
+            }
+XPath = partial(etree.XPath, namespaces=NAMESPACES)
+
+total_results  = XPath('//openSearch:totalResults')
+start_index    = XPath('//openSearch:startIndex')
+items_per_page = XPath('//openSearch:itemsPerPage')
+entry          = XPath('//atom:entry')
+entry_id       = XPath('descendant::atom:id')
+creator        = XPath('descendant::dc:creator')
+identifier     = XPath('descendant::dc:identifier')
+title          = XPath('descendant::dc:title')
+date           = XPath('descendant::dc:date')
+publisher      = XPath('descendant::dc:publisher')
+subject        = XPath('descendant::dc:subject')
+description    = XPath('descendant::dc:description')
+language       = XPath('descendant::dc:language')
+
+
+
+def to_metadata(browser, log, entry_):
+
+    def get_text(extra, x):
+        try:
+            ans = x(extra)
+            if ans:
+                ans = ans[0].text
+                if ans and ans.strip():
+                    return ans.strip()
+        except:
+            log.exception('Programming error:')
+        return None
+
+
+    id_url = entry_id(entry_)[0].text
+    title_ = ': '.join([x.text for x in title(entry_)]).strip()
+    authors = [x.text.strip() for x in creator(entry_) if x.text]
+    if not authors:
+        authors = [_('Unknown')]
+    if not id_url or not title:
+        # Silently discard this entry
+        return None
+
+    mi = Metadata(title_, authors)
+    try:
+        raw = browser.open(id_url).read()
+        feed = etree.fromstring(raw)
+        extra = entry(feed)[0]
+    except:
+        log.exception('Failed to get additional details for', mi.title)
+        return mi
+
+    mi.comments = get_text(extra, description)
+    #mi.language = get_text(extra, language)
+    mi.publisher = get_text(extra, publisher)
+
+    # Author sort
+    for x in creator(extra):
+        for key, val in x.attrib.items():
+            if key.endswith('file-as') and val and val.strip():
+                mi.author_sort = val
+                break
+    # ISBN
+    isbns = []
+    for x in identifier(extra):
+        t = str(x.text).strip()
+        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
+            if t[:5].upper() == 'ISBN:':
+                isbns.append(t[5:])
+    if isbns:
+        mi.isbn = sorted(isbns, key=len)[-1]
+
+    # Tags
+    try:
+        btags = [x.text for x in subject(extra) if x.text]
+        tags = []
+        for t in btags:
+            tags.extend([y.strip() for y in t.split('/')])
+        tags = list(sorted(list(set(tags))))
+    except:
+        log.exception('Failed to parse tags:')
+        tags = []
+    if tags:
+        mi.tags = [x.replace(',', ';') for x in tags]
+
+    # pubdate
+    pubdate = get_text(extra, date)
+    if pubdate:
+        try:
+            default = utcnow().replace(day=15)
+            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
+        except:
+            log.exception('Failed to parse pubdate')
+
+
+    return mi
+
+class Worker(Thread):
+
+    def __init__(self, log, entries, abort, result_queue):
+        self.browser, self.log, self.entries = browser(), log, entries
+        self.abort, self.result_queue = abort, result_queue
+        Thread.__init__(self)
+        self.daemon = True
+
+    def run(self):
+        for i in self.entries:
+            try:
+                ans = to_metadata(self.browser, self.log, i)
+                if ans is not None:
+                    self.result_queue.put(ans)
+            except:
+                self.log.exception(
+                    'Failed to get metadata for identify entry:',
+                    etree.tostring(i))
+            if self.abort.is_set():
+                break
+
+
+class GoogleBooks(Source):
+
+    name = 'Google Books'
+
+    def create_query(self, log, title=None, authors=None, identifiers={},
+            start_index=1):
+        BASE_URL = 'http://books.google.com/books/feeds/volumes?'
+        isbn = identifiers.get('isbn', None)
+        q = ''
+        if isbn is not None:
+            q += 'isbn:'+isbn
+        elif title or authors:
+            def build_term(prefix, parts):
+                return ' '.join('in'+prefix + ':' + x for x in parts)
+            if title is not None:
+                q += build_term('title', title.split())
+            if authors:
+                q += ('+' if q else '')+build_term('author',
+                        self.get_author_tokens(authors))
+
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        if not q:
+            return None
+        return BASE_URL+urlencode({
+            'q':q,
+            'max-results':20,
+            'start-index':start_index,
+            'min-viewability':'none',
+            })
+
+
+    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
+        query = self.create_query(log, title=title, authors=authors,
+                identifiers=identifiers)
+        try:
+            raw = browser().open_novisit(query).read()
+        except Exception, e:
+            log.exception('Failed to make identify query: %r'%query)
+            return as_unicode(e)
+
+        try:
+            parser = etree.XMLParser(recover=True, no_network=True)
+            feed = etree.fromstring(raw, parser=parser)
+            entries = entry(feed)
+        except Exception, e:
+            log.exception('Failed to parse identify results')
+            return as_unicode(e)
+
+
+        groups = self.split_jobs(entries, 5) # At most 5 threads
+        if not groups:
+            return
+        workers = [Worker(log, entries, abort, result_queue) for entries in
+                groups]
+
+        if abort.is_set():
+            return
+
+        for worker in workers: worker.start()
+
+        has_alive_worker = True
+        while has_alive_worker and not abort.is_set():
+            has_alive_worker = False
+            for worker in workers:
+                if worker.is_alive():
+                    has_alive_worker = True
+            time.sleep(0.1)
+
+        return None
+
+
+
+

From 971e3150f9aaf86f7b253d6d88534e5e0256dc57 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Tue, 1 Feb 2011 13:17:58 +0800
Subject: [PATCH 14/55] Heuristics: fix heading margin CSS property names and adjust whitespace paragraph margins

---
 src/calibre/ebooks/conversion/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 77086efd97..1263372ce3 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -451,21 +451,21 @@ class HeuristicProcessor(object):
             top_margin = ''
             bottom_margin = ''
             if initblanks is not None:
-                top_margin = 'margin=top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
+                top_margin = 'margin-top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
             if endblanks is not None:
-                bottom_margin = 'margin=top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
+                bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(initblanks)))+'em;'
 
             if initblanks == None and endblanks == None:
                 return heading
             else:
-                heading = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', heading)
+                heading = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', heading)
             return heading
 
         html = blanks_around_headings.sub(merge_header_whitespace, html)
 
         def markup_whitespaces(match):
             blanks = match.group(0)
-            blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:.5em"> </p>', blanks)
+            blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:0em; margin-bottom:0em"> </p>', blanks)
             return blanks
 
         html = blanks_n_nopunct.sub(markup_whitespaces, html)

From d75e17e6b44e8ae688ade08bd30ae552ab0c48c3 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Tue, 1 Feb 2011 18:07:37 +0800
Subject: [PATCH 15/55] added scene break replacement logic

---
 src/calibre/ebooks/conversion/cli.py     |  2 +-
 src/calibre/ebooks/conversion/plumber.py |  4 +++
 src/calibre/ebooks/conversion/utils.py   | 33 ++++++++++++++++++++----
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 33ae61f16a..278d599378 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -143,7 +143,7 @@ def add_pipeline_options(parser, plumber):
                      ' patterns. Disabled by default. Use %s to enable. '
                      ' Individual actions can be disabled with the %s options.')
                   % ('--enable-heuristics', '--disable-*'),
-                  ['enable_heuristics'] + HEURISTIC_OPTIONS
+                  ['enable_heuristics', 'replace_scene_breaks'] + HEURISTIC_OPTIONS
                   ),
 
               'SEARCH AND REPLACE' : (
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 5807ba5f8f..59d7a0ed2a 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -530,6 +530,10 @@ OptionRecommendation(name='format_scene_breaks',
     help=_('Left aligned scene break markers are center aligned. '
            'Replace soft scene breaks that use multiple blank lines with'
            'horizontal rules.')),
+           
+OptionRecommendation(name='replace_scene_breaks',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Replace scene breaks with the specified text.')),
 
 OptionRecommendation(name='dehyphenate',
     recommended_value=True, level=OptionRecommendation.LOW,
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 1263372ce3..cf305f1022 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -33,6 +33,7 @@ class HeuristicProcessor(object):
         self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
         self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
         self.single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
+        self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
 
     def is_pdftohtml(self, src):
         return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
@@ -481,6 +482,22 @@ class HeuristicProcessor(object):
             html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
         return html
 
+    def markup_user_break(self, replacement_break):
+        hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
+        if re.findall('(<|>)', replacement_break):
+            if re.match('^<hr', replacement_break):
+                scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
+            elif re.match('^<img', replacement_break):
+                scene_break = self.scene_break_open+replacement_break+'</p>'
+            else:
+                replacement_break = html2text(replacement_break)
+                replacement_break = re.sub('\s', '&nbsp;', replacement_break)
+                scene_break = self.scene_break_open+replacement_break+'</p>'
+        else:
+            replacement_break = re.sub('\s', '&nbsp;', replacement_break)
+            scene_break = self.scene_break_open+replacement_break+'</p>'
+
+        return scene_break
 
 
     def __call__(self, html):
@@ -498,7 +515,7 @@ class HeuristicProcessor(object):
 
         # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
         html = self.arrange_htm_line_endings(html)
-        self.dump(html, 'after_arrange_line_endings')
+        #self.dump(html, 'after_arrange_line_endings')
         if self.cleanup_required():
             ###### Check Markup ######
             #
@@ -534,7 +551,7 @@ class HeuristicProcessor(object):
 
         if getattr(self.extra_opts, 'markup_chapter_headings', False):
             html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
-        self.dump(html, 'after_chapter_markup')
+        #self.dump(html, 'after_chapter_markup')
 
         if getattr(self.extra_opts, 'italicize_common_cases', False):
             html = self.markup_italicis(html)
@@ -608,9 +625,15 @@ class HeuristicProcessor(object):
             # Center separator lines, use a bit larger margin in this case
             scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
             scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
-            print "found "+str(len(scene_break.findall(html)))+" scene breaks"
-            html = scene_break.sub('<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:.65em; margin-bottom:.65em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
-            #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
+            replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
+            if replacement_break is not None:
+                replacement_break = self.markup_user_break(replacement_break)
+                if len(scene_break.findall(html)) >= 1:
+                    html = scene_break.sub(replacement_break, html)
+                else:
+                    html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html) 
+            else:
+                html = scene_break.sub(self.scene_break_open+'\g<break>'+'</p>', html)
 
         if self.deleted_nbsps:
             # put back non-breaking spaces in empty paragraphs so they render correctly

From c4f74eb182eab41b749ddb814791c55dc260f1bd Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Feb 2011 07:29:01 -0500
Subject: [PATCH 16/55] Heuristics: Rename scene break option. Fix bug
 preventing saving settings.

---
 src/calibre/ebooks/conversion/cli.py     |  5 ++---
 src/calibre/ebooks/conversion/plumber.py |  4 ++--
 src/calibre/gui2/convert/heuristics.py   | 26 ++++++++++++------------
 src/calibre/gui2/convert/heuristics.ui   |  2 +-
 src/calibre/gui2/convert/single.py       |  1 +
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 25179d48a7..b3d2f8cac5 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -45,8 +45,7 @@ For full documentation of the conversion system see
 HEURISTIC_OPTIONS = ['markup_chapter_headings',
                       'italicize_common_cases', 'fix_indents',
                       'html_unwrap_factor', 'unwrap_lines',
-                      'delete_blank_paragraphs',
-                      'format_scene_breaks', 'replace_soft_scene_breaks',
+                      'delete_blank_paragraphs', 'format_scene_breaks',
                       'dehyphenate', 'renumber_headings']
 
 def print_help(parser, log):
@@ -144,7 +143,7 @@ def add_pipeline_options(parser, plumber):
                      ' patterns. Disabled by default. Use %s to enable. '
                      ' Individual actions can be disabled with the %s options.')
                   % ('--enable-heuristics', '--disable-*'),
-                  ['enable_heuristics'] + HEURISTIC_OPTIONS
+                  ['enable_heuristics',  'replace_scene_breaks'] + HEURISTIC_OPTIONS
                   ),
 
               'SEARCH AND REPLACE' : (
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 2c37053759..a4708d398c 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -531,9 +531,9 @@ OptionRecommendation(name='format_scene_breaks',
            'Replace soft scene breaks that use multiple blank lines with'
            'horizontal rules.')),
            
-OptionRecommendation(name='replace_soft_scene_breaks',
+OptionRecommendation(name='replace_scene_breaks',
     recommended_value='', level=OptionRecommendation.LOW,
-    help=_('Replace soft scene breaks with the specified text.')),
+    help=_('Replace scene breaks with the specified text.')),
 
 OptionRecommendation(name='dehyphenate',
     recommended_value=True, level=OptionRecommendation.LOW,
diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py
index 73b4622246..8ca4cab455 100644
--- a/src/calibre/gui2/convert/heuristics.py
+++ b/src/calibre/gui2/convert/heuristics.py
@@ -23,7 +23,7 @@ class HeuristicsWidget(Widget, Ui_Form):
                  'italicize_common_cases', 'fix_indents',
                  'html_unwrap_factor', 'unwrap_lines',
                  'delete_blank_paragraphs',
-                 'format_scene_breaks', 'replace_soft_scene_breaks',
+                 'format_scene_breaks', 'replace_scene_breaks',
                  'dehyphenate', 'renumber_headings']
                 )
         self.db, self.book_id = db, book_id
@@ -40,16 +40,16 @@ class HeuristicsWidget(Widget, Ui_Form):
     def restore_defaults(self, get_option):
         Widget.restore_defaults(self, get_option)
         
-        rssb_hist = gprefs['replace_soft_scene_breaks_history']
+        rssb_hist = gprefs['replace_scene_breaks_history']
         for x in self.rssb_defaults:
             if x in rssb_hist:
                 del rssb_hist[rssb_hist.index(x)]
-        gprefs['replace_soft_scene_breaks_history'] = self.rssb_defaults + gprefs['replace_soft_scene_breaks_history']
+        gprefs['replace_scene_breaks_history'] = self.rssb_defaults + gprefs['replace_scene_breaks_history']
 
     def commit_options(self, save_defaults=False):
-        Widget.commit_options(self, save_defaults)
-        
         self.save_histories()
+        
+        return Widget.commit_options(self, save_defaults)
 
     def break_cycles(self):
         Widget.break_cycles(self)
@@ -64,30 +64,30 @@ class HeuristicsWidget(Widget, Ui_Form):
         if val is None and g is self.opt_html_unwrap_factor:
             g.setValue(0.0)
             return True
-        if not val and g is self.opt_replace_soft_scene_breaks:
+        if not val and g is self.opt_replace_scene_breaks:
             g.lineEdit().setText('')
             return True
 
     def load_histories(self):
-        val = unicode(self.opt_replace_soft_scene_breaks.currentText())
-        rssb_hist = gprefs.get('replace_soft_scene_breaks_history', self.rssb_defaults)
+        val = unicode(self.opt_replace_scene_breaks.currentText())
+        rssb_hist = gprefs.get('replace_scene_breaks_history', self.rssb_defaults)
         if val in rssb_hist:
             del rssb_hist[rssb_hist.index(val)]
         rssb_hist.insert(0, val)
         for v in rssb_hist:
             # Ensure we don't have duplicate items.
-            if self.opt_replace_soft_scene_breaks.findText(v) == -1:
-                self.opt_replace_soft_scene_breaks.addItem(v)
-        self.opt_replace_soft_scene_breaks.setCurrentIndex(0)
+            if self.opt_replace_scene_breaks.findText(v) == -1:
+                self.opt_replace_scene_breaks.addItem(v)
+        self.opt_replace_scene_breaks.setCurrentIndex(0)
 
     def save_histories(self):
         rssb_history = []
-        history_pats = [unicode(self.opt_replace_soft_scene_breaks.lineEdit().text())] + [unicode(self.opt_replace_soft_scene_breaks.itemText(i)) for i in xrange(self.opt_replace_soft_scene_breaks.count())]
+        history_pats = [unicode(self.opt_replace_scene_breaks.lineEdit().text())] + [unicode(self.opt_replace_scene_breaks.itemText(i)) for i in xrange(self.opt_replace_scene_breaks.count())]
         for p in history_pats[:10]:
             # Ensure we don't have duplicate items.
             if p not in rssb_history:
                 rssb_history.append(p)
-        gprefs['replace_soft_scene_breaks_history'] = rssb_history
+        gprefs['replace_scene_breaks_history'] = rssb_history
 
     def enable_heuristics(self, state):
         state = state == Qt.Checked
diff --git a/src/calibre/gui2/convert/heuristics.ui b/src/calibre/gui2/convert/heuristics.ui
index c047957d4d..4f7cf5ea6e 100644
--- a/src/calibre/gui2/convert/heuristics.ui
+++ b/src/calibre/gui2/convert/heuristics.ui
@@ -169,7 +169,7 @@
          </widget>
         </item>
         <item>
-         <widget class="QComboBox" name="opt_replace_soft_scene_breaks">
+         <widget class="QComboBox" name="opt_replace_scene_breaks">
           <property name="sizePolicy">
            <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
             <horstretch>0</horstretch>
diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py
index 59fcbb65ad..6540383229 100644
--- a/src/calibre/gui2/convert/single.py
+++ b/src/calibre/gui2/convert/single.py
@@ -258,6 +258,7 @@ class Config(ResizableDialog, Ui_Dialog):
             if not w.pre_commit_check():
                 return
             x = w.commit(save_defaults=False)
+            print x
             recs.update(x)
         self.opf_file, self.cover_file = self.mw.opf_file, self.mw.cover_file
         self._recommendations = recs

From 48f202c7fd875bb4ccabeed6d7078e56607da142 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Tue, 1 Feb 2011 21:21:36 +0800
Subject: [PATCH 17/55] allow user applied styles to <hr> tags, updated
 comments/docs

---
 src/calibre/ebooks/conversion/utils.py | 19 +++++++++++++++++--
 src/calibre/manual/conversion.rst      | 13 +++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index cf305f1022..21c6063f63 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -483,10 +483,23 @@ class HeuristicProcessor(object):
         return html
 
     def markup_user_break(self, replacement_break):
+        '''
+        Takes string a user supplies and wraps it in markup that will be centered with 
+        appropriate margins.  <hr> and <img> tags are allowed.  If the user specifies
+        a style with width attributes in the <hr> tag then the appropriate margins are
+        applied to wrapping divs.  This is because many ebook devices don't support margin:auto
+        All other html is converted to text.
+        '''
         hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
         if re.findall('(<|>)', replacement_break):
             if re.match('^<hr', replacement_break):
-                scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
+                if replacement_break.find('width') != -1:
+                   width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
+                   divpercent = (100 - width) / 2
+                   hr_open = re.sub('45', str(divpercent), hr_open)
+                   scene_break = hr_open+replacement_break+'</div>'
+                else:
+                    scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
             elif re.match('^<img', replacement_break):
                 scene_break = self.scene_break_open+replacement_break+'</p>'
             else:
@@ -622,9 +635,11 @@ class HeuristicProcessor(object):
             blanks_count = len(self.any_multi_blank.findall(html))
             if blanks_count >= 1:
                 html = self.merge_blanks(html, blanks_count)
-            # Center separator lines, use a bit larger margin in this case
             scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
             scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
+            # If the user has enabled scene break replacement, then either softbreaks
+            # or 'hard' scene breaks are replaced, depending on which is in use
+            # Otherwise separator lines are centered, use a bit larger margin in this case
             replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
             if replacement_break is not None:
                 replacement_break = self.markup_user_break(replacement_break)
diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index 7f3ff21fe0..ecd8609ecc 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -311,10 +311,15 @@ remove all non-breaking-space entities, or may include false positive matches re
 
 :guilabel:`Ensure scene breaks are consistently formatted`
     With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.  
-    It also attempts to detect scene breaks defined by white space and replace them with a horizontal rule 15% of the
-    page width.  Some readers may find this desirable as these 'soft' scene breaks often become page breaks on readers, and 
-    thus become difficult to distinguish.
+    'Soft' scene break markers, i.e. scene breaks only defined by extra white space, are styled to ensure that they 
+    will not be displayed in conjunction with page breaks.
 
+:guilabel:`Replace scene breaks`
+    If this option is configured then |app| will replace scene break markers it finds with the replacement text specified by the
+    user. In general you should avoid using html tags, |app| will discard any tags and use pre-defined markup.  <hr />
+    tags, i.e. horizontal rules, are an exception.  These can optionally be specified with styles, if you choose to add your own
+    style be sure to include the 'width' setting, otherwise the style information will be discarded.
+ 
 :guilabel:`Remove unnecessary hyphens`
     |app| will analyze all hyphenated content in the document when this option is enabled.  The document itself is used
     as a dictionary for analysis.  This allows |app| to accurately remove hyphens for any words in the document in any language, 
@@ -628,7 +633,7 @@ between 0 and 1. The default is 0.45, just under the median line length. Lower t
 text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.
 
 Also, they often have headers and footers as part of the document that will become included with the text.
-Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not
+Use the Search and Replace panel to remove headers and footers to mitigate this issue. If the headers and footers are not
 removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read 
 :ref:`regexptutorial`.
 

From 72fe944b95bd3a6066b43b77a5b0ba9abb1685e8 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Tue, 1 Feb 2011 22:05:54 +0800
Subject: [PATCH 18/55] ...

---
 src/calibre/gui2/convert/single.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py
index 6540383229..59fcbb65ad 100644
--- a/src/calibre/gui2/convert/single.py
+++ b/src/calibre/gui2/convert/single.py
@@ -258,7 +258,6 @@ class Config(ResizableDialog, Ui_Dialog):
             if not w.pre_commit_check():
                 return
             x = w.commit(save_defaults=False)
-            print x
             recs.update(x)
         self.opf_file, self.cover_file = self.mw.opf_file, self.mw.cover_file
         self._recommendations = recs

From 97f0518585479d0001ee25395c540840bb2a1211 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 1 Feb 2011 14:14:03 +0000
Subject: [PATCH 19/55] Fix #8714: Problem sending thumbnails to Sony PRSx50 SD
 card

---
 src/calibre/devices/prs505/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/prs505/__init__.py b/src/calibre/devices/prs505/__init__.py
index 48b7d98123..1a59cb81a6 100644
--- a/src/calibre/devices/prs505/__init__.py
+++ b/src/calibre/devices/prs505/__init__.py
@@ -8,5 +8,5 @@ CACHE_XML = 'Sony Reader/database/cache.xml'
 CACHE_EXT = 'Sony Reader/database/cacheExt.xml'
 
 MEDIA_THUMBNAIL = 'database/thumbnail'
-CACHE_THUMBNAIL = 'Sony Reader/database/thumbnail'
+CACHE_THUMBNAIL = 'Sony Reader/thumbnail'
 

From 31e66b8bc55d69016362ab965a7599963698d2e0 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 1 Feb 2011 14:15:04 +0000
Subject: [PATCH 20/55] 1) fix problem where new covers are not sent as
 thumbnails even if the options ask for it. 2) permit not respecting aspect
 ratio when generating cover thumbnails

---
 src/calibre/devices/interface.py     | 10 ++++++++++
 src/calibre/devices/prs505/driver.py | 20 +++++++++++++++++---
 src/calibre/gui2/device.py           | 26 ++++++++++++++++++++++++--
 src/calibre/utils/magick/draw.py     | 11 +++++++++--
 4 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/src/calibre/devices/interface.py b/src/calibre/devices/interface.py
index 2a92f46e8d..bc442f5853 100644
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@@ -35,6 +35,16 @@ class DevicePlugin(Plugin):
 
     #: Height for thumbnails on the device
     THUMBNAIL_HEIGHT = 68
+    #: Width for thumbnails on the device. Setting this will force thumbnails
+    #: to this size, not preserving aspect ratio. If it is not set, then
+    #: the aspect ratio will be preserved and the thumbnail will be no higher
+    #: than THUMBNAIL_HEIGHT
+    # THUMBNAIL_WIDTH = 68
+
+    #: Set this to True if the device supports updating cover thumbnails during
+    #: sync_booklists. Setting it to true will ask device.py to refresh the
+    #: cover thumbnails during book matching
+    WANTS_UPDATED_THUMBNAILS = False
 
     #: Whether the metadata on books can be set via the GUI.
     CAN_SET_METADATA = ['title', 'authors', 'collections']
diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 0f6668891a..4d3ac31540 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -81,12 +81,19 @@ class PRS505(USBMS):
                 _('Set this option to have separate book covers uploaded '
                   'every time you connect your device. Unset this option if '
                   'you have so many books on the reader that performance is '
-                  'unacceptable.')
+                  'unacceptable.'),
+            _('Preserve cover aspect ratio when building thumbnails') +
+                ':::' +
+                _('Set this option if you want the cover thumbnails to have '
+                  'the same aspect ratio (width to height) as the cover. '
+                  'Unset it if you want the thumbnail to be the maximum size, '
+                  'ignoring aspect ratio.')
     ]
     EXTRA_CUSTOMIZATION_DEFAULT = [
                 ', '.join(['series', 'tags']),
                 False,
-                False
+                False,
+                True
     ]
 
     OPT_COLLECTIONS    = 0
@@ -96,7 +103,7 @@ class PRS505(USBMS):
     plugboard = None
     plugboard_func = None
 
-    THUMBNAIL_HEIGHT = 200
+    THUMBNAIL_HEIGHT = 217
 
     MAX_PATH_LEN = 201 # 250 - (max(len(CACHE_THUMBNAIL), len(MEDIA_THUMBNAIL)) +
                        # len('main_thumbnail.jpg') + 1)
@@ -138,6 +145,13 @@ class PRS505(USBMS):
             if not write_cache(self._card_b_prefix):
                 self._card_b_prefix = None
         self.booklist_class.rebuild_collections = self.rebuild_collections
+        # Set the thumbnail width to the theoretical max if the user has asked
+        # that we do not preserve aspect ratio
+        if not self.settings().extra_customization[3]:
+            self.THUMBNAIL_WIDTH = 168
+        # Set CAN_UPDATE_THUMBNAILS if the user has asked that thumbnails be
+        # updated on every connect
+        self.WANTS_UPDATED_THUMBNAILS = self.settings().extra_customization[2]
 
     def get_device_information(self, end_session=True):
         return (self.gui_name, '', '', '')
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index a5066a99ef..bf8c734089 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -871,6 +871,16 @@ class DeviceMixin(object): # {{{
             self.send_by_mail(to, fmts, delete)
 
     def cover_to_thumbnail(self, data):
+        if self.device_manager.device and \
+                hasattr(self.device_manager.device, 'THUMBNAIL_WIDTH'):
+            try:
+                return thumbnail(data,
+                                 self.device_manager.device.THUMBNAIL_WIDTH,
+                                 self.device_manager.device.THUMBNAIL_HEIGHT,
+                                 preserve_aspect_ratio=False)
+            except:
+                pass
+            return
         ht = self.device_manager.device.THUMBNAIL_HEIGHT \
                 if self.device_manager else DevicePlugin.THUMBNAIL_HEIGHT
         try:
@@ -1272,6 +1282,8 @@ class DeviceMixin(object): # {{{
             x = x.lower() if x else ''
             return string_pat.sub('', x)
 
+        update_metadata = prefs['manage_device_metadata'] == 'on_connect'
+
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't On**2
@@ -1284,8 +1296,13 @@ class DeviceMixin(object): # {{{
             except:
                 return False
 
+            get_covers = False
+            if update_metadata and self.device_manager.is_device_connected:
+                if self.device_manager.device.WANTS_UPDATED_THUMBNAILS:
+                    get_covers = True
+
             for id in db.data.iterallids():
-                mi = db.get_metadata(id, index_is_id=True)
+                mi = db.get_metadata(id, index_is_id=True, get_cover=get_covers)
                 title = clean_string(mi.title)
                 if title not in db_book_title_cache:
                     db_book_title_cache[title] = \
@@ -1311,7 +1328,6 @@ class DeviceMixin(object): # {{{
         # the application_id to the db_id of the matching book. This value
         # will be used by books_on_device to indicate matches.
 
-        update_metadata = prefs['manage_device_metadata'] == 'on_connect'
         for booklist in booklists:
             for book in booklist:
                 book.in_library = None
@@ -1382,6 +1398,12 @@ class DeviceMixin(object): # {{{
 
         if update_metadata:
             if self.device_manager.is_device_connected:
+                if self.device_manager.device.CAN_UPDATE_THUMBNAILS:
+                    for blist in booklists:
+                        for book in blist:
+                            if book.cover and os.access(book.cover, os.R_OK):
+                                book.thumbnail = \
+                                    self.cover_to_thumbnail(open(book.cover, 'rb').read())
                 plugboards = self.library_view.model().db.prefs.get('plugboards', {})
                 self.device_manager.sync_booklists(
                                     Dispatcher(self.metadata_synced), booklists,
diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py
index ad4b681b43..111f22cb5b 100644
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@@ -72,11 +72,18 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
                 f.write(data)
     return ret
 
-def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg'):
+def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg',
+              preserve_aspect_ratio=True):
     img = Image()
     img.load(data)
     owidth, oheight = img.size
-    scaled, nwidth, nheight = fit_image(owidth, oheight, width, height)
+    if not preserve_aspect_ratio:
+        scaled = owidth > width or oheight > height
+        nwidth = width
+        nheight = height
+    else:
+        scaled, nwidth, nheight = fit_image(owidth, oheight, width, height)
+    print 'in thumbnail', scaled, nwidth, nheight
     if scaled:
         img.size = (nwidth, nheight)
     canvas = create_canvas(img.size[0], img.size[1], bgcolor)

From 00210f4b7b397094ec0b77278a33c07ef98a9268 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 1 Feb 2011 15:16:44 +0000
Subject: [PATCH 21/55] Fix bugs I introduced when I renamed some interface
 attributes

---
 src/calibre/devices/prs505/driver.py | 2 +-
 src/calibre/gui2/device.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 4d3ac31540..3768b8be62 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -149,7 +149,7 @@ class PRS505(USBMS):
         # that we do not preserve aspect ratio
         if not self.settings().extra_customization[3]:
             self.THUMBNAIL_WIDTH = 168
-        # Set CAN_UPDATE_THUMBNAILS if the user has asked that thumbnails be
+        # Set WANTS_UPDATED_THUMBNAILS if the user has asked that thumbnails be
         # updated on every connect
         self.WANTS_UPDATED_THUMBNAILS = self.settings().extra_customization[2]
 
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index bf8c734089..ae38a8321b 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -1398,7 +1398,7 @@ class DeviceMixin(object): # {{{
 
         if update_metadata:
             if self.device_manager.is_device_connected:
-                if self.device_manager.device.CAN_UPDATE_THUMBNAILS:
+                if self.device_manager.device.WANTS_UPDATED_THUMBNAILS:
                     for blist in booklists:
                         for book in blist:
                             if book.cover and os.access(book.cover, os.R_OK):

From ab264422d302c0855b71dfeadbd307d93532f843 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 08:55:46 -0700
Subject: [PATCH 22/55] ...

---
 src/calibre/ebooks/metadata/sources/google.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 1a3bf6d516..d9efb65ae0 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -65,7 +65,7 @@ def to_metadata(browser, log, entry_):
 
     mi = Metadata(title_, authors)
     try:
-        raw = browser.open(id_url).read()
+        raw = browser.open_novisit(id_url).read()
         feed = etree.fromstring(raw)
         extra = entry(feed)[0]
     except:
@@ -129,7 +129,7 @@ class Worker(Thread):
         for i in self.entries:
             try:
                 ans = to_metadata(self.browser, self.log, i)
-                if ans is not None:
+                if isinstance(ans, Metadata):
                     self.result_queue.put(ans)
             except:
                 self.log.exception(

From 022b639157583b6bd0e81b812dd5c2230cf0f09c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 09:27:34 -0700
Subject: [PATCH 23/55] ...

---
 src/calibre/gui2/device.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index ae38a8321b..48063b11a4 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -838,7 +838,8 @@ class DeviceMixin(object): # {{{
                             format_count[f] = 1
             for f in self.device_manager.device.settings().format_map:
                 if f in format_count.keys():
-                    formats.append((f, _('%i of %i Books') % (format_count[f], len(rows))), True if f in aval_out_formats else False)
+                    formats.append((f, _('%i of %i Books') % (format_count[f],
+                        len(rows)), True if f in aval_out_formats else False))
                 elif f in aval_out_formats:
                     formats.append((f, _('0 of %i Books') % len(rows)), True)
             d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)

From 581fed66c972f2a4816f9385b330e39235d1ca4d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 09:28:01 -0700
Subject: [PATCH 24/55] ...

---
 src/calibre/gui2/device.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 48063b11a4..8efa7f154c 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -841,7 +841,7 @@ class DeviceMixin(object): # {{{
                     formats.append((f, _('%i of %i Books') % (format_count[f],
                         len(rows)), True if f in aval_out_formats else False))
                 elif f in aval_out_formats:
-                    formats.append((f, _('0 of %i Books') % len(rows)), True)
+                    formats.append((f, _('0 of %i Books') % len(rows), True))
             d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
             if d.exec_() != QDialog.Accepted:
                 return

From ba09dbb2fde841a3efbf922a5481fe2b1883eae0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 09:30:01 -0700
Subject: [PATCH 25/55] ...

---
 src/calibre/manual/faq.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 59f6a9b88d..18c53ade5d 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -391,7 +391,7 @@ Take your pick:
   * A tribute to the SONY Librie which was the first e-ink based e-book reader
   * My wife chose it ;-)
 
-|app| is pronounced as cal-i-ber *not* ca-libre. If you're wondering, |app| is the British/commonwealth spelling for caliber. Being Indian, that's the natural spelling for me. 
+|app| is pronounced as cal-i-ber *not* ca-li-bre. If you're wondering, |app| is the British/commonwealth spelling for caliber. Being Indian, that's the natural spelling for me. 
 
 Why does |app| show only some of my fonts on OS X?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 5ec9e9ee1448a02362f8475666b4ebc541ec8919 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 09:31:33 -0700
Subject: [PATCH 26/55] Fix #8720 (WSJ Recipe Tab Formatting / Section Title)

---
 resources/recipes/wsj.recipe | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe
index 4ce315200c..eb473f1121 100644
--- a/resources/recipes/wsj.recipe
+++ b/resources/recipes/wsj.recipe
@@ -35,7 +35,7 @@ class WallStreetJournal(BasicNewsRecipe):
 
     remove_tags_before = dict(name='h1')
     remove_tags = [
-                    dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow"]),
+                    dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow","articleTabs_tab_quotes","articleTabs_tab_document"]),
                     {'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map','insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
                     dict(rel='shortcut icon'),
                     ]
@@ -101,7 +101,7 @@ class WallStreetJournal(BasicNewsRecipe):
                title = 'Front Section'
                url = 'http://online.wsj.com' + a['href']
                feeds = self.wsj_add_feed(feeds,title,url)
-               title = 'What''s News'
+               title = "What's News"
                url = url.replace('pageone','whatsnews')
                feeds = self.wsj_add_feed(feeds,title,url)
             else:

From 3d3dcb39159842c7a9335f7678912477dec83864 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 1 Feb 2011 16:42:26 +0000
Subject: [PATCH 27/55] Fix for #7883: 'title_sort' field in save to disk
 template empty in v 7.33

---
 src/calibre/ebooks/metadata/opf2.py | 30 ++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index 62d57f2251..456bfb0ea6 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -780,22 +780,30 @@ class OPF(object): # {{{
     def title_sort(self):
 
         def fget(self):
-            matches = self.title_path(self.metadata)
+            matches = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
+                                      '"calibre:title_sort") and @content]')
             if matches:
-                for match in matches:
-                    ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
-                    if not ans:
-                        ans = match.get('file-as', None)
-                    if ans:
-                        return ans
+                for elem in matches:
+                    return self.get_text(elem)
+            return None
 
         def fset(self, val):
+            print 'here'
+            matches = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
+                                      '"calibre:title_sort") and @content]')
+            if matches:
+                for elem in matches:
+                    elem.getparent().remove(elem)
             matches = self.title_path(self.metadata)
             if matches:
-                for key in matches[0].attrib:
-                    if key.endswith('file-as'):
-                        matches[0].attrib.pop(key)
-                matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode(val))
+                for elem in matches:
+                    parent = elem.getparent()
+                    attrib = {}
+                    attrib['name'] = 'calibre:title_sort'
+                    attrib['content'] = val
+                    e = elem.makeelement('meta', attrib=attrib)
+                    e.tail = '\n'+(' '*8)
+                    parent.append(elem)
 
         return property(fget=fget, fset=fset)
 

From 393a876d3a3d0fe7a9466fff6f6b4304d0cc4fa0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 10:23:42 -0700
Subject: [PATCH 28/55] Add a note about the removed header and footer options
 to the structure detection panel.

---
 src/calibre/gui2/convert/structure_detection.ui | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui
index ef0677a67c..f80e6f8182 100644
--- a/src/calibre/gui2/convert/structure_detection.ui
+++ b/src/calibre/gui2/convert/structure_detection.ui
@@ -48,10 +48,10 @@
      </property>
     </widget>
    </item>
-   <item row="5" column="0" colspan="3">
+   <item row="6" column="0" colspan="3">
     <widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
    </item>
-   <item row="6" column="0" colspan="3">
+   <item row="7" column="0" colspan="3">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -77,6 +77,16 @@
      </property>
     </spacer>
    </item>
+   <item row="4" column="0" colspan="3">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
+     </property>
+     <property name="wordWrap">
+      <bool>true</bool>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <customwidgets>

From 9acd6d6189cdb2f008ea519fb5b0709db93c4601 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 1 Feb 2011 17:28:55 +0000
Subject: [PATCH 29/55] Put file-as back into OPF title_sort.get as a fall back

---
 src/calibre/ebooks/metadata/opf2.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index 456bfb0ea6..a721c5cb2f 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -780,15 +780,24 @@ class OPF(object): # {{{
     def title_sort(self):
 
         def fget(self):
+            #first try the title_sort meta tag
             matches = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
                                       '"calibre:title_sort") and @content]')
             if matches:
                 for elem in matches:
                     return self.get_text(elem)
+            # fallback to file-as
+            matches = self.title_path(self.metadata)
+            if matches:
+                for match in matches:
+                    ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
+                    if not ans:
+                        ans = match.get('file-as', None)
+                    if ans:
+                        return ans
             return None
 
         def fset(self, val):
-            print 'here'
             matches = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
                                       '"calibre:title_sort") and @content]')
             if matches:

From 1bcef905630d3373afdc05ee11a75847eba8c699 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 13:08:33 -0700
Subject: [PATCH 30/55] MOBI Output: Use the book uuid as the ASIN field and
 set cdetype to EBOK to allow Amazon furthest read tracking to work with
 calibre generated MOBI files. Fixes #8721 (Please add support for Mobi EXTH
 metadata data in fields 113 and 501)

---
 src/calibre/ebooks/mobi/reader.py |  2 ++
 src/calibre/ebooks/mobi/writer.py | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 0ae3c9ac9d..9576ccb637 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -103,6 +103,8 @@ class EXTHHeader(object):
                 pass
         elif id == 108:
             pass # Producer
+        elif id == 113:
+            pass # ASIN or UUID
         #else:
         #    print 'unhandled metadata record', id, repr(content)
 
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index 2a71ecd43b..abba173d69 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -1547,6 +1547,31 @@ class MobiWriter(object):
                     rights = 'Unknown'
                 exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
                 exth.write(rights)
+                nrecs += 1
+
+        # Write UUID as ASIN
+        uuid = None
+        from calibre.ebooks.oeb.base import OPF
+        for x in oeb.metadata['identifier']:
+            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
+                uuid = unicode(x).split(':')[-1]
+                break
+        if uuid is None:
+            from uuid import uuid4
+            uuid = str(uuid4())
+
+        if isinstance(uuid, unicode):
+            uuid = uuid.encode('utf-8')
+        exth.write(pack('>II', 113, len(uuid) + 8))
+        exth.write(uuid)
+        nrecs += 1
+
+        # Write cdetype
+        if not self.opts.mobi_periodical:
+            data = 'EBOK'
+            exth.write(pack('>II', 501, len(data)+8))
+            exth.write(data)
+            nrecs += 1
 
         # Add a publication date entry
         if oeb.metadata['date'] != [] :

From 72b93454506717a9280bbd16966fd701724239ff Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 13:24:38 -0700
Subject: [PATCH 31/55] Fix mimetype sent by content server for PDB files

---
 resources/mime.types | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/mime.types b/resources/mime.types
index ab98b3bf4a..a2a67c38f9 100644
--- a/resources/mime.types
+++ b/resources/mime.types
@@ -585,7 +585,6 @@ application/vnd.osa.netdeploy
 application/vnd.osgi.bundle				
 application/vnd.osgi.dp				dp
 application/vnd.otps.ct-kip+xml				
-application/vnd.palm				oprc pdb pqa
 application/vnd.paos.xml				
 application/vnd.pg.format				str
 application/vnd.pg.osasli				ei6
@@ -1082,7 +1081,6 @@ chemical/x-ncbi-asn1				asn
 chemical/x-ncbi-asn1-ascii				ent prt
 chemical/x-ncbi-asn1-binary				aso val
 chemical/x-ncbi-asn1-spec				asn
-chemical/x-pdb				ent pdb
 chemical/x-rosdal				ros
 chemical/x-swissprot				sw
 chemical/x-vamas-iso14976				vms
@@ -1379,3 +1377,5 @@ application/x-cbr                          cbr
 application/x-cb7                          cb7
 application/x-koboreader-ebook             kobo
 image/wmf                                  wmf
+application/ereader                        pdb
+

From 99131cc2e0620ac345b8937e262d2c17146191cf Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 13:25:59 -0700
Subject: [PATCH 32/55] Fix #8620 (the cailibre delete my ebook)

---
 src/calibre/library/database2.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 3fc16e99b4..bfe54df36e 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -430,8 +430,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         authors = self.authors(id, index_is_id=True)
         if not authors:
             authors = _('Unknown')
-        author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
-        title  = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
+        author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
+        title  = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
         path   = author + '/' + title + ' (%d)'%id
         return path
 
@@ -442,8 +442,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         authors = self.authors(id, index_is_id=True)
         if not authors:
             authors = _('Unknown')
-        author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
-        title  = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
+        author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
+        title  = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
         name   = title + ' - ' + author
         while name.endswith('.'):
             name = name[:-1]

From 81bf07ddb4b81f5150198fb2f894225b22f31a06 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 14:46:42 -0700
Subject: [PATCH 33/55] Clarify the help texts for running the GUI in debug
 mode with calibre-debug

---
 src/calibre/debug.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/calibre/debug.py b/src/calibre/debug.py
index e1c3e1809e..3a080fc57b 100644
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@@ -22,13 +22,15 @@ Run an embedded python interpreter.
     parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
                       help='Debug the specified device driver.')
     parser.add_option('-g', '--gui',  default=False, action='store_true',
-                      help='Run the GUI',)
+                      help='Run the GUI with debugging enabled. Debug output is '
+                      'printed to stdout and stderr.')
     parser.add_option('--gui-debug',  default=None,
                       help='Run the GUI with a debug console, logging to the'
-                      ' specified path',)
+                      ' specified path. For internal use only, use the -g'
+                      ' option to run the GUI in debug mode',)
     parser.add_option('--show-gui-debug',  default=None,
-                      help='Display the specified log file.',)
-
+                      help='Display the specified log file. For internal use'
+                      ' only.',)
     parser.add_option('-w', '--viewer',  default=False, action='store_true',
                       help='Run the ebook viewer',)
     parser.add_option('--paths', default=False, action='store_true',

From 3e895675e29d132acdc5cf1f065d8a44c4852945 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 14:55:08 -0700
Subject: [PATCH 34/55] ...

---
 src/calibre/gui2/email.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py
index 6b2ed81413..426747e044 100644
--- a/src/calibre/gui2/email.py
+++ b/src/calibre/gui2/email.py
@@ -264,8 +264,9 @@ class EmailMixin(object): # {{{
         if _auto_ids != []:
             for id in _auto_ids:
                 if specific_format == None:
-                    formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')]
-                    formats = formats if formats != None else []
+                    dbfmts = self.library_view.model().db.formats(id, index_is_id=True)
+                    formats = [f.lower() for f in (dbfmts.split(',') if fmts else
+                        [])]
                     if list(set(formats).intersection(available_input_formats())) != [] and list(set(fmts).intersection(available_output_formats())) != []:
                         auto.append(id)
                     else:

From d51efa910450295cf052048a80d9ccccf63d42b7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Feb 2011 18:07:43 -0500
Subject: [PATCH 35/55] ...

---
 src/calibre/ebooks/txt/txtml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index bf33e5540a..660fd9d38a 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -218,7 +218,7 @@ class TXTMLizer(object):
 
         if tag in SPACE_TAGS:
             text.append(u' ')
-            
+
         # Scene breaks.
         if tag == 'hr':
             text.append('\n\n* * *\n\n')

From 8fc4e9a49055a08d8b9e7dbd81aad978b9435044 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Feb 2011 18:25:02 -0500
Subject: [PATCH 36/55] Heuristics: Have restore defaults restore correctly.
 Add new default options.

---
 src/calibre/gui2/convert/heuristics.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py
index 8ca4cab455..18a6b8b5fe 100644
--- a/src/calibre/gui2/convert/heuristics.py
+++ b/src/calibre/gui2/convert/heuristics.py
@@ -27,7 +27,7 @@ class HeuristicsWidget(Widget, Ui_Form):
                  'dehyphenate', 'renumber_headings']
                 )
         self.db, self.book_id = db, book_id
-        self.rssb_defaults = ['', '<hr />', '* * *']
+        self.rssb_defaults = [u'', u'<hr />', u'* * *', u'• • •', u'✦ ✦ ✦', u'✮ ✮ ✮', 'u☆ ☆ ☆', u'❂ ❂ ❂', u'✣ ✣ ✣', u'❖ ❖ ❖', u'☼ ☼ ☼', u'✠ ✠ ✠']
         self.initialize_options(get_option, get_help, db, book_id)
 
         self.load_histories()
@@ -40,11 +40,13 @@ class HeuristicsWidget(Widget, Ui_Form):
     def restore_defaults(self, get_option):
         Widget.restore_defaults(self, get_option)
         
+        self.save_histories()
         rssb_hist = gprefs['replace_scene_breaks_history']
         for x in self.rssb_defaults:
             if x in rssb_hist:
                 del rssb_hist[rssb_hist.index(x)]
         gprefs['replace_scene_breaks_history'] = self.rssb_defaults + gprefs['replace_scene_breaks_history']
+        self.load_histories()
 
     def commit_options(self, save_defaults=False):
         self.save_histories()
@@ -69,6 +71,9 @@ class HeuristicsWidget(Widget, Ui_Form):
             return True
 
     def load_histories(self):
+        self.opt_replace_scene_breaks.clear()
+        self.opt_replace_scene_breaks.lineEdit().setText('')
+        
         val = unicode(self.opt_replace_scene_breaks.currentText())
         rssb_hist = gprefs.get('replace_scene_breaks_history', self.rssb_defaults)
         if val in rssb_hist:

From 7355acd7af2cca4e05b2c88e1c20a8e74fd7c96a Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Feb 2011 18:26:41 -0500
Subject: [PATCH 37/55] Set replace_scene_breaks cmd default to match GUI
 default.

---
 src/calibre/ebooks/conversion/plumber.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 59d7a0ed2a..a4708d398c 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -532,7 +532,7 @@ OptionRecommendation(name='format_scene_breaks',
            'horizontal rules.')),
            
 OptionRecommendation(name='replace_scene_breaks',
-    recommended_value=None, level=OptionRecommendation.LOW,
+    recommended_value='', level=OptionRecommendation.LOW,
     help=_('Replace scene breaks with the specified text.')),
 
 OptionRecommendation(name='dehyphenate',

From 447d1a66d5fdc1ffed5343539d9c57406a4092f1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 1 Feb 2011 21:09:32 -0700
Subject: [PATCH 38/55] ...

---
 resources/recipes/wsj_free.recipe | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe
index df8234e8e2..a4a957fc90 100644
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@@ -10,7 +10,10 @@ class WallStreetJournal(BasicNewsRecipe):
 
     title = 'Wall Street Journal (free)'
     __author__ = 'Kovid Goyal, Sujata Raman, Joshua Oster-Morris, Starson17'
-    description = 'News and current affairs'
+    description = '''News and current affairs. This recipe only fetches complete
+    versions of the articles that are available free on the wsj.com website.
+    To get the rest of the articles, subscribe to the WSJ and use the other WSJ
+    recipe.'''
     language = 'en'
     cover_url           = 'http://dealbreaker.com/images/thumbs/Wall%20Street%20Journal%20A1.JPG'
     max_articles_per_feed = 1000
@@ -151,6 +154,4 @@ class WallStreetJournal(BasicNewsRecipe):
 
         return articles
 
-    def cleanup(self):
-        self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
 

From 8627ec9683548dff8414dcf5d6d7ee35959bfd0c Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Wed, 2 Feb 2011 14:09:25 +0800
Subject: [PATCH 39/55] improve new docs, user specified width handled
 correctly, default scene break markers work across Kindle and ADE based
 devices

---
 src/calibre/ebooks/conversion/utils.py |  5 +++--
 src/calibre/gui2/convert/heuristics.py |  4 ++--
 src/calibre/manual/conversion.rst      | 16 +++++++++++++---
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 8a339afe4c..16ef4c86e2 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -490,11 +490,12 @@ class HeuristicProcessor(object):
         applied to wrapping divs.  This is because many ebook devices don't support margin:auto
         All other html is converted to text.
         '''
-        hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
+        hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">'
         if re.findall('(<|>)', replacement_break):
             if re.match('^<hr', replacement_break):
                 if replacement_break.find('width') != -1:
                    width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
+                   replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
                    divpercent = (100 - width) / 2
                    hr_open = re.sub('45', str(divpercent), hr_open)
                    scene_break = hr_open+replacement_break+'</div>'
@@ -642,7 +643,7 @@ class HeuristicProcessor(object):
             # or 'hard' scene breaks are replaced, depending on which is in use
             # Otherwise separator lines are centered, use a bit larger margin in this case
             replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
-            if replacement_break is not None:
+            if replacement_break != '':
                 replacement_break = self.markup_user_break(replacement_break)
                 if len(scene_break.findall(html)) >= 1:
                     html = scene_break.sub(replacement_break, html)
diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py
index 77fadf059c..5e7e4aa506 100644
--- a/src/calibre/gui2/convert/heuristics.py
+++ b/src/calibre/gui2/convert/heuristics.py
@@ -27,8 +27,8 @@ class HeuristicsWidget(Widget, Ui_Form):
                  'dehyphenate', 'renumber_headings']
                 )
         self.db, self.book_id = db, book_id
-        self.rssb_defaults = [u'', u'<hr />', u'* * *', u'• • •', u'✦ ✦ ✦',
-                u'✮ ✮ ✮', u'☆ ☆ ☆', u'❂ ❂ ❂', u'✣ ✣ ✣', u'❖ ❖ ❖', u'☼ ☼ ☼', u'✠ ✠ ✠']
+        self.rssb_defaults = [u'', u'<hr />', u'∗ ∗ ∗', u'• • •', u'♦ ♦ ♦',
+                u'† †', u'‡ ‡ ‡', u'∞ ∞ ∞', u'¤ ¤ ¤', u'§']
         self.initialize_options(get_option, get_help, db, book_id)
 
         self.load_histories()
diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index ecd8609ecc..60f8a10fc6 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -316,9 +316,19 @@ remove all non-breaking-space entities, or may include false positive matches re
 
 :guilabel:`Replace scene breaks`
     If this option is configured then |app| will replace scene break markers it finds with the replacement text specified by the
-    user. In general you should avoid using html tags, |app| will discard any tags and use pre-defined markup.  <hr />
-    tags, i.e. horizontal rules, are an exception.  These can optionally be specified with styles, if you choose to add your own
-    style be sure to include the 'width' setting, otherwise the style information will be discarded.
+    user.  Please note that some ornamental characters may not be supported across all reading devices.
+    
+    In general you should avoid using html tags, |app| will discard any tags and use pre-defined markup.  <hr />
+    tags, i.e. horizontal rules, and <img> tags are exceptions.  Horizontal rules can optionally be specified with styles, if you 
+    choose to add your own style be sure to include the 'width' setting, otherwise the style information will be discarded.  Image 
+    tags can used, but |app| does not provide the ability to add the image during conversion, this must be done after the fact using 
+    the 'Tweak Epub' feature, or Sigil.
+        
+        Example image tag (place the image within an 'Images' folder inside the epub after conversion):
+            <img style="width:10%" src="../Images/scenebreak.png" />
+        
+        Example horizontal rule with styles:
+            <hr style="width:20%;padding-top: 1px;border-top: 2px ridge black;border-bottom: 2px groove black;"/>
  
 :guilabel:`Remove unnecessary hyphens`
     |app| will analyze all hyphenated content in the document when this option is enabled.  The document itself is used

From aa7ebf0ac38912890a70a7b972ff15a8490c4c38 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 00:13:57 -0700
Subject: [PATCH 40/55] Nicer completion widgets for author/tags controls

---
 src/calibre/gui2/complete.py               | 354 +++++++++++++++++++++
 src/calibre/gui2/metadata/basic_widgets.py |  16 +-
 2 files changed, 362 insertions(+), 8 deletions(-)
 create mode 100644 src/calibre/gui2/complete.py

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
new file mode 100644
index 0000000000..5c5a836d98
--- /dev/null
+++ b/src/calibre/gui2/complete.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \
+        QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \
+        QStyle, QEvent, pyqtSignal
+
+from calibre.utils.icu import sort_key, lower
+from calibre.gui2 import NONE
+from calibre.gui2.widgets import EnComboBox
+
+class CompleterItemDelegate(QItemDelegate): # {{{
+
+    ''' Renders the current item as though it were selected '''
+
+    def __init__(self, view):
+        self.view = view
+        QItemDelegate.__init__(self, view)
+
+    def paint(self, p, opt, idx):
+        opt = QStyleOptionViewItem(opt)
+        opt.showDecorationSelected = True
+        if self.view.currentIndex() == idx:
+            opt.state |= QStyle.State_HasFocus
+        QItemDelegate.paint(self, p, opt, idx)
+
+# }}}
+
+class CompleteWindow(QListView): # {{{
+
+    '''
+    The completion popup. For keyboard and mouse handling see
+    :meth:`eventFilter`.
+    '''
+
+    #: This signal is emitted when the user selects one of the listed
+    #: completions, by mouse or keyboard
+    completion_selected = pyqtSignal(object)
+
+    def __init__(self, widget, model):
+        self.widget = widget
+        QListView.__init__(self)
+        self.setVisible(False)
+        self.setParent(None, Qt.Popup)
+        self.setAlternatingRowColors(True)
+        self.setFocusPolicy(Qt.NoFocus)
+        self._d = CompleterItemDelegate(self)
+        self.setItemDelegate(self._d)
+        self.setModel(model)
+        self.setFocusProxy(widget)
+        self.installEventFilter(self)
+        self.clicked.connect(self.do_selected)
+        self.entered.connect(self.do_entered)
+        self.setMouseTracking(True)
+
+    def do_entered(self, idx):
+        if idx.isValid():
+            self.setCurrentIndex(idx)
+
+    def do_selected(self, idx=None):
+        idx = self.currentIndex() if idx is None else idx
+        if not idx.isValid() and self.model().rowCount() > 0:
+            idx = self.model().index(0)
+        if idx.isValid():
+            data = unicode(self.model().data(idx, Qt.DisplayRole))
+            self.completion_selected.emit(data)
+        self.hide()
+
+    def eventFilter(self, o, e):
+        if o is not self:
+            return False
+        if e.type() == e.KeyPress:
+            key = e.key()
+            if key in (Qt.Key_Escape, Qt.Key_Backtab) or \
+                    (key == Qt.Key_F4 and (e.modifiers() & Qt.AltModifier)):
+                self.hide()
+                return True
+            elif key in (Qt.Key_Enter, Qt.Key_Return, Qt.Key_Tab):
+                self.do_selected()
+                return True
+            elif key in (Qt.Key_Up, Qt.Key_Down, Qt.Key_PageUp,
+                    Qt.Key_PageDown):
+                return False
+            # Send key event to associated line edit
+            self.widget.eat_focus_out = False
+            try:
+                self.widget.event(e)
+            finally:
+                self.widget.eat_focus_out = True
+            if not self.widget.hasFocus():
+                # Line edit lost focus
+                self.hide()
+            if e.isAccepted():
+                # Line edit consumed event
+                return True
+        elif e.type() == e.MouseButtonPress:
+            # Hide popup if user clicks outside it, otherwise
+            # pass event to popup
+            if not self.underMouse():
+                self.hide()
+                return True
+        elif e.type() in (e.InputMethod, e.ShortcutOverride):
+            QApplication.sendEvent(self.widget, e)
+
+        return False # Do not filter this event
+
+# }}}
+
+class CompleteModel(QAbstractListModel):
+
+    def __init__(self, parent=None):
+        QAbstractListModel.__init__(self, parent)
+        self.sep = ','
+        self.space_before_sep = False
+        self.items = []
+        self.lowered_items = []
+        self.matches = []
+
+    def set_items(self, items):
+        items = [unicode(x.strip()) for x in items]
+        self.items = list(sorted(items, key=lambda x: sort_key(x)))
+        self.lowered_items = [lower(x) for x in self.items]
+        self.matches = []
+        self.reset()
+
+    def rowCount(self, *args):
+        return len(self.matches)
+
+    def data(self, index, role):
+        if role == Qt.DisplayRole:
+            r = index.row()
+            try:
+                return self.matches[r]
+            except IndexError:
+                pass
+        return NONE
+
+    def get_matches(self, prefix):
+        '''
+        Return all matches that (case insensitively) start with prefix
+        '''
+        prefix = lower(prefix)
+        ans = []
+        if prefix:
+            for i, test in enumerate(self.lowered_items):
+                if test.startswith(prefix):
+                    ans.append(self.items[i])
+        return ans
+
+    def update_matches(self, matches):
+        self.matches = matches
+        self.reset()
+
+class MultiCompleteLineEdit(QLineEdit):
+    '''
+    A line edit that completes on multiple items separated by a
+    separator. Use the :meth:`update_items_cache` to set the list of
+    all possible completions. Separator can be controlled with the
+    :meth:`set_separator` and :meth:`set_space_before_sep` methods.
+    '''
+
+    def __init__(self, parent=None):
+        self.eat_focus_out = True
+        self.max_visible_items = 7
+        self.current_prefix = None
+        QLineEdit.__init__(self, parent)
+
+        self._model = CompleteModel(parent=self)
+        self.complete_window = CompleteWindow(self, self._model)
+        self.textChanged.connect(self.text_changed)
+        self.cursorPositionChanged.connect(self.cursor_position_changed)
+        self.complete_window.completion_selected.connect(self.completion_selected)
+
+    # Interface {{{
+    def update_items_cache(self, complete_items):
+        self.all_items = complete_items
+
+    def set_separator(self, sep):
+        self.sep = sep
+
+    def set_space_before_sep(self, space_before):
+        self.space_before_sep = space_before
+
+    # }}}
+
+    def eventFilter(self, o, e):
+        if self.eat_focus_out and o is self and e.type() == QEvent.FocusOut:
+            if self.complete_window.isVisible():
+                return True # Filter this event since the cw is visible
+        return QLineEdit.eventFilter(self, o, e)
+
+
+    def text_changed(self, *args):
+        self.update_completions()
+
+    def cursor_position_changed(self, *args):
+        self.update_completions()
+
+    def update_completions(self):
+        ' Update the list of completions '
+        cpos = self.cursorPosition()
+        text = unicode(self.text())
+        prefix = text[:cpos]
+        self.current_prefix = prefix
+        complete_prefix = prefix.lstrip()
+        if self.sep:
+            complete_prefix = prefix = prefix.split(self.sep)[-1].lstrip()
+
+        matches = self._model.get_matches(complete_prefix)
+        self.update_complete_window(matches)
+
+    def get_completed_text(self, text):
+        '''
+        Get completed text from current cursor position and the completion
+        text
+        '''
+        if self.sep is None:
+            return text
+        else:
+            cursor_pos = self.cursorPosition()
+            before_text = unicode(self.text())[:cursor_pos]
+            after_text = unicode(self.text())[cursor_pos:]
+            after_parts = after_text.split(self.sep)
+            if len(after_parts) < 3 and not after_parts[-1].strip():
+                after_text = u''
+            prefix_len = len(before_text.split(self.sep)[-1].lstrip())
+            if self.space_before_sep:
+                complete_text_pat = '%s%s %s %s'
+                len_extra = 3
+            else:
+                complete_text_pat = '%s%s%s %s'
+                len_extra = 2
+            return prefix_len, len_extra, complete_text_pat % (
+                before_text[:cursor_pos - prefix_len], text, self.sep, after_text)
+
+    def completion_selected(self, text):
+        prefix_len, len_extra, ctext = self.get_completed_text(text)
+        if self.sep is None:
+            self.setText(ctext)
+            self.setCursorPosition(len(ctext))
+        else:
+            cursor_pos = self.cursorPosition()
+            self.setText(ctext)
+            self.setCursorPosition(cursor_pos - prefix_len + len(text) + len_extra)
+
+    def update_complete_window(self, matches):
+        self._model.update_matches(matches)
+        if matches:
+            self.show_complete_window()
+        else:
+            self.complete_window.hide()
+
+
+    def position_complete_window(self):
+        popup = self.complete_window
+        screen = QApplication.desktop().availableGeometry(self)
+        h = (popup.sizeHintForRow(0) * min(self.max_visible_items,
+            popup.model().rowCount()) + 3) + 3
+        hsb = popup.horizontalScrollBar()
+        if hsb and hsb.isVisible():
+            h += hsb.sizeHint().height()
+
+        rh = self.height()
+        pos = self.mapToGlobal(QPoint(0, self.height() - 2))
+        w = self.width()
+
+        if w > screen.width():
+            w = screen.width()
+        if (pos.x() + w) > (screen.x() + screen.width()):
+            pos.setX(screen.x() + screen.width() - w)
+        if (pos.x() < screen.x()):
+            pos.setX(screen.x())
+
+        top = pos.y() - rh - screen.top() + 2
+        bottom = screen.bottom() - pos.y()
+        h = max(h, popup.minimumHeight())
+        if h > bottom:
+            h = min(max(top, bottom), h)
+            if top > bottom:
+                pos.setY(pos.y() - h - rh + 2)
+
+        popup.setGeometry(pos.x(), pos.y(), w, h)
+
+
+    def show_complete_window(self):
+        self.position_complete_window()
+        self.complete_window.show()
+
+    def moveEvent(self, ev):
+        ret = QLineEdit.moveEvent(self, ev)
+        QTimer.singleShot(0, self.position_complete_window)
+        return ret
+
+    def resizeEvent(self, ev):
+        ret = QLineEdit.resizeEvent(self, ev)
+        QTimer.singleShot(0, self.position_complete_window)
+        return ret
+
+
+    @dynamic_property
+    def all_items(self):
+        def fget(self):
+            return self._model.items
+        def fset(self, items):
+            self._model.set_items(items)
+        return property(fget=fget, fset=fset)
+
+    @dynamic_property
+    def sep(self):
+        def fget(self):
+            return self._model.sep
+        def fset(self, val):
+            self._model.sep = val
+        return property(fget=fget, fset=fset)
+
+    @dynamic_property
+    def space_before_sep(self):
+        def fget(self):
+            return self._model.space_before_sep
+        def fset(self, val):
+            self._model.space_before_sep = val
+        return property(fget=fget, fset=fset)
+
+class MultiCompleteComboBox(EnComboBox):
+
+    def __init__(self, *args):
+        EnComboBox.__init__(self, *args)
+        self.setLineEdit(MultiCompleteLineEdit(self))
+
+    def update_items_cache(self, complete_items):
+        self.lineEdit().update_items_cache(complete_items)
+
+    def set_separator(self, sep):
+        self.lineEdit().set_separator(sep)
+
+    def set_space_before_sep(self, space_before):
+        self.lineEdit().set_space_before_sep(space_before)
+
+
+
+if __name__ == '__main__':
+    from PyQt4.Qt import QDialog, QVBoxLayout
+    app = QApplication([])
+    d = QDialog()
+    d.setLayout(QVBoxLayout())
+    le = MultiCompleteLineEdit(d)
+    d.layout().addWidget(le)
+    le.all_items = ['one', 'otwo', 'othree', 'ooone', 'ootwo', 'oothree']
+    d.exec_()
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index d3fa5958ab..8ec037278e 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -12,8 +12,8 @@ from PyQt4.Qt import Qt, QDateEdit, QDate, \
     QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
     QPushButton, QSpinBox, QLineEdit
 
-from calibre.gui2.widgets import EnLineEdit, CompleteComboBox, \
-        EnComboBox, FormatList, ImageView, CompleteLineEdit
+from calibre.gui2.widgets import EnLineEdit, EnComboBox, FormatList, ImageView
+from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
 from calibre.utils.icu import sort_key
 from calibre.utils.config import tweaks, prefs
 from calibre.ebooks.metadata import title_sort, authors_to_string, \
@@ -149,14 +149,14 @@ class TitleSortEdit(TitleEdit):
 # }}}
 
 # Authors {{{
-class AuthorsEdit(CompleteComboBox):
+class AuthorsEdit(MultiCompleteComboBox):
 
     TOOLTIP = ''
     LABEL = _('&Author(s):')
 
     def __init__(self, parent):
         self.dialog = parent
-        CompleteComboBox.__init__(self, parent)
+        MultiCompleteComboBox.__init__(self, parent)
         self.setToolTip(self.TOOLTIP)
         self.setWhatsThis(self.TOOLTIP)
         self.setEditable(True)
@@ -814,14 +814,14 @@ class RatingEdit(QSpinBox): # {{{
 
 # }}}
 
-class TagsEdit(CompleteLineEdit): # {{{
+class TagsEdit(MultiCompleteLineEdit): # {{{
     LABEL = _('Ta&gs:')
     TOOLTIP = '<p>'+_('Tags categorize the book. This is particularly '
             'useful while searching. <br><br>They can be any words'
             'or phrases, separated by commas.')
 
     def __init__(self, parent):
-        CompleteLineEdit.__init__(self, parent)
+        MultiCompleteLineEdit.__init__(self, parent)
         self.setToolTip(self.TOOLTIP)
         self.setWhatsThis(self.TOOLTIP)
 
@@ -839,7 +839,7 @@ class TagsEdit(CompleteLineEdit): # {{{
         tags = db.tags(id_, index_is_id=True)
         tags = tags.split(',') if tags else []
         self.current_val = tags
-        self.update_items_cache(db.all_tags())
+        self.all_items = db.all_tags()
         self.original_val = self.current_val
 
     @property
@@ -860,7 +860,7 @@ class TagsEdit(CompleteLineEdit): # {{{
         d = TagEditor(self, db, id_)
         if d.exec_() == TagEditor.Accepted:
             self.current_val = d.tags
-            self.update_items_cache(db.all_tags())
+            self.all_items = db.all_tags()
 
 
     def commit(self, db, id_):

From 7852275d9325b3d8b32a97ef37838bcd1a5831dd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 00:19:21 -0700
Subject: [PATCH 41/55] ...

---
 src/calibre/gui2/complete.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index 5c5a836d98..ce8609fc99 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -162,6 +162,9 @@ class MultiCompleteLineEdit(QLineEdit):
     separator. Use the :meth:`update_items_cache` to set the list of
     all possible completions. Separator can be controlled with the
     :meth:`set_separator` and :meth:`set_space_before_sep` methods.
+
+    A call to self.set_separator(None) will allow this widget to be used
+    to complete non multiple fields as well.
     '''
 
     def __init__(self, parent=None):

From b22e640b509d811d22705e2ad997a535ec053f34 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 2 Feb 2011 09:03:06 +0000
Subject: [PATCH 42/55] Fix template program regression triggered by
 recursively calling the processor

---
 src/calibre/utils/formatter_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py
index 2e5852df89..518f2ed140 100644
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -186,7 +186,7 @@ class BuiltinTemplate(BuiltinFormatterFunction):
 
     def evaluate(self, formatter, kwargs, mi, locals, template):
         template = template.replace('[[', '{').replace(']]', '}')
-        return formatter.safe_format(template, kwargs, 'TEMPLATE', mi)
+        return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)
 
 class BuiltinEval(BuiltinFormatterFunction):
     name = 'eval'

From dece4f236fd8e149089619f455e2522cff8f545b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 09:21:21 -0700
Subject: [PATCH 43/55] Connect/share menu: Re-organize to make it a little
 less easy to select email and delete instead of just email by mistake

---
 src/calibre/gui2/actions/device.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/calibre/gui2/actions/device.py b/src/calibre/gui2/actions/device.py
index fb3e627789..b32568f8fd 100644
--- a/src/calibre/gui2/actions/device.py
+++ b/src/calibre/gui2/actions/device.py
@@ -74,23 +74,29 @@ class ShareConnMenu(QMenu): # {{{
         opts = email_config().parse()
         if opts.accounts:
             self.email_to_menu = QMenu(_('Email to')+'...', self)
+            ac = self.addMenu(self.email_to_menu)
+            self.email_actions.append(ac)
+            self.email_to_and_delete_menu = QMenu(
+                    _('Email to and delete from library')+'...', self)
             keys = sorted(opts.accounts.keys())
             for account in keys:
                 formats, auto, default = opts.accounts[account]
                 dest = 'mail:'+account+';'+formats
                 action1 = DeviceAction(dest, False, False, I('mail.png'),
-                        _('Email to')+' '+account)
+                        account)
                 action2 = DeviceAction(dest, True, False, I('mail.png'),
-                        _('Email to')+' '+account+ _(' and delete from library'))
-                map(self.email_to_menu.addAction, (action1, action2))
+                        account + ' ' + _('(delete from library)'))
+                self.email_to_menu.addAction(action1)
+                self.email_to_and_delete_menu.addAction(action2)
                 map(self.memory.append, (action1, action2))
                 if default:
-                    map(self.addAction, (action1, action2))
-                    map(self.email_actions.append, (action1, action2))
-                self.email_to_menu.addSeparator()
+                    ac = DeviceAction(dest, False, False,
+                            I('mail.png'), _('Email to') + ' ' +account)
+                    self.addAction(ac)
+                    self.email_actions.append(ac)
                 action1.a_s.connect(sync_menu.action_triggered)
                 action2.a_s.connect(sync_menu.action_triggered)
-            ac = self.addMenu(self.email_to_menu)
+            ac = self.addMenu(self.email_to_and_delete_menu)
             self.email_actions.append(ac)
         else:
             ac = self.addAction(_('Setup email based sharing of books'))

From 92b313ec7e5f4ee21a1ecd0b43503db85f2502ee Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 09:47:18 -0700
Subject: [PATCH 44/55] ...

---
 resources/recipes/le_temps.recipe | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/resources/recipes/le_temps.recipe b/resources/recipes/le_temps.recipe
index c33d9a51d2..7e320fe710 100644
--- a/resources/recipes/le_temps.recipe
+++ b/resources/recipes/le_temps.recipe
@@ -15,12 +15,26 @@ class LeTemps(BasicNewsRecipe):
      oldest_article = 7
      max_articles_per_feed = 100
      __author__ = 'Sujata Raman'
+     description = 'French news. Needs a subscription from http://www.letemps.ch'
      no_stylesheets = True
      remove_javascript = True
      recursions = 1
      encoding = 'UTF-8'
      match_regexps = [r'http://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
      language = 'fr'
+     needs_subscription = True
+
+     def get_browser(self):
+         br = BasicNewsRecipe.get_browser(self)
+         br.open('http://www.letemps.ch/login')
+         br['username'] = self.username
+         br['password'] = self.password
+         raw = br.submit().read()
+         if '>Login' in raw:
+             raise ValueError('Failed to login to letemp.ch. Check '
+                     'your username and password')
+         return br
+
 
      keep_only_tags = [dict(name='div', attrs={'id':'content'}),
                         dict(name='div', attrs={'class':'story'})

From 1de9e0b94b1398c577cab36b87b849569f773b0d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 10:44:18 -0700
Subject: [PATCH 45/55] use the new completer for publisher and series boxes as
 well

---
 src/calibre/gui2/complete.py               | 18 +++++++++++++-----
 src/calibre/gui2/metadata/basic_widgets.py | 14 +++++++++-----
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index ce8609fc99..7fbfae901a 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -64,8 +64,8 @@ class CompleteWindow(QListView): # {{{
 
     def do_selected(self, idx=None):
         idx = self.currentIndex() if idx is None else idx
-        if not idx.isValid() and self.model().rowCount() > 0:
-            idx = self.model().index(0)
+        #if not idx.isValid() and self.model().rowCount() > 0:
+        #    idx = self.model().index(0)
         if idx.isValid():
             data = unicode(self.model().data(idx, Qt.DisplayRole))
             self.completion_selected.emit(data)
@@ -175,9 +175,10 @@ class MultiCompleteLineEdit(QLineEdit):
 
         self._model = CompleteModel(parent=self)
         self.complete_window = CompleteWindow(self, self._model)
-        self.textChanged.connect(self.text_changed)
+        self.textEdited.connect(self.text_edited)
         self.cursorPositionChanged.connect(self.cursor_position_changed)
         self.complete_window.completion_selected.connect(self.completion_selected)
+        self.installEventFilter(self)
 
     # Interface {{{
     def update_items_cache(self, complete_items):
@@ -198,7 +199,7 @@ class MultiCompleteLineEdit(QLineEdit):
         return QLineEdit.eventFilter(self, o, e)
 
 
-    def text_changed(self, *args):
+    def text_edited(self, *args):
         self.update_completions()
 
     def cursor_position_changed(self, *args):
@@ -206,6 +207,8 @@ class MultiCompleteLineEdit(QLineEdit):
 
     def update_completions(self):
         ' Update the list of completions '
+        if not self.complete_window.isVisible() and not self.hasFocus():
+            return
         cpos = self.cursorPosition()
         text = unicode(self.text())
         prefix = text[:cpos]
@@ -223,7 +226,7 @@ class MultiCompleteLineEdit(QLineEdit):
         text
         '''
         if self.sep is None:
-            return text
+            return -1, -1, text
         else:
             cursor_pos = self.cursorPosition()
             before_text = unicode(self.text())[:cursor_pos]
@@ -334,6 +337,11 @@ class MultiCompleteComboBox(EnComboBox):
     def __init__(self, *args):
         EnComboBox.__init__(self, *args)
         self.setLineEdit(MultiCompleteLineEdit(self))
+        # Needed to allow changing the case of an existing item
+        # otherwise on focus out, the text is changed to the
+        # item that matches case insensitively
+        c = self.lineEdit().completer()
+        c.setCaseSensitivity(Qt.CaseSensitive)
 
     def update_items_cache(self, complete_items):
         self.lineEdit().update_items_cache(complete_items)
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index 8ec037278e..6b89e306e6 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QDateEdit, QDate, \
     QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
     QPushButton, QSpinBox, QLineEdit
 
-from calibre.gui2.widgets import EnLineEdit, EnComboBox, FormatList, ImageView
+from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView
 from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
 from calibre.utils.icu import sort_key
 from calibre.utils.config import tweaks, prefs
@@ -283,13 +283,14 @@ class AuthorSortEdit(EnLineEdit):
 # }}}
 
 # Series {{{
-class SeriesEdit(EnComboBox):
+class SeriesEdit(MultiCompleteComboBox):
 
     TOOLTIP = _('List of known series. You can add new series.')
     LABEL = _('&Series:')
 
     def __init__(self, parent):
-        EnComboBox.__init__(self, parent)
+        MultiCompleteComboBox.__init__(self, parent)
+        self.set_separator(None)
         self.dialog = parent
         self.setSizeAdjustPolicy(
                 self.AdjustToMinimumContentsLengthWithIcon)
@@ -314,6 +315,7 @@ class SeriesEdit(EnComboBox):
     def initialize(self, db, id_):
         all_series = db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.update_items_cache([x[1] for x in all_series])
         series_id = db.series_id(id_, index_is_id=True)
         idx, c = None, 0
         for i in all_series:
@@ -910,11 +912,12 @@ class ISBNEdit(QLineEdit): # {{{
 
 # }}}
 
-class PublisherEdit(EnComboBox): # {{{
+class PublisherEdit(MultiCompleteComboBox): # {{{
     LABEL = _('&Publisher:')
 
     def __init__(self, parent):
-        EnComboBox.__init__(self, parent)
+        MultiCompleteComboBox.__init__(self, parent)
+        self.set_separator(None)
         self.setSizeAdjustPolicy(
                 self.AdjustToMinimumContentsLengthWithIcon)
 
@@ -935,6 +938,7 @@ class PublisherEdit(EnComboBox): # {{{
     def initialize(self, db, id_):
         all_publishers = db.all_publishers()
         all_publishers.sort(key=lambda x : sort_key(x[1]))
+        self.update_items_cache([x[1] for x in all_publishers])
         publisher_id = db.publisher_id(id_, index_is_id=True)
         idx, c = None, 0
         for i in all_publishers:

From 99fa9659c9f8618eaa89494599e782ab277ddaee Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 11:03:03 -0700
Subject: [PATCH 46/55] dont popup completer on cursor position changing as
 that can be confusing for noobs

---
 src/calibre/gui2/complete.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index 7fbfae901a..39ecd847ec 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -176,7 +176,6 @@ class MultiCompleteLineEdit(QLineEdit):
         self._model = CompleteModel(parent=self)
         self.complete_window = CompleteWindow(self, self._model)
         self.textEdited.connect(self.text_edited)
-        self.cursorPositionChanged.connect(self.cursor_position_changed)
         self.complete_window.completion_selected.connect(self.completion_selected)
         self.installEventFilter(self)
 
@@ -202,9 +201,6 @@ class MultiCompleteLineEdit(QLineEdit):
     def text_edited(self, *args):
         self.update_completions()
 
-    def cursor_position_changed(self, *args):
-        self.update_completions()
-
     def update_completions(self):
         ' Update the list of completions '
         if not self.complete_window.isVisible() and not self.hasFocus():

From b2055e106ee9d7d637292a08960a89a396269d83 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 11:07:16 -0700
Subject: [PATCH 47/55] Don't insert separator after completion, again
 confusing for noobs

---
 src/calibre/gui2/complete.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index 39ecd847ec..0ad8fb13d4 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -222,7 +222,7 @@ class MultiCompleteLineEdit(QLineEdit):
         text
         '''
         if self.sep is None:
-            return -1, -1, text
+            return -1, text
         else:
             cursor_pos = self.cursorPosition()
             before_text = unicode(self.text())[:cursor_pos]
@@ -231,24 +231,18 @@ class MultiCompleteLineEdit(QLineEdit):
             if len(after_parts) < 3 and not after_parts[-1].strip():
                 after_text = u''
             prefix_len = len(before_text.split(self.sep)[-1].lstrip())
-            if self.space_before_sep:
-                complete_text_pat = '%s%s %s %s'
-                len_extra = 3
-            else:
-                complete_text_pat = '%s%s%s %s'
-                len_extra = 2
-            return prefix_len, len_extra, complete_text_pat % (
-                before_text[:cursor_pos - prefix_len], text, self.sep, after_text)
+            return prefix_len, \
+                before_text[:cursor_pos - prefix_len] + text + after_text
 
     def completion_selected(self, text):
-        prefix_len, len_extra, ctext = self.get_completed_text(text)
+        prefix_len, ctext = self.get_completed_text(text)
         if self.sep is None:
             self.setText(ctext)
             self.setCursorPosition(len(ctext))
         else:
             cursor_pos = self.cursorPosition()
             self.setText(ctext)
-            self.setCursorPosition(cursor_pos - prefix_len + len(text) + len_extra)
+            self.setCursorPosition(cursor_pos - prefix_len + len(text))
 
     def update_complete_window(self, matches):
         self._model.update_matches(matches)

From fd3ed9b8f6f20519fdfd337538d16c24e0e5dfaa Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 11:40:37 -0700
Subject: [PATCH 48/55] Use the new completer in all delegates

---
 src/calibre/gui2/library/delegates.py | 57 ++++++++-------------------
 1 file changed, 17 insertions(+), 40 deletions(-)

diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py
index ae9d6e2f71..fed2e42470 100644
--- a/src/calibre/gui2/library/delegates.py
+++ b/src/calibre/gui2/library/delegates.py
@@ -12,11 +12,11 @@ from PyQt4.Qt import QColor, Qt, QModelIndex, QSize, \
                      QPainterPath, QLinearGradient, QBrush, \
                      QPen, QStyle, QPainter, QStyleOptionViewItemV4, \
                      QIcon,  QDoubleSpinBox, QVariant, QSpinBox, \
-                     QStyledItemDelegate, QCompleter, \
-                     QComboBox, QTextDocument
+                     QStyledItemDelegate, QComboBox, QTextDocument
 
 from calibre.gui2 import UNDEFINED_QDATE, error_dialog
-from calibre.gui2.widgets import EnLineEdit, CompleteLineEdit
+from calibre.gui2.widgets import EnLineEdit
+from calibre.gui2.complete import MultiCompleteLineEdit
 from calibre.utils.date import now, format_date
 from calibre.utils.config import tweaks
 from calibre.utils.formatter import validation_formatter
@@ -151,38 +151,15 @@ class TextDelegate(QStyledItemDelegate): # {{{
         self.auto_complete_function = f
 
     def createEditor(self, parent, option, index):
-        editor = EnLineEdit(parent)
         if self.auto_complete_function:
+            editor = MultiCompleteLineEdit(parent)
+            editor.set_separator(None)
             complete_items = [i[1] for i in self.auto_complete_function()]
-            completer = QCompleter(complete_items, self)
-            completer.setCaseSensitivity(Qt.CaseInsensitive)
-            completer.setCompletionMode(QCompleter.PopupCompletion)
-            editor.setCompleter(completer)
-        return editor
-#}}}
-
-class TagsDelegate(QStyledItemDelegate): # {{{
-    def __init__(self, parent):
-        QStyledItemDelegate.__init__(self, parent)
-        self.db = None
-
-    def set_database(self, db):
-        self.db = db
-
-    def createEditor(self, parent, option, index):
-        if self.db:
-            col = index.model().column_map[index.column()]
-            if not index.model().is_custom_column(col):
-                editor = CompleteLineEdit(parent, self.db.all_tags())
-            else:
-                editor = CompleteLineEdit(parent,
-                        sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))),
-                               key=sort_key))
-                return editor
+            editor.update_items_cache(complete_items)
         else:
             editor = EnLineEdit(parent)
         return editor
-# }}}
+#}}}
 
 class CompleteDelegate(QStyledItemDelegate): # {{{
     def __init__(self, parent, sep, items_func_name, space_before_sep=False):
@@ -197,13 +174,15 @@ class CompleteDelegate(QStyledItemDelegate): # {{{
     def createEditor(self, parent, option, index):
         if self.db and hasattr(self.db, self.items_func_name):
             col = index.model().column_map[index.column()]
+            editor = MultiCompleteLineEdit(parent)
+            editor.set_separator(self.sep)
+            editor.set_space_before_sep(self.space_before_sep)
             if not index.model().is_custom_column(col):
-                editor = CompleteLineEdit(parent, getattr(self.db, self.items_func_name)(),
-                    self.sep, self.space_before_sep)
+                all_items = getattr(self.db, self.items_func_name)()
             else:
-                editor = CompleteLineEdit(parent,
-                    sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))),
-                    key=sort_key), self.sep, self.space_before_sep)
+                all_items = list(self.db.all_custom(
+                    label=self.db.field_metadata.key_to_label(col)))
+            editor.update_items_cache(all_items)
         else:
             editor = EnLineEdit(parent)
         return editor
@@ -273,13 +252,11 @@ class CcTextDelegate(QStyledItemDelegate): # {{{
             editor.setRange(-100., float(sys.maxint))
             editor.setDecimals(2)
         else:
-            editor = EnLineEdit(parent)
+            editor = MultiCompleteLineEdit(parent)
+            editor.set_separator(None)
             complete_items = sorted(list(m.db.all_custom(label=m.db.field_metadata.key_to_label(col))),
                                     key=sort_key)
-            completer = QCompleter(complete_items, self)
-            completer.setCaseSensitivity(Qt.CaseInsensitive)
-            completer.setCompletionMode(QCompleter.PopupCompletion)
-            editor.setCompleter(completer)
+            editor.update_items_cache(complete_items)
         return editor
 
 # }}}

From c6260c678ac073b63e65c8f6900fecdc169c5f10 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 12:12:45 -0700
Subject: [PATCH 49/55] Refactoring to use new completer

---
 src/calibre/gui2/convert/metadata.ui      | 12 +++++-----
 src/calibre/gui2/custom_column_widgets.py | 28 +++++++++++++----------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/src/calibre/gui2/convert/metadata.ui b/src/calibre/gui2/convert/metadata.ui
index 61c27594c4..47d983d870 100644
--- a/src/calibre/gui2/convert/metadata.ui
+++ b/src/calibre/gui2/convert/metadata.ui
@@ -190,7 +190,7 @@
         </widget>
        </item>
        <item row="4" column="1">
-        <widget class="CompleteLineEdit" name="tags">
+        <widget class="MultiCompleteLineEdit" name="tags">
          <property name="toolTip">
           <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
          </property>
@@ -255,7 +255,7 @@
         </widget>
        </item>
        <item row="1" column="1">
-        <widget class="CompleteComboBox" name="author">
+        <widget class="MultiCompleteComboBox" name="author">
          <property name="editable">
           <bool>true</bool>
          </property>
@@ -282,14 +282,14 @@
    <header>widgets.h</header>
   </customwidget>
   <customwidget>
-   <class>CompleteComboBox</class>
+   <class>MultiCompleteComboBox</class>
    <extends>QComboBox</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
-   <class>CompleteLineEdit</class>
+   <class>MultiCompleteLineEdit</class>
    <extends>QLineEdit</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
    <class>ImageView</class>
diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py
index 360a5bcd0a..0555d42b4f 100644
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@@ -14,7 +14,7 @@ from PyQt4.Qt import QComboBox, QLabel, QSpinBox, QDoubleSpinBox, QDateEdit, \
         QPushButton
 
 from calibre.utils.date import qt_to_dt, now
-from calibre.gui2.widgets import CompleteLineEdit, EnComboBox
+from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
 from calibre.gui2.comments_editor import Editor as CommentsEditor
 from calibre.gui2 import UNDEFINED_QDATE, error_dialog
 from calibre.utils.config import tweaks
@@ -228,10 +228,11 @@ class Text(Base):
         values = self.all_values = list(self.db.all_custom(num=self.col_id))
         values.sort(key=sort_key)
         if self.col_metadata['is_multiple']:
-            w = CompleteLineEdit(parent, values)
+            w = MultiCompleteLineEdit(parent)
+            w.update_items_cache(values)
             w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
         else:
-            w = EnComboBox(parent)
+            w = MultiCompleteComboBox(parent)
             w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon)
             w.setMinimumContentsLength(25)
         self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent), w]
@@ -240,9 +241,10 @@ class Text(Base):
         val = self.db.get_custom(book_id, num=self.col_id, index_is_id=True)
         self.initial_val = val
         val = self.normalize_db_val(val)
+        self.widgets[1].update_items_cache(self.all_values)
+
         if self.col_metadata['is_multiple']:
             self.setter(val)
-            self.widgets[1].update_items_cache(self.all_values)
         else:
             idx = None
             for i, c in enumerate(self.all_values):
@@ -276,7 +278,7 @@ class Series(Base):
     def setup_ui(self, parent):
         values = self.all_values = list(self.db.all_custom(num=self.col_id))
         values.sort(key=sort_key)
-        w = EnComboBox(parent)
+        w = MultiCompleteComboBox(parent)
         w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon)
         w.setMinimumContentsLength(25)
         self.name_widget = w
@@ -305,6 +307,7 @@ class Series(Base):
             if c == val:
                 idx = i
             self.name_widget.addItem(c)
+        self.name_widget.update_items_cache(self.all_values)
         self.name_widget.setEditText('')
         if idx is not None:
             self.widgets[1].setCurrentIndex(idx)
@@ -670,7 +673,7 @@ class BulkDateTime(BulkBase):
 class BulkSeries(BulkBase):
 
     def setup_ui(self, parent):
-        self.make_widgets(parent, EnComboBox)
+        self.make_widgets(parent, MultiCompleteComboBox)
         values = self.all_values = list(self.db.all_custom(num=self.col_id))
         values.sort(key=sort_key)
         self.main_widget.setSizeAdjustPolicy(self.main_widget.AdjustToMinimumContentsLengthWithIcon)
@@ -705,6 +708,7 @@ class BulkSeries(BulkBase):
 
     def initialize(self, book_id):
         self.idx_widget.setChecked(False)
+        self.main_widget.update_items_cache(self.all_values)
         for c in self.all_values:
             self.main_widget.addItem(c)
         self.main_widget.setEditText('')
@@ -795,7 +799,8 @@ class RemoveTags(QWidget):
         layout.setSpacing(5)
         layout.setContentsMargins(0, 0, 0, 0)
 
-        self.tags_box = CompleteLineEdit(parent, values)
+        self.tags_box = MultiCompleteLineEdit(parent)
+        self.tags_box.update_items_cache(values)
         layout.addWidget(self.tags_box, stretch=3)
         self.checkbox = QCheckBox(_('Remove all tags'), parent)
         layout.addWidget(self.checkbox)
@@ -816,7 +821,7 @@ class BulkText(BulkBase):
         values = self.all_values = list(self.db.all_custom(num=self.col_id))
         values.sort(key=sort_key)
         if self.col_metadata['is_multiple']:
-            self.make_widgets(parent, CompleteLineEdit,
+            self.make_widgets(parent, MultiCompleteLineEdit,
                               extra_label_text=_('tags to add'))
             self.main_widget.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
             self.adding_widget = self.main_widget
@@ -829,16 +834,15 @@ class BulkText(BulkBase):
             w.tags_box.textChanged.connect(self.a_c_checkbox_changed)
             w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
         else:
-            self.make_widgets(parent, EnComboBox)
+            self.make_widgets(parent, MultiCompleteComboBox)
             self.main_widget.setSizeAdjustPolicy(
                         self.main_widget.AdjustToMinimumContentsLengthWithIcon)
             self.main_widget.setMinimumContentsLength(25)
         self.ignore_change_signals = False
 
     def initialize(self, book_ids):
-        if self.col_metadata['is_multiple']:
-            self.main_widget.update_items_cache(self.all_values)
-        else:
+        self.main_widget.update_items_cache(self.all_values)
+        if not self.col_metadata['is_multiple']:
             val = self.get_initial_value(book_ids)
             self.initial_val = val = self.normalize_db_val(val)
             idx = None

From fc3123e4c171f504f5e8fa9cd14f6c444b1e575a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 12:20:07 -0700
Subject: [PATCH 50/55] Refactor advanced search dialog to use new completer

---
 src/calibre/gui2/dialogs/search.py |  7 ++-----
 src/calibre/gui2/dialogs/search.ui | 14 +++++++-------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/calibre/gui2/dialogs/search.py b/src/calibre/gui2/dialogs/search.py
index ab3fd3ec4e..0fbe188aa5 100644
--- a/src/calibre/gui2/dialogs/search.py
+++ b/src/calibre/gui2/dialogs/search.py
@@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
 import re, copy
 
-from PyQt4.Qt import QDialog, QDialogButtonBox, QCompleter, Qt
+from PyQt4.Qt import QDialog, QDialogButtonBox
 
 from calibre.gui2.dialogs.search_ui import Ui_Dialog
 from calibre.library.caches import CONTAINS_MATCH, EQUALS_MATCH
@@ -29,20 +29,17 @@ class SearchDialog(QDialog, Ui_Dialog):
             name = name.strip().replace('|', ',')
             self.authors_box.addItem(name)
         self.authors_box.setEditText('')
-        self.authors_box.completer().setCompletionMode(QCompleter.PopupCompletion)
-        self.authors_box.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive)
         self.authors_box.set_separator('&')
         self.authors_box.set_space_before_sep(True)
         self.authors_box.update_items_cache(db.all_author_names())
 
         all_series = db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.series_box.update_items_cache([x[1] for x in all_series])
         for i in all_series:
             id, name = i
             self.series_box.addItem(name)
         self.series_box.setEditText('')
-        self.series_box.completer().setCompletionMode(QCompleter.PopupCompletion)
-        self.series_box.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive)
 
         all_tags = db.all_tags()
         self.tags_box.update_items_cache(all_tags)
diff --git a/src/calibre/gui2/dialogs/search.ui b/src/calibre/gui2/dialogs/search.ui
index 1d013a1e9f..842787a2da 100644
--- a/src/calibre/gui2/dialogs/search.ui
+++ b/src/calibre/gui2/dialogs/search.ui
@@ -265,21 +265,21 @@
         </widget>
        </item>
        <item row="2" column="1">
-        <widget class="CompleteComboBox" name="authors_box">
+        <widget class="MultiCompleteComboBox" name="authors_box">
          <property name="toolTip">
           <string>Enter an author's name. Only one author can be used.</string>
          </property>
         </widget>
        </item>
        <item row="3" column="1">
-        <widget class="EnComboBox" name="series_box">
+        <widget class="MultiCompleteComboBox" name="series_box">
          <property name="toolTip">
           <string>Enter a series name, without an index. Only one series name can be used.</string>
          </property>
         </widget>
        </item>
        <item row="4" column="1">
-        <widget class="CompleteLineEdit" name="tags_box">
+        <widget class="MultiCompleteLineEdit" name="tags_box">
          <property name="toolTip">
           <string>Enter tags separated by spaces</string>
          </property>
@@ -360,14 +360,14 @@
    <header>widgets.h</header>
   </customwidget>
   <customwidget>
-   <class>CompleteLineEdit</class>
+   <class>MultiCompleteLineEdit</class>
    <extends>QLineEdit</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
-   <class>CompleteComboBox</class>
+   <class>MultiCompleteComboBox</class>
    <extends>QComboBox</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
  </customwidgets>
  <tabstops>

From c7c3027c8d2602687e1f6cb198486b19b1edd671 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 12:29:54 -0700
Subject: [PATCH 51/55] Refactor bulk metadata edit dialog to use new completer
 (apart from S&R widgets)

---
 src/calibre/gui2/dialogs/metadata_bulk.py |  2 ++
 src/calibre/gui2/dialogs/metadata_bulk.ui | 23 +++++++++--------------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 533a344de5..9239a0e136 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -764,6 +764,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
     def initialize_series(self):
         all_series = self.db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.series.update_items_cache([x[1] for x in all_series])
 
         for i in all_series:
             id, name = i
@@ -773,6 +774,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
     def initialize_publisher(self):
         all_publishers = self.db.all_publishers()
         all_publishers.sort(key=lambda x : sort_key(x[1]))
+        self.publisher.update_items_cache([x[1] for x in all_publishers])
 
         for i in all_publishers:
             id, name = i
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui
index b0f2c144fc..ecdb396662 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.ui
+++ b/src/calibre/gui2/dialogs/metadata_bulk.ui
@@ -76,7 +76,7 @@
             </widget>
            </item>
            <item row="0" column="1">
-            <widget class="CompleteComboBox" name="authors">
+            <widget class="MultiCompleteComboBox" name="authors">
              <property name="editable">
               <bool>true</bool>
              </property>
@@ -175,7 +175,7 @@
             </widget>
            </item>
            <item row="4" column="1">
-            <widget class="EnComboBox" name="publisher">
+            <widget class="MultiCompleteComboBox" name="publisher">
              <property name="editable">
               <bool>true</bool>
              </property>
@@ -195,7 +195,7 @@
             </widget>
            </item>
            <item row="5" column="1">
-            <widget class="CompleteLineEdit" name="tags">
+            <widget class="MultiCompleteLineEdit" name="tags">
              <property name="toolTip">
               <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
              </property>
@@ -229,7 +229,7 @@
             </widget>
            </item>
            <item row="6" column="1">
-            <widget class="CompleteLineEdit" name="remove_tags">
+            <widget class="MultiCompleteLineEdit" name="remove_tags">
              <property name="toolTip">
               <string>Comma separated list of tags to remove from the books. </string>
              </property>
@@ -262,7 +262,7 @@
             </widget>
            </item>
            <item row="7" column="1">
-            <widget class="EnComboBox" name="series">
+            <widget class="MultiCompleteComboBox" name="series">
              <property name="sizePolicy">
               <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
                <horstretch>0</horstretch>
@@ -1072,19 +1072,14 @@ not multiple and the destination field is multiple</string>
    <header>widgets.h</header>
   </customwidget>
   <customwidget>
-   <class>EnComboBox</class>
+   <class>MultiCompleteComboBox</class>
    <extends>QComboBox</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
-   <class>CompleteComboBox</class>
-   <extends>QComboBox</extends>
-   <header>widgets.h</header>
-  </customwidget>
-  <customwidget>
-   <class>CompleteLineEdit</class>
+   <class>MultiCompleteLineEdit</class>
    <extends>QLineEdit</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
    <class>HistoryLineEdit</class>

From b02186802f8320c1bcb4f1bf11c09624c8bec29d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 12:43:19 -0700
Subject: [PATCH 52/55] ...

---
 src/calibre/gui2/convert/metadata.py      | 10 +++++++---
 src/calibre/gui2/convert/metadata.ui      |  9 ++-------
 src/calibre/gui2/custom_column_widgets.py |  3 +++
 src/calibre/gui2/dialogs/metadata_bulk.py |  2 ++
 src/calibre/gui2/dialogs/search.py        |  1 +
 5 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py
index 23cac74cf8..81274f25a8 100644
--- a/src/calibre/gui2/convert/metadata.py
+++ b/src/calibre/gui2/convert/metadata.py
@@ -70,9 +70,6 @@ class MetadataWidget(Widget, Ui_Form):
     def initialize_metadata_options(self):
         self.initialize_combos()
         self.author.editTextChanged.connect(self.deduce_author_sort)
-        self.author.set_separator('&')
-        self.author.set_space_before_sep(True)
-        self.author.update_items_cache(self.db.all_author_names())
 
         mi = self.db.get_metadata(self.book_id, index_is_id=True)
         self.title.setText(mi.title)
@@ -109,6 +106,9 @@ class MetadataWidget(Widget, Ui_Form):
     def initalize_authors(self):
         all_authors = self.db.all_authors()
         all_authors.sort(key=lambda x : sort_key(x[1]))
+        self.author.set_separator('&')
+        self.author.set_space_before_sep(True)
+        self.author.update_items_cache(self.db.all_author_names())
 
         for i in all_authors:
             id, name = i
@@ -124,6 +124,8 @@ class MetadataWidget(Widget, Ui_Form):
     def initialize_series(self):
         all_series = self.db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.series.set_separator(None)
+        self.series.update_items_cache([x[1] for x in all_series])
 
         for i in all_series:
             id, name = i
@@ -133,6 +135,8 @@ class MetadataWidget(Widget, Ui_Form):
     def initialize_publisher(self):
         all_publishers = self.db.all_publishers()
         all_publishers.sort(key=lambda x : sort_key(x[1]))
+        self.publisher.set_separator(None)
+        self.publisher.update_items_cache([x[1] for x in all_publishers])
 
         for i in all_publishers:
             id, name = i
diff --git a/src/calibre/gui2/convert/metadata.ui b/src/calibre/gui2/convert/metadata.ui
index 47d983d870..95ccac6890 100644
--- a/src/calibre/gui2/convert/metadata.ui
+++ b/src/calibre/gui2/convert/metadata.ui
@@ -213,7 +213,7 @@
         </widget>
        </item>
        <item row="5" column="1">
-        <widget class="EnComboBox" name="series">
+        <widget class="MultiCompleteComboBox" name="series">
          <property name="sizePolicy">
           <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
            <horstretch>10</horstretch>
@@ -248,7 +248,7 @@
         </widget>
        </item>
        <item row="3" column="1">
-        <widget class="EnComboBox" name="publisher">
+        <widget class="MultiCompleteComboBox" name="publisher">
          <property name="editable">
           <bool>true</bool>
          </property>
@@ -276,11 +276,6 @@
    <extends>QLineEdit</extends>
    <header>widgets.h</header>
   </customwidget>
-  <customwidget>
-   <class>EnComboBox</class>
-   <extends>QComboBox</extends>
-   <header>widgets.h</header>
-  </customwidget>
   <customwidget>
    <class>MultiCompleteComboBox</class>
    <extends>QComboBox</extends>
diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py
index 0555d42b4f..5180999379 100644
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@@ -233,6 +233,7 @@ class Text(Base):
             w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
         else:
             w = MultiCompleteComboBox(parent)
+            w.set_separator(None)
             w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon)
             w.setMinimumContentsLength(25)
         self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent), w]
@@ -708,6 +709,7 @@ class BulkSeries(BulkBase):
 
     def initialize(self, book_id):
         self.idx_widget.setChecked(False)
+        self.main_widget.set_separator(None)
         self.main_widget.update_items_cache(self.all_values)
         for c in self.all_values:
             self.main_widget.addItem(c)
@@ -835,6 +837,7 @@ class BulkText(BulkBase):
             w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
         else:
             self.make_widgets(parent, MultiCompleteComboBox)
+            self.main_widget.set_separator(None)
             self.main_widget.setSizeAdjustPolicy(
                         self.main_widget.AdjustToMinimumContentsLengthWithIcon)
             self.main_widget.setMinimumContentsLength(25)
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 9239a0e136..12f49baaca 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -764,6 +764,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
     def initialize_series(self):
         all_series = self.db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.series.set_separator(None)
         self.series.update_items_cache([x[1] for x in all_series])
 
         for i in all_series:
@@ -774,6 +775,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
     def initialize_publisher(self):
         all_publishers = self.db.all_publishers()
         all_publishers.sort(key=lambda x : sort_key(x[1]))
+        self.publisher.set_separator(None)
         self.publisher.update_items_cache([x[1] for x in all_publishers])
 
         for i in all_publishers:
diff --git a/src/calibre/gui2/dialogs/search.py b/src/calibre/gui2/dialogs/search.py
index 0fbe188aa5..9c91446f3c 100644
--- a/src/calibre/gui2/dialogs/search.py
+++ b/src/calibre/gui2/dialogs/search.py
@@ -35,6 +35,7 @@ class SearchDialog(QDialog, Ui_Dialog):
 
         all_series = db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.series_box.set_separator(None)
         self.series_box.update_items_cache([x[1] for x in all_series])
         for i in all_series:
             id, name = i

From f76a8e5f4f493eaaf2329db418cb9d84d7c17fdc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 12:50:31 -0700
Subject: [PATCH 53/55] Refactor old edit metadata dialog to use new completer

---
 src/calibre/gui2/dialogs/metadata_single.py |  4 ++++
 src/calibre/gui2/dialogs/metadata_single.ui | 21 ++++++++-------------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index fa20658c12..f36fd3019d 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -739,6 +739,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
         all_series = self.db.all_series()
         all_series.sort(key=lambda x : sort_key(x[1]))
+        self.series.set_separator(None)
+        self.series.update_items_cache([x[1] for x in all_series])
         series_id = self.db.series_id(self.row)
         idx, c = None, 0
         for i in all_series:
@@ -756,6 +758,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
     def initialize_publisher(self):
         all_publishers = self.db.all_publishers()
         all_publishers.sort(key=lambda x : sort_key(x[1]))
+        self.publisher.set_separator(None)
+        self.publisher.update_items_cache([x[1] for x in all_publishers])
         publisher_id = self.db.publisher_id(self.row)
         idx, c = None, 0
         for i in all_publishers:
diff --git a/src/calibre/gui2/dialogs/metadata_single.ui b/src/calibre/gui2/dialogs/metadata_single.ui
index 23efc45399..5bcf268aaa 100644
--- a/src/calibre/gui2/dialogs/metadata_single.ui
+++ b/src/calibre/gui2/dialogs/metadata_single.ui
@@ -240,7 +240,7 @@ Using this button to create author sort will change author sort from red to gree
                    </widget>
                   </item>
                   <item row="2" column="1">
-                   <widget class="CompleteComboBox" name="authors">
+                   <widget class="MultiCompleteComboBox" name="authors">
                     <property name="editable">
                      <bool>true</bool>
                     </property>
@@ -313,7 +313,7 @@ If the box is colored green, then text matches the individual author's sort stri
                    </widget>
                   </item>
                   <item row="5" column="1" colspan="2">
-                   <widget class="EnComboBox" name="publisher">
+                   <widget class="MultiCompleteComboBox" name="publisher">
                     <property name="editable">
                      <bool>true</bool>
                     </property>
@@ -335,7 +335,7 @@ If the box is colored green, then text matches the individual author's sort stri
                   <item row="6" column="1">
                    <layout class="QHBoxLayout" name="_2">
                     <item>
-                     <widget class="CompleteLineEdit" name="tags">
+                     <widget class="MultiCompleteLineEdit" name="tags">
                       <property name="toolTip">
                        <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
                       </property>
@@ -379,7 +379,7 @@ If the box is colored green, then text matches the individual author's sort stri
                      <number>5</number>
                     </property>
                     <item>
-                     <widget class="EnComboBox" name="series">
+                     <widget class="MultiCompleteComboBox" name="series">
                       <property name="toolTip">
                        <string>List of known series. You can add new series.</string>
                       </property>
@@ -837,19 +837,14 @@ If the box is colored green, then text matches the individual author's sort stri
    <header>widgets.h</header>
   </customwidget>
   <customwidget>
-   <class>EnComboBox</class>
-   <extends>QComboBox</extends>
-   <header>widgets.h</header>
-  </customwidget>
-  <customwidget>
-   <class>CompleteLineEdit</class>
+   <class>MultiCompleteLineEdit</class>
    <extends>QLineEdit</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
-   <class>CompleteComboBox</class>
+   <class>MultiCompleteComboBox</class>
    <extends>QComboBox</extends>
-   <header>widgets.h</header>
+   <header>calibre/gui2/complete.h</header>
   </customwidget>
   <customwidget>
    <class>FormatList</class>

From ef88d0b5ccc5f30cde2df90fc364d509c397576b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 12:54:10 -0700
Subject: [PATCH 54/55] ...

---
 src/calibre/gui2/dialogs/add_empty_book.py | 4 ++--
 src/calibre/gui2/dialogs/search.ui         | 5 -----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/calibre/gui2/dialogs/add_empty_book.py b/src/calibre/gui2/dialogs/add_empty_book.py
index b8339f95f5..9e5fb07308 100644
--- a/src/calibre/gui2/dialogs/add_empty_book.py
+++ b/src/calibre/gui2/dialogs/add_empty_book.py
@@ -7,8 +7,8 @@ __license__   = 'GPL v3'
 from PyQt4.Qt import QDialog, QGridLayout, QLabel, QDialogButtonBox,  \
             QApplication, QSpinBox, QToolButton, QIcon
 from calibre.ebooks.metadata import authors_to_string, string_to_authors
-from calibre.gui2.widgets import CompleteComboBox
 from calibre.utils.icu import sort_key
+from calibre.gui2.complete import MultiCompleteComboBox
 
 class AddEmptyBookDialog(QDialog):
 
@@ -32,7 +32,7 @@ class AddEmptyBookDialog(QDialog):
         self.author_label = QLabel(_('Set the author of the new books to:'))
         self._layout.addWidget(self.author_label, 2, 0, 1, 2)
 
-        self.authors_combo = CompleteComboBox(self)
+        self.authors_combo = MultiCompleteComboBox(self)
         self.authors_combo.setSizeAdjustPolicy(
                 self.authors_combo.AdjustToMinimumContentsLengthWithIcon)
         self.authors_combo.setEditable(True)
diff --git a/src/calibre/gui2/dialogs/search.ui b/src/calibre/gui2/dialogs/search.ui
index 842787a2da..eb6fffdb60 100644
--- a/src/calibre/gui2/dialogs/search.ui
+++ b/src/calibre/gui2/dialogs/search.ui
@@ -354,11 +354,6 @@
    <extends>QLineEdit</extends>
    <header>widgets.h</header>
   </customwidget>
-  <customwidget>
-   <class>EnComboBox</class>
-   <extends>QComboBox</extends>
-   <header>widgets.h</header>
-  </customwidget>
   <customwidget>
    <class>MultiCompleteLineEdit</class>
    <extends>QLineEdit</extends>

From f52ba0aae37d0534503619bd730ac5ffaa89636c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Feb 2011 14:13:24 -0700
Subject: [PATCH 55/55] ...

---
 src/calibre/gui2/complete.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index 0ad8fb13d4..bdfbaaf0da 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -64,8 +64,6 @@ class CompleteWindow(QListView): # {{{
 
     def do_selected(self, idx=None):
         idx = self.currentIndex() if idx is None else idx
-        #if not idx.isValid() and self.model().rowCount() > 0:
-        #    idx = self.model().index(0)
         if idx.isValid():
             data = unicode(self.model().data(idx, Qt.DisplayRole))
             self.completion_selected.emit(data)
@@ -81,6 +79,9 @@ class CompleteWindow(QListView): # {{{
                 self.hide()
                 return True
             elif key in (Qt.Key_Enter, Qt.Key_Return, Qt.Key_Tab):
+                if key == Qt.Key_Tab and not self.currentIndex().isValid():
+                    if self.model().rowCount() > 0:
+                        self.setCurrentIndex(self.model().index(0))
                 self.do_selected()
                 return True
             elif key in (Qt.Key_Up, Qt.Key_Down, Qt.Key_PageUp,