merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-02-06 13:31:47 +08:00 · 2011-02-06 13:31:47 +08:00 · 6c495f6a5a
commit 6c495f6a5a
parent 7c156c7062 47c25481f2
22 changed files with 381 additions and 399 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -55,7 +55,7 @@
    - title: "Add search to the plugin preferences dialog"
  bug fixes:
-    - title: "Fix a bug that could cause fiels to be lost when changing metadata on east asian windows installs if the title and/or author is very long."
+    - title: "Fix a bug that could cause files to be lost when changing metadata on east asian windows installs if the title and/or author is very long."
      tickets: [8620]
    - title: "Tag browser: Fix searching with items in a user category not working if the main category is hidden"
@ -88,7 +88,7 @@
    - title: "Do not discard the result of a conversion if the user opens the edit metadata dialog while the conversion is running"
      tickets: [8672]
-    - title: "CHM Input: When the chm file lacks a hhc, lookf for index.html instead"
+    - title: "CHM Input: When the chm file lacks a hhc, look for index.html instead"
      tickets: [8688]
    - title: "EPUB Input: Filter some invalid media types from the spine"
--- a/resources/images/news/kopalniawiedzy.png
+++ b/resources/images/news/kopalniawiedzy.png
--- a/resources/images/news/korespondent.png
+++ b/resources/images/news/korespondent.png
--- a/resources/jacket/stylesheet.css
+++ b/resources/jacket/stylesheet.css
@ -113,8 +113,8 @@ table.cbj_header tr.cbj_series {
 	/* display:none; */
 	}
-table.cbj_header tr.cbj_pubdate {
+table.cbj_header tr.cbj_pubdata {
-	/* Uncomment the next line to remove 'Published' from banner section */
+	/* Uncomment the next line to remove 'Published (year of publication)' from banner section */
 	/* display:none; */
 	}
--- a/resources/recipes/kopalniawiedzy.recipe
+++ b/resources/recipes/kopalniawiedzy.recipe
@ -0,0 +1,80 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011, Attis <attis@attis.one.pl>'
 __version__ = 'v. 0.1'
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class KopalniaWiedzy(BasicNewsRecipe):
     '''
     Recipe for kopalniawiedzy.pl (language 'pl').
     Articles come from the per-section RSS feeds listed in ``feeds``. An
     article that links to a further page through ``<a class="next">`` has
     that page's ``articleContent`` div appended to it (see append_page).
     '''
     title          = u'Kopalnia Wiedzy'
     publisher      = u'Kopalnia Wiedzy'
     description    = u'Ciekawostki ze świata nauki i techniki'
     encoding       = 'utf-8'
     __author__     = 'Attis'
     language       = 'pl'
     oldest_article = 7
     max_articles_per_feed = 100
     # Prefix that the href of a "next" link is appended to in append_page()
     INDEX          = u'http://kopalniawiedzy.pl/'
     remove_javascript     = True
     no_stylesheets        = True
     remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'} }]
     remove_tags_after = dict(attrs={'class':'ad-square'})
     keep_only_tags    = [dict(name="div", attrs={'id':'articleContent'})]
     extra_css      = '.topimage {margin-top: 30px}'
     preprocess_regexps = [
         # Replace a lightbox link wrapping an image with the bare image,
         # tagged with the "topimage" class that extra_css styles.
         (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
          lambda match: '<img class="topimage" ' + match.group(1) + '>'),
         # Collapse a doubled line break into a single one. The value returned
         # by a replacement function is inserted verbatim, so it must not
         # contain a backslash.
         (re.compile(u'<br  /><br  />'),
          lambda match: '<br/>'),
     ]
     feeds = [
         (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
         (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
         (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
         (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
         (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
         (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss'),
     ]
     def is_link_wanted(self, url, tag):
         '''Return True only for link tags whose class attribute is 'next'.'''
         # .get() instead of indexing: a tag without a class attribute is
         # simply not wanted rather than an error.
         return tag.get('class') == 'next'
     def remove_beyond(self, tag, next):
         '''
         Starting at ``tag`` and walking up through its parents until the
         body element (or the top of the tree) is reached, extract every
         sibling found by repeatedly following the attribute named ``next``
         (called with 'nextSibling' in this recipe). Does nothing when
         ``tag`` is None.
         '''
         while tag is not None and getattr(tag, 'name', None) != 'body':
             after = getattr(tag, next)
             while after is not None:
                 # Read the following sibling before extract() detaches `after`
                 ns = getattr(after, next)
                 after.extract()
                 after = ns
             tag = tag.parent
     def append_page(self, soup, appendtag, position):
         '''
         If ``soup`` contains an ``<a class="next">`` link, download the page
         it points to, recursively append that page's own continuation, and
         insert its ``articleContent`` div into ``appendtag`` at ``position``.
         '''
         pager = soup.find('a', attrs={'class': 'next'})
         if not pager:
             return
         nexturl = self.INDEX + pager['href']
         soup2 = self.index_to_soup(nexturl)
         texttag = soup2.find('div', attrs={'id': 'articleContent'})
         if texttag is None:
             # Continuation page has no article body: nothing to append
             return
         # Drop everything after the element with class 'pages'
         # (presumably the pagination bar -- confirm against the site markup)
         tag = texttag.find(attrs={'class': 'pages'})
         self.remove_beyond(tag, 'nextSibling')
         newpos = len(texttag.contents)
         self.append_page(soup2, texttag, newpos)
         appendtag.insert(position, texttag)
     def preprocess_html(self, soup):
         '''
         Append continuation pages to the article body, then remove all
         ``div.pages`` and ``p.wykop`` elements. Returns the modified soup.
         '''
         self.append_page(soup, soup.body, 3)
         for item in soup.findAll('div', attrs={'class': 'pages'}):
             item.extract()
         for item in soup.findAll('p', attrs={'class': 'wykop'}):
             item.extract()
         return soup
--- a/resources/recipes/korespondent.recipe
+++ b/resources/recipes/korespondent.recipe
@ -0,0 +1,40 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011, Attis <attis@attis.one.pl>'
 __version__ = 'v. 0.1'
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class KorespondentPL(BasicNewsRecipe):
     '''
     Recipe for korespondent.pl (language 'pl'), fed from the site's single
     RSS feed. Only the ``div.publicystyka`` part of each page is kept.
     '''
     title          = u'Korespondent.pl'
     publisher      = u'Korespondent.pl'
     description    = u'Centrum wolnorynkowe - serwis ludzi wolnych'
     encoding       = 'utf-8'
     __author__     = 'Attis'
     language       = 'pl'
     oldest_article = 15
     max_articles_per_feed = 100
     remove_javascript     = True
     no_stylesheets        = True
     keep_only_tags = [dict(name='div', attrs={'class':'publicystyka'})]
     remove_tags    = [{'name': 'meta'}, {'name':'div', 'attrs': {'class': 'zdjecie'} }]
     extra_css      = '.naglowek {font-size: small}\n .tytul {font-size: x-large; padding-bottom: 10px; padding-top: 30px} \n .external {font-size: small}'
     preprocess_regexps = [
         # Replace links to index.php with their link text
         (re.compile(u'<a href="index\.php.*>(.*)</a>'),
          lambda match: match.group(1)),
         # Give <i> elements the "external" class styled in extra_css
         (re.compile(u'<i>'),
          lambda match: '<i class="external">'),
         # Drop the empty paragraph in front of "Więcej". The replacement is
         # a unicode literal: under Python 2 a non-ASCII byte string cannot be
         # joined into unicode markup.
         (re.compile(u'<p></p>Więcej'),
          lambda match: u'Więcej'),
         # Links opening in a new window also get the "external" class
         (re.compile(u'target="_blank"'),
          lambda match: 'target="_blank" class="external"'),
         # Cut everything from the "Poczytaj inne teksty..." paragraph up to
         # the closing body tag
         (re.compile(u'<p align="center">\nPoczytaj inne teksty w <a href="http://www.korespondent.pl">Serwisie wolnorynkowym Korespondent.pl</a>.*</body>', re.DOTALL|re.IGNORECASE),
          lambda match: '</div></body>'),
     ]
     feeds = [(u'Serwis informacyjny', u'http://korespondent.pl/rss.xml')]
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -149,17 +149,17 @@ class HeuristicProcessor(object):
        ]
        ITALICIZE_STYLE_PATS = [
-            r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])_(?P<words>[^_]+)?_',
-            r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])/(?P<words>[^/]+)?/',
-            r'(?msu)(?<=\s)~~(?P<words>\S[^~]{0,40}?\S)?~~(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)?~~',
-            r'(?msu)(?<=\s)\*(?P<words>\S[^\*]{0,40}?\S)?\*(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)?\*',
-            r'(?msu)(?<=\s)~(?P<words>\S[^~]{0,40}?\S)?~(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])~(?P<words>[^~]+)?~',
-            r'(?msu)(?<=\s)_/(?P<words>\S[^/_]{0,40}?\S)?/_(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)?/_',
-            r'(?msu)(?<=\s)_\*(?P<words>\S[^\*_]{0,40}?\S)?\*_(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)?\*_',
-            r'(?msu)(?<=\s)\*/(?P<words>\S[^/\*]{0,40}?\S)?/\*(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)?/\*',
-            r'(?msu)(?<=\s)_\*/(?P<words>\S[^\*_]{0,40}?\S)?/\*_(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)?/\*_',
-            r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+)?:/',
-            r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=[\s\.,\!\?])',
+            r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+)?:\|',
        ]
        for word in ITALICIZE_WORDS:
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@ -109,7 +109,7 @@ def get_rating(rating, rchar, e_rchar):
 def render_jacket(mi, output_profile,
        alt_title=_('Unknown'), alt_tags=[], alt_comments='',
-        alt_publisher=('Unknown publisher')):
+        alt_publisher=('')):
    css = P('jacket/stylesheet.css', data=True).decode('utf-8')
    try:
@ -127,7 +127,7 @@ def render_jacket(mi, output_profile,
    try:
        publisher = mi.publisher if mi.publisher else alt_publisher
    except:
-        publisher = _('Unknown publisher')
+        publisher = ''
    try:
        pubdate = strftime(u'%Y', mi.pubdate.timetuple())
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -226,7 +226,7 @@ class ParseRtf:
        try:
            return_value = process_tokens_obj.process_tokens()
        except InvalidRtfException, msg:
-            #Check to see if the file is correctly encoded
+            # Check to see if the file is correctly encoded
            encode_obj = default_encoding.DefaultEncoding(
            in_file = self.__temp_file,
            run_level = self.__run_level,
@ -237,14 +237,14 @@ class ParseRtf:
            check_encoding_obj = check_encoding.CheckEncoding(
                    bug_handler = RtfInvalidCodeException,
                        )
-            enc = 'cp' + encode_obj.get_codepage()
+            enc = encode_obj.get_codepage()
-            if enc == 'cp10000':
+            if enc != 'mac_roman':
-                enc = 'mac_roman'
+                enc = 'cp' + enc
-            msg = 'Exception in token processing'
+            msg = '%s\nException in token processing' % str(msg)
            if check_encoding_obj.check_encoding(self.__file, enc):
                file_name = self.__file if isinstance(self.__file, str) \
                                    else self.__file.encode('utf-8')
-                msg = 'File %s does not appear to be correctly encoded.\n' % file_name
+                msg +='\nFile %s does not appear to be correctly encoded.\n' % file_name
            try:
                os.remove(self.__temp_file)
            except OSError:
--- a/src/calibre/ebooks/rtf2xml/colors.py
+++ b/src/calibre/ebooks/rtf2xml/colors.py
@ -210,7 +210,7 @@ class Colors:
            hex_num = self.__color_dict.get(num)
        if hex_num is None:
            hex_num = '0'
-            if self.__run_level > 5:
+            if self.__run_level > 3:
                msg = 'no value in self.__color_dict' \
                'for key %s at line %d\n' % (num, self.__line)
                raise self.__bug_handler, msg
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@ -786,21 +786,23 @@ class ProcessTokens:
                    token = line.replace("\n","")
                    line_count += 1
                    if line_count == 1 and token != '\\{':
-                            msg = 'Invalid RTF: document doesn\'t start with {\n'
+                            msg = '\nInvalid RTF: document doesn\'t start with {\n'
                            raise self.__exception_handler, msg
                    elif line_count == 2 and token[0:4] != '\\rtf':
-                            msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
+                            msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n'
                            raise self.__exception_handler, msg
                    the_index = token.find('\\ ')
                    if token is not None and  the_index > -1:
-                        msg = 'Invalid RTF: token "\\ " not valid.\n'
+                        msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
                            % line_count
                        raise self.__exception_handler, msg
                    elif token[:1] == "\\":
                        try:
                            token.decode('us-ascii')
                        except UnicodeError, msg:
-                            msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
+                            msg = '\nInvalid RTF: Tokens not ascii encoded.\n%s\nError at line %d'\
                                % (str(msg), line_count)
                            raise self.__exception_handler, msg
                        line = self.process_cw(token)
                        if line is not None:
@ -816,7 +818,7 @@ class ProcessTokens:
                                write_obj.write('tx<nu<__________<%s\n' % field)
        if not line_count:
-            msg = 'Invalid RTF: file appears to be empty.\n'
+            msg = '\nInvalid RTF: file appears to be empty.\n'
            raise self.__exception_handler, msg
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
@ -827,7 +829,7 @@ class ProcessTokens:
        bad_brackets = self.__check_brackets(self.__file)
        if bad_brackets:
-            msg = 'Invalid RTF: document does not have matching brackets.\n'
+            msg = '\nInvalid RTF: document does not have matching brackets.\n'
            raise self.__exception_handler, msg
        else:
            return self.__return_code
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@ -117,6 +117,7 @@ class Tokenize:
        input_file = self.__replace_spchar.mreplace(input_file)
        # this is for older RTF
        input_file = self.__par_exp.sub('\n\\par \n', input_file)
        input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
        input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
        input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
        #remove \n in bin data
@ -139,17 +140,17 @@ class Tokenize:
            "\\_": "\\_ ",
            "\\:": "\\: ",
            "\\-": "\\- ",
-            # turn into a generic token to eliminate special
+            #turn into a generic token to eliminate special
-            # cases and make processing easier
+            #cases and make processing easier
            "\\{": "\\ob ",
-            # turn into a generic token to eliminate special
+            #turn into a generic token to eliminate special
-            # cases and make processing easier
+            #cases and make processing easier
            "\\}": "\\cb ",
-            # put a backslash in front of to eliminate special cases and
+            #put a backslash in front of to eliminate special cases and
-            # make processing easier
+            #make processing easier
            "{": "\\{",
-            # put a backslash in front of to eliminate special cases and
+            #put a backslash in front of to eliminate special cases and
-            # make processing easier
+            #make processing easier
            "}": "\\}",
            }
        self.__replace_spchar = MReplace(SIMPLE_RPL)
@ -165,21 +166,9 @@ class Tokenize:
        #remove \n from endline char
        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
        #this is for old RTF
-        self.__par_exp = re.compile(r'\\\n+')
+        self.__par_exp = re.compile(r'(\\\n+|\\ )')
        #handle cw using a digit as argument and without space as delimiter
        self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
        #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
        #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
        #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
        #self.__remove_line = re.compile(r'\n+')
        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
    def __correct_spliting(self, token):
        """Split a token matched by the control-word/digit pattern in two.

        When ``self.__cwdigit_exp`` is found in ``token``, return its first
        and second captured groups joined by a newline; otherwise return
        ``token`` unchanged.
        """
        found = self.__cwdigit_exp.search(token)
        if found is not None:
            return '%s\n%s' % found.group(1, 2)
        return token
    def tokenize(self):
        """Main class for handling other methods. Reads the file \
@ -196,8 +185,6 @@ class Tokenize:
        tokens = map(self.__unicode_process, tokens)
        #remove empty items created by removing \uc
        tokens = filter(lambda x: len(x) > 0, tokens)
        #handles bothersome cases
        tokens = map(self.__correct_spliting, tokens)
        #write
        with open(self.__write_to, 'wb') as write_obj:
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
    preserve_spaces, detect_paragraph_type, detect_formatting_type, \
-    normalize_line_endings, convert_textile
+    normalize_line_endings, convert_textile, remove_indents
 from calibre import _ent_pat, xml_entity_to_unicode
 class TXTInput(InputFormatPlugin):
@ -47,6 +47,9 @@ class TXTInput(InputFormatPlugin):
        OptionRecommendation(name='preserve_spaces', recommended_value=False,
            help=_('Normally extra spaces are condensed into a single space. '
                'With this option all spaces will be displayed.')),
        OptionRecommendation(name='txt_in_remove_indents', recommended_value=False,
            help=_('Normally extra space at the beginning of lines is retained. '
                   'With this option they will be removed.')),
        OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
            help=_('Do not insert a Table of Contents into the output text.')),
    ])
@ -77,20 +80,6 @@ class TXTInput(InputFormatPlugin):
        # Normalize line endings
        txt = normalize_line_endings(txt)
        # Detect formatting
        if options.formatting_type == 'auto':
            options.formatting_type = detect_formatting_type(txt)
            log.debug('Auto detected formatting as %s' % options.formatting_type)
        if options.formatting_type == 'heuristic':
            setattr(options, 'enable_heuristics', True)
            setattr(options, 'markup_chapter_headings', True)
            setattr(options, 'italicize_common_cases', True)
            setattr(options, 'fix_indents', True)
            setattr(options, 'delete_blank_paragraphs', True)
            setattr(options, 'format_scene_breaks', True)
            setattr(options, 'dehyphenate', True)
        # Determine the paragraph type of the document.
        if options.paragraph_type == 'auto':
            options.paragraph_type = detect_paragraph_type(txt)
@ -100,15 +89,23 @@ class TXTInput(InputFormatPlugin):
            else:
                log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
        # Detect formatting
        if options.formatting_type == 'auto':
            options.formatting_type = detect_formatting_type(txt)
            log.debug('Auto detected formatting as %s' % options.formatting_type)
        if options.formatting_type == 'heuristic':
            setattr(options, 'enable_heuristics', True)
            setattr(options, 'unwrap_lines', False)
        if options.txt_in_remove_indents:
            txt = remove_indents(txt)
        # Preserve spaces will replace multiple spaces to a space
        # followed by the &nbsp; entity.
        if options.preserve_spaces:
            txt = preserve_spaces(txt)
        # Get length for hyphen removal and punctuation unwrap
        docanalysis = DocAnalysis('txt', txt)
        length = docanalysis.line_length(.5)
        # Reformat paragraphs to block formatting based on the detected type.
        # We don't check for block because the processor assumes block.
        # single and print are transformed to block for processing.
@ -119,9 +116,17 @@ class TXTInput(InputFormatPlugin):
        elif options.paragraph_type == 'unformatted':
            from calibre.ebooks.conversion.utils import HeuristicProcessor
            # unwrap lines based on punctuation
            docanalysis = DocAnalysis('txt', txt)
            length = docanalysis.line_length(.5)
            preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
            txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
        if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False):
            docanalysis = DocAnalysis('txt', txt)
            length = docanalysis.line_length(.5)
            dehyphenator = Dehyphenator(options.verbose, log=self.log)
            txt = dehyphenator(txt,'txt', length)
        # Process the text using the appropriate text processor.
        html = ''
        if options.formatting_type == 'markdown':
@ -134,14 +139,8 @@ class TXTInput(InputFormatPlugin):
        elif options.formatting_type == 'textile':
            log.debug('Running text through textile conversion...')
            html = convert_textile(txt)
        else:
            log.debug('Running text through basic conversion...')
            if options.formatting_type == 'heuristic':
                # Dehyphenate
                dehyphenator = Dehyphenator(options.verbose, log=self.log)
                txt = dehyphenator(txt,'txt', length)
            flow_size = getattr(options, 'flow_size', 0)
            html = convert_basic(txt, epub_split_size_kb=flow_size)
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -24,14 +24,14 @@ def clean_txt(txt):
    # all line breaks with \n.
    txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
-    # Replace whitespace at the beginning of the list with &nbsp;
+    # Replace whitespace at the beginning of the line with &nbsp;
-    txt = re.sub('(?m)(?P<space>[ ]+)', lambda mo: '&nbsp;' * mo.groups('space').count(' '), txt)
+    txt = re.sub('(?m)(?P<space>^[ ]+)(?=.)', lambda mo: '&nbsp;' * mo.groups('space').count(' '), txt)
-    txt = re.sub('(?m)(?P<space>[\t]+)', lambda mo: '&nbsp;' * 4 * mo.groups('space').count('\t'), txt)
+    txt = re.sub('(?m)(?P<space>^[\t]+)(?=.)', lambda mo: '&nbsp;' * 4 * mo.groups('space').count('\t'), txt)
    # Condense redundant spaces
    txt = re.sub('[ ]{2,}', ' ', txt)
-    # Remove blank lines from the beginning and end of the document.
+    # Remove blank space from the beginning and end of the document.
    txt = re.sub('^\s+(?=.)', '', txt)
    txt = re.sub('(?<=.)\s+$', '', txt)
    # Remove excessive line breaks.
@ -107,6 +107,10 @@ def preserve_spaces(txt):
    txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
    return txt
 def remove_indents(txt):
     '''
     Strip the leading whitespace from every line of txt and return the result.
     Line breaks are never consumed, so blank lines are preserved and
     whitespace-only lines become empty lines.
     '''
     # [^\S\n] means "any whitespace except a newline". A plain \s in
     # multi-line mode would also swallow the line breaks of blank lines and
     # so merge paragraphs together.
     return re.sub(r'(?mu)^[^\S\n]+', '', txt)
 def opf_writer(path, opf_name, manifest, spine, mi):
    opf = OPFCreator(path, mi)
    opf.create_manifest(manifest)
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@ -55,6 +55,7 @@ class TXTMLizer(object):
        self.log.info('Converting XHTML to TXT...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.toc_titles = []
        self.toc_ids = []
        self.last_was_heading = False
@ -94,8 +95,8 @@ class TXTMLizer(object):
        if getattr(self.opts, 'inline_toc', None):
            self.log.debug('Generating table of contents...')
            toc.append(u'%s\n\n' % _(u'Table of Contents:'))
-            for item in self.oeb_book.toc:
+            for item in self.toc_titles:
-                toc.append(u'* %s\n\n' % item.title)
+                toc.append(u'* %s\n\n' % item)
        return ''.join(toc)
    def create_flat_toc(self, nodes):
@ -103,6 +104,7 @@ class TXTMLizer(object):
        Turns a hierarchical list of TOC href's into a flat list.
        '''
        for item in nodes:
            self.toc_titles.append(item.title)
            self.toc_ids.append(item.href)
            self.create_flat_toc(item.nodes)
--- a/src/calibre/gui2/actions/device.py
+++ b/src/calibre/gui2/actions/device.py
@ -94,6 +94,7 @@ class ShareConnMenu(QMenu): # {{{
                            I('mail.png'), _('Email to') + ' ' +account)
                    self.addAction(ac)
                    self.email_actions.append(ac)
                    ac.a_s.connect(sync_menu.action_triggered)
                action1.a_s.connect(sync_menu.action_triggered)
                action2.a_s.connect(sync_menu.action_triggered)
            ac = self.addMenu(self.email_to_and_delete_menu)
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@ -6,157 +6,38 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \
+from PyQt4.Qt import QLineEdit, QAbstractListModel, Qt, \
-        QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \
+        QApplication, QCompleter
        QStyle, QEvent, pyqtSignal
 from calibre.utils.config import tweaks
 from calibre.utils.icu import sort_key, lower
 from calibre.gui2 import NONE
 from calibre.gui2.widgets import EnComboBox
 class CompleterItemDelegate(QItemDelegate): # {{{
    '''
    Item delegate that renders the view's current item as though it were
    selected.
    '''
    def __init__(self, view):
        # Remember the owning view; paint() asks it for the current index
        self.view = view
        QItemDelegate.__init__(self, view)
    def paint(self, p, opt, idx):
        # Paint from a copy of the style option that was passed in
        option = QStyleOptionViewItem(opt)
        option.showDecorationSelected = True
        current = self.view.currentIndex()
        if current == idx:
            # Flag the current row as having focus
            option.state |= QStyle.State_HasFocus
        QItemDelegate.paint(self, p, option, idx)
 # }}}
 class CompleteWindow(QListView): # {{{
    '''
    The completion popup. For keyboard and mouse handling see
    :meth:`eventFilter`.
    '''
    #: This signal is emitted when the user selects one of the listed
    #: completions, by mouse or keyboard
    completion_selected = pyqtSignal(object)
    def __init__(self, widget, model):
        '''
        :param widget: The widget this popup belongs to. It becomes the
                       popup's focus proxy and receives the key events
                       forwarded by :meth:`eventFilter`.
        :param model: The model whose rows are listed in the popup.
        '''
        self.widget = widget
        QListView.__init__(self)
        # Start hidden, as a parentless window of type Qt.Popup
        self.setVisible(False)
        self.setParent(None, Qt.Popup)
        self.setAlternatingRowColors(True)
        # The popup itself takes no focus; widget is set as focus proxy below
        self.setFocusPolicy(Qt.NoFocus)
        # The delegate is kept as an attribute (presumably so that it is not
        # garbage collected while in use -- confirm)
        self._d = CompleterItemDelegate(self)
        self.setItemDelegate(self._d)
        self.setModel(model)
        self.setFocusProxy(widget)
        # All events for the popup pass through eventFilter() first
        self.installEventFilter(self)
        self.clicked.connect(self.do_selected)
        # Mouse tracking is enabled so that do_entered() can make the item
        # under the pointer the current item
        self.entered.connect(self.do_entered)
        self.setMouseTracking(True)
    def do_entered(self, idx):
        ''' Make idx the current index, if it is valid. '''
        if idx.isValid():
            self.setCurrentIndex(idx)
    def do_selected(self, idx=None):
        '''
        Emit :attr:`completion_selected` with the display text of idx (the
        current index when idx is None), provided the index is valid, and
        then hide the popup in either case.
        '''
        idx = self.currentIndex() if idx is None else idx
        if idx.isValid():
            data = unicode(self.model().data(idx, Qt.DisplayRole))
            self.completion_selected.emit(data)
        self.hide()
    def eventFilter(self, o, e):
        '''
        Event filter installed on the popup itself. Returns True when the
        event has been handled here and must not be processed further,
        False otherwise. Events for any object other than the popup are
        never filtered.
        '''
        if o is not self:
            return False
        if e.type() == e.KeyPress:
            key = e.key()
            # Escape, Backtab and Alt+F4 dismiss the popup without selecting
            if key in (Qt.Key_Escape, Qt.Key_Backtab) or \
                    (key == Qt.Key_F4 and (e.modifiers() & Qt.AltModifier)):
                self.hide()
                return True
            # Enter/Return/Tab accept the current item
            elif key in (Qt.Key_Enter, Qt.Key_Return, Qt.Key_Tab):
                # Tab with no current item selects the first row, if any
                if key == Qt.Key_Tab and not self.currentIndex().isValid():
                    if self.model().rowCount() > 0:
                        self.setCurrentIndex(self.model().index(0))
                self.do_selected()
                return True
            # Navigation keys are left to the list view itself
            elif key in (Qt.Key_Up, Qt.Key_Down, Qt.Key_PageUp,
                    Qt.Key_PageDown):
                return False
            # Send key event to associated line edit
            # eat_focus_out is cleared on the widget only for the duration of
            # the forwarded event and is always restored afterwards
            self.widget.eat_focus_out = False
            try:
                self.widget.event(e)
            finally:
                self.widget.eat_focus_out = True
            if not self.widget.hasFocus():
                # Line edit lost focus
                self.hide()
            if e.isAccepted():
                # Line edit consumed event
                return True
        elif e.type() == e.MouseButtonPress:
            # Hide popup if user clicks outside it, otherwise
            # pass event to popup
            if not self.underMouse():
                self.hide()
                return True
        elif e.type() in (e.InputMethod, e.ShortcutOverride):
            # Pass these on to the widget as well; the event is not filtered
            QApplication.sendEvent(self.widget, e)
        return False # Do not filter this event
 # }}}
 class CompleteModel(QAbstractListModel):
    def __init__(self, parent=None):
        QAbstractListModel.__init__(self, parent)
        self.sep = ','
        self.space_before_sep = False
        self.items = []
        self.lowered_items = []
        self.matches = []
    def set_items(self, items):
        items = [unicode(x.strip()) for x in items]
        self.items = list(sorted(items, key=lambda x: sort_key(x)))
        self.lowered_items = [lower(x) for x in self.items]
        self.matches = []
        self.reset()
    def rowCount(self, *args):
-        return len(self.matches)
+        return len(self.items)
    def data(self, index, role):
        if role == Qt.DisplayRole:
            r = index.row()
            try:
-                return self.matches[r]
+                return self.items[r]
            except IndexError:
                pass
        return NONE
    def get_matches(self, prefix):
        '''
        Return all matches that (case insensitively) start with prefix
        '''
        prefix = lower(prefix)
        ans = []
        if prefix:
            for i, test in enumerate(self.lowered_items):
                if test.startswith(prefix):
                    ans.append(self.items[i])
        return ans
    def update_matches(self, matches):
        self.matches = matches
        self.reset()
 class MultiCompleteLineEdit(QLineEdit):
    '''
@ -170,16 +51,26 @@ class MultiCompleteLineEdit(QLineEdit):
    '''
    def __init__(self, parent=None):
        self.eat_focus_out = True
        self.max_visible_items = 7
        self.current_prefix = None
        QLineEdit.__init__(self, parent)
        self.sep = ','
        self.space_before_sep = False
        self._model = CompleteModel(parent=self)
-        self.complete_window = CompleteWindow(self, self._model)
+        self._completer = c = QCompleter(self._model, self)
        c.setWidget(self)
        c.setCompletionMode(QCompleter.PopupCompletion)
        c.setCaseSensitivity(Qt.CaseInsensitive)
        c.setModelSorting(QCompleter.CaseInsensitivelySortedModel)
        c.setCompletionRole(Qt.DisplayRole)
        p = c.popup()
        p.setMouseTracking(True)
        p.entered.connect(self.item_entered)
        c.popup().setAlternatingRowColors(True)
        c.activated.connect(self.completion_selected,
                type=Qt.QueuedConnection)
        self.textEdited.connect(self.text_edited)
        self.complete_window.completion_selected.connect(self.completion_selected)
        self.installEventFilter(self)
    # Interface {{{
    def update_items_cache(self, complete_items):
@ -193,33 +84,23 @@ class MultiCompleteLineEdit(QLineEdit):
    # }}}
-    def eventFilter(self, o, e):
+    def item_entered(self, idx):
-        if self.eat_focus_out and o is self and e.type() == QEvent.FocusOut:
+        self._completer.popup().setCurrentIndex(idx)
            if self.complete_window.isVisible():
                return True # Filter this event since the cw is visible
        return QLineEdit.eventFilter(self, o, e)
    def hide_completion_window(self):
        self.complete_window.hide()
    def text_edited(self, *args):
        self.update_completions()
        self._completer.complete()
    def update_completions(self):
        ' Update the list of completions '
        if not self.complete_window.isVisible() and not self.hasFocus():
            return
        cpos = self.cursorPosition()
        text = unicode(self.text())
        prefix = text[:cpos]
        self.current_prefix = prefix
        complete_prefix = prefix.lstrip()
        if self.sep:
-            complete_prefix = prefix = prefix.split(self.sep)[-1].lstrip()
+            complete_prefix = prefix.split(self.sep)[-1].lstrip()
-
+        self._completer.setCompletionPrefix(complete_prefix)
        matches = self._model.get_matches(complete_prefix)
        self.update_complete_window(matches)
    def get_completed_text(self, text):
        '''
@ -246,7 +127,7 @@ class MultiCompleteLineEdit(QLineEdit):
    def completion_selected(self, text):
-        prefix_len, ctext = self.get_completed_text(text)
+        prefix_len, ctext = self.get_completed_text(unicode(text))
        if self.sep is None:
            self.setText(ctext)
            self.setCursorPosition(len(ctext))
@ -255,60 +136,6 @@ class MultiCompleteLineEdit(QLineEdit):
            self.setText(ctext)
            self.setCursorPosition(cursor_pos - prefix_len + len(text))
    def update_complete_window(self, matches):
        '''
        Hand the new matches to the model, then show the completion window
        if there is at least one match and hide it otherwise.
        '''
        self._model.update_matches(matches)
        if not matches:
            # Nothing to offer, so the popup should not stay on screen
            self.complete_window.hide()
            return
        self.show_complete_window()
    def position_complete_window(self):
        '''
        Compute the geometry of the completion window and apply it.

        The window is placed just under this line edit, is never wider than
        the available screen area and is shifted horizontally to stay inside
        it. If the wanted height does not fit below the line edit, the height
        is capped and, when there is more room above than below, the window
        is moved above the line edit instead.
        '''
        cw = self.complete_window
        avail = QApplication.desktop().availableGeometry(self)

        # Wanted height: one row height per visible item plus a small margin
        row_height = cw.sizeHintForRow(0)
        visible_rows = min(self.max_visible_items, cw.model().rowCount())
        height = (row_height * visible_rows + 3) + 3
        scrollbar = cw.horizontalScrollBar()
        if scrollbar and scrollbar.isVisible():
            height += scrollbar.sizeHint().height()

        edit_height = self.height()
        origin = self.mapToGlobal(QPoint(0, self.height() - 2))

        # Never wider than the available screen area
        width = self.width()
        if width > avail.width():
            width = avail.width()

        # Keep the window inside the left and right screen edges
        if (origin.x() + width) > (avail.x() + avail.width()):
            origin.setX(avail.x() + avail.width() - width)
        if origin.x() < avail.x():
            origin.setX(avail.x())

        # Vertical room available above and below the line edit
        room_above = origin.y() - edit_height - avail.top() + 2
        room_below = avail.bottom() - origin.y()
        height = max(height, cw.minimumHeight())
        if height > room_below:
            height = min(max(room_above, room_below), height)
            if room_above > room_below:
                # More room above: flip the window over the line edit
                origin.setY(origin.y() - height - edit_height + 2)

        cw.setGeometry(origin.x(), origin.y(), width, height)
    def show_complete_window(self):
        ' Position the completion window and then make it visible '
        self.position_complete_window()
        popup = self.complete_window
        popup.show()
    def moveEvent(self, ev):
        '''
        Run the QLineEdit move handling, then schedule a repositioning of the
        completion window through a zero delay single shot timer.
        '''
        result = QLineEdit.moveEvent(self, ev)
        reposition = self.position_complete_window
        QTimer.singleShot(0, reposition)
        return result
    def resizeEvent(self, ev):
        '''
        Run the QLineEdit resize handling, then schedule a repositioning of
        the completion window through a zero delay single shot timer.
        '''
        result = QLineEdit.resizeEvent(self, ev)
        reposition = self.position_complete_window
        QTimer.singleShot(0, reposition)
        return result
    @dynamic_property
    def all_items(self):
        def fget(self):
@ -317,22 +144,6 @@ class MultiCompleteLineEdit(QLineEdit):
            self._model.set_items(items)
        return property(fget=fget, fset=fset)
    @dynamic_property
    def sep(self):
        # Property whose reads and writes are forwarded to self._model.sep
        def read_sep(self):
            return self._model.sep

        def write_sep(self, val):
            self._model.sep = val

        return property(fget=read_sep, fset=write_sep)
    @dynamic_property
    def space_before_sep(self):
        # Property whose reads and writes are forwarded to
        # self._model.space_before_sep
        def read_flag(self):
            return self._model.space_before_sep

        def write_flag(self, val):
            self._model.space_before_sep = val

        return property(fget=read_flag, fset=write_flag)
 class MultiCompleteComboBox(EnComboBox):
    def __init__(self, *args):
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@ -16,7 +16,8 @@ class PluginWidget(Widget, Ui_Form):
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
-            ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
+            ['paragraph_type', 'formatting_type', 'markdown_disable_toc',
             'preserve_spaces', 'txt_in_remove_indents'])
        self.db, self.book_id = db, book_id
        for x in get_option('paragraph_type').option.choices:
            self.opt_paragraph_type.addItem(x)
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@ -7,57 +7,95 @@
    <x>0</x>
    <y>0</y>
    <width>518</width>
-    <height>300</height>
+    <height>353</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Form</string>
  </property>
  <layout class="QVBoxLayout" name="verticalLayout_3">
   <item>
    <widget class="QGroupBox" name="groupBox_3">
     <property name="title">
      <string>Structure</string>
     </property>
     <layout class="QGridLayout" name="gridLayout">
      <item row="0" column="0">
       <widget class="QLabel" name="label_2">
        <property name="sizePolicy">
         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
          <horstretch>0</horstretch>
          <verstretch>0</verstretch>
         </sizepolicy>
        </property>
        <property name="text">
         <string>Paragraph style:</string>
        </property>
       </widget>
      </item>
      <item row="0" column="1">
-    <widget class="QComboBox" name="opt_paragraph_type"/>
+       <widget class="QComboBox" name="opt_paragraph_type">
        <property name="sizePolicy">
         <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
          <horstretch>0</horstretch>
          <verstretch>0</verstretch>
         </sizepolicy>
        </property>
       </widget>
      </item>
-   <item row="5" column="0" colspan="2">
+      <item row="1" column="0">
       <widget class="QLabel" name="label_3">
        <property name="sizePolicy">
         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
          <horstretch>0</horstretch>
          <verstretch>0</verstretch>
         </sizepolicy>
        </property>
        <property name="text">
         <string>Formatting style:</string>
        </property>
       </widget>
      </item>
      <item row="1" column="1">
       <widget class="QComboBox" name="opt_formatting_type">
        <property name="sizePolicy">
         <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
          <horstretch>0</horstretch>
          <verstretch>0</verstretch>
         </sizepolicy>
        </property>
       </widget>
      </item>
     </layout>
    </widget>
   </item>
   <item>
    <widget class="QGroupBox" name="groupBox_2">
     <property name="title">
      <string>Common</string>
     </property>
     <layout class="QVBoxLayout" name="verticalLayout_2">
      <item>
       <widget class="QCheckBox" name="opt_preserve_spaces">
        <property name="text">
         <string>Preserve &amp;spaces</string>
        </property>
       </widget>
      </item>
-   <item row="6" column="0" colspan="2">
+      <item>
-    <spacer name="verticalSpacer">
+       <widget class="QCheckBox" name="opt_txt_in_remove_indents">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>213</height>
      </size>
     </property>
    </spacer>
   </item>
   <item row="1" column="1">
    <widget class="QComboBox" name="opt_formatting_type"/>
   </item>
   <item row="1" column="0">
    <widget class="QLabel" name="label_3">
        <property name="text">
-      <string>Formatting style:</string>
+         <string>Remove indents at the beginning of lines</string>
        </property>
       </widget>
      </item>
-   <item row="2" column="0" rowspan="2" colspan="2">
+     </layout>
    </widget>
   </item>
   <item>
    <widget class="QGroupBox" name="groupBox">
     <property name="title">
-      <string>Markdown Options</string>
+      <string>Markdown</string>
     </property>
     <layout class="QVBoxLayout" name="verticalLayout">
      <item>
@ -83,6 +121,19 @@
     </layout>
    </widget>
   </item>
   <item>
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>213</height>
      </size>
     </property>
    </spacer>
   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/metadata/bulk_download.py
+++ b/src/calibre/gui2/metadata/bulk_download.py
@ -11,7 +11,7 @@ from threading import Thread
 from Queue import Queue, Empty
 from functools import partial
-from PyQt4.Qt import QObject, Qt, pyqtSignal, QTimer, QDialog, \
+from PyQt4.Qt import QObject, QTimer, QDialog, \
        QVBoxLayout, QTextBrowser, QLabel, QGroupBox, QDialogButtonBox
 from calibre.ebooks.metadata.fetch import search, get_social_metadata
@ -163,27 +163,23 @@ class DownloadMetadata(Thread):
 class DoDownload(QObject):
    idle_process = pyqtSignal()
    def __init__(self, parent, title, db, ids, get_covers, set_metadata=True,
            get_social_metadata=True):
        '''
        Create the progress dialog and prepare, without starting, the
        metadata download.

        :param parent: Passed to QObject.__init__ and used as the parent of
                       the progress dialog
        :param title: First argument passed to ProgressDialog
        :param db: Stored as self.db and passed on to DownloadMetadata
        :param ids: Passed on to DownloadMetadata; len(ids) is stored as
                    self.total
        :param get_covers: Stored as self.get_covers and passed on to
                           DownloadMetadata
        :param set_metadata: Passed on to DownloadMetadata
        :param get_social_metadata: Passed on to DownloadMetadata
        '''
        QObject.__init__(self, parent)
        # NOTE(review): min=0 and max=0 presumably give an indeterminate
        # progress display -- confirm against ProgressDialog
        self.pd = ProgressDialog(title, min=0, max=0, parent=parent)
        self.pd.canceled_signal.connect(self.cancel)
        self.idle_process.connect(self.do_one, type=Qt.QueuedConnection)
        # No download thread yet; do_one() builds it on demand via self.create
        self.downloader = None
        # Factory with every DownloadMetadata argument already bound
        self.create = partial(DownloadMetadata, db, ids, get_covers,
                set_metadata=set_metadata,
                get_social_metadata=get_social_metadata)
        self.timer = QTimer(self)
        self.get_covers = get_covers
        self.timer.timeout.connect(self.do_one, type=Qt.QueuedConnection)
        self.db = db
        self.updated = set([])
        self.total = len(ids)
        # Cleared by cancel(); do_one() returns early once this is False
        self.keep_going = True
    def exec_(self):
-        self.timer.start(50)
+        QTimer.singleShot(50, self.do_one)
        ret = self.pd.exec_()
        if getattr(self.downloader, 'exception', None) is not None and \
                ret == self.pd.Accepted:
@ -194,11 +190,14 @@ class DoDownload(QObject):
        return ret
    def cancel(self, *args):
-        self.timer.stop()
+        self.keep_going = False
        self.downloader.keep_going = False
        self.pd.reject()
    def do_one(self):
        try:
            if not self.keep_going:
                return
            if self.downloader is None:
                self.downloader = self.create()
                self.downloader.start()
@ -210,7 +209,6 @@ class DoDownload(QObject):
            except Empty:
                pass
            if not self.downloader.is_alive():
            self.timer.stop()
                while True:
                    try:
                        r = self.downloader.results.get_nowait()
@ -218,6 +216,11 @@ class DoDownload(QObject):
                    except Empty:
                        break
                self.pd.accept()
                return
        except:
            self.cancel()
            raise
        QTimer.singleShot(50, self.do_one)
    def handle_result(self, r):
        id_, typ, ok, title = r
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -9,7 +9,7 @@ Logic for setting up conversion jobs
 import cPickle, os
-from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer, SIGNAL
+from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.gui2 import warning_dialog, question_dialog
@ -24,7 +24,8 @@ from calibre.ebooks.conversion.config import GuiRecommendations, \
    load_defaults, load_specifics, save_specifics
 from calibre.gui2.convert import bulk_defaults_for_input_format
-def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format=None):
+def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
        out_format=None):
    changed = False
    jobs = []
    bad = []
@ -95,7 +96,9 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format
            msg).exec_()
    return jobs, changed, bad
 # }}}
 # Bulk convert {{{
 def convert_bulk_ebook(parent, queue, db, book_ids, out_format=None, args=[]):
    total = len(book_ids)
    if total == 0:
@ -125,14 +128,11 @@ class QueueBulk(QProgressDialog):
        self.parent = parent
        self.use_saved_single_settings = use_saved_single_settings
        self.i, self.bad, self.jobs, self.changed = 0, [], [], False
-        self.timer = QTimer(self)
+        QTimer.singleShot(0, self.do_book)
        self.connect(self.timer, SIGNAL('timeout()'), self.do_book)
        self.timer.start()
        self.exec_()
    def do_book(self):
        if self.i >= len(self.book_ids):
            self.timer.stop()
            return self.do_queue()
        book_id = self.book_ids[self.i]
        self.i += 1
@ -191,6 +191,7 @@ class QueueBulk(QProgressDialog):
            self.setValue(self.i)
        except NoSupportedInputFormats:
            self.bad.append(book_id)
        QTimer.singleShot(0, self.do_book)
    def do_queue(self):
        self.hide()
@ -209,7 +210,9 @@ class QueueBulk(QProgressDialog):
        self.jobs.reverse()
        self.queue(self.jobs, self.changed, self.bad, *self.args)
-def fetch_scheduled_recipe(arg):
+# }}}
 def fetch_scheduled_recipe(arg): # {{{
    fmt = prefs['output_format'].lower()
    pt = PersistentTemporaryFile(suffix='_recipe_out.%s'%fmt.lower())
    pt.close()
@ -250,7 +253,9 @@ def fetch_scheduled_recipe(arg):
    return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt]
-def generate_catalog(parent, dbspec, ids, device_manager, db):
+# }}}
 def generate_catalog(parent, dbspec, ids, device_manager, db): # {{{
    from calibre.gui2.dialogs.catalog import Catalog
    # Build the Catalog dialog in gui2.dialogs.catalog
@ -308,8 +313,9 @@ def generate_catalog(parent, dbspec, ids, device_manager, db):
    # Which then calls gui2.convert.gui_conversion:gui_catalog() with the args inline
    return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
            d.catalog_title
 # }}}
-def convert_existing(parent, db, book_ids, output_format):
+def convert_existing(parent, db, book_ids, output_format): # {{{
    already_converted_ids = []
    already_converted_titles = []
    for book_id in book_ids:
@ -325,3 +331,5 @@ def convert_existing(parent, db, book_ids, output_format):
            book_ids = [x for x in book_ids if x not in already_converted_ids]
    return book_ids
 # }}}
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -4442,6 +4442,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
            # Insert the link to the series or remove <a class="series">
            aTag = body.find('a', attrs={'class':'series_id'})
            if aTag:
                if book['series']:
                    if self.opts.generate_series:
                        aTag['href'] = "%s.html#%s_series" % ('BySeries',
@ -4449,39 +4450,31 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                else:
                    aTag.extract()
-            # Insert the author link (always)
+            # Insert the author link
            aTag = body.find('a', attrs={'class':'author'})
-            if self.opts.generate_authors:
+            if self.opts.generate_authors and aTag:
                aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor",
                                            self.generateAuthorAnchor(book['author']))
            if publisher == ' ':
                try:
                publisherTag = body.find('td', attrs={'class':'publisher'})
                if publisherTag:
                    publisherTag.contents[0].replaceWith('&nbsp;')
                except:
                    pass
            if not genres:
                try:
                genresTag = body.find('p',attrs={'class':'genres'})
                if genresTag:
                    genresTag.extract()
                except:
                    pass
            if not formats:
                try:
                formatsTag = body.find('p',attrs={'class':'formats'})
                if formatsTag:
                    formatsTag.extract()
                except:
                    pass
            if note_content == '':
                try:
                tdTag = body.find('td', attrs={'class':'notes'})
                if tdTag:
                    tdTag.contents[0].replaceWith('&nbsp;')
                except:
                    pass
            emptyTags = body.findAll('td', attrs={'class':'empty'})
            for mt in emptyTags: