From 626f1b25584705f2f0409ab63f52cf04e3324ad6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 11 Jan 2011 07:49:08 -0500
Subject: [PATCH 01/55] TXT Input: Textile support.

---
 src/calibre/ebooks/textile/__init__.py  |   3 +
 src/calibre/ebooks/textile/functions.py | 981 ++++++++++++++++++++++++
 src/calibre/ebooks/txt/input.py         |  10 +-
 src/calibre/ebooks/txt/processor.py     |  19 +
 4 files changed, 1012 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/ebooks/textile/__init__.py
 create mode 100644 src/calibre/ebooks/textile/functions.py
diff --git a/src/calibre/ebooks/textile/__init__.py b/src/calibre/ebooks/textile/__init__.py
new file mode 100644
index 0000000000..eeaeb33940
--- /dev/null
+++ b/src/calibre/ebooks/textile/__init__.py
@@ -0,0 +1,3 @@
+from functions import textile, textile_restricted, Textile
+
+__all__ = ['textile', 'textile_restricted']
diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
new file mode 100644
index 0000000000..ec70f591eb
--- /dev/null
+++ b/src/calibre/ebooks/textile/functions.py
@@ -0,0 +1,981 @@
+#!/usr/bin/env python
+"""
+PyTextile
+
+A Humane Web Text Generator
+"""
+
+__version__ = '2.1.4'
+
+__date__ = '2009/12/04'
+
+__copyright__ = """
+Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
+Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
+Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
+
+Original PHP Version:
+Copyright (c) 2003-2004, Dean Allen <dean@textism.com>
+All rights reserved.
+
+Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
+Textile's procedural code into a class framework
+
+Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
+
+"""
+
+__license__ = """
+L I C E N S E
+=============
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name Textile nor the names of its contributors may be used to
+  endorse or promote products derived from this software without specific
+  prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+import re
+import uuid
+from urlparse import urlparse
+
+def _normalize_newlines(string):
+    out = re.sub(r'\r\n', '\n', string)
+    out = re.sub(r'\n{3,}', '\n\n', out)
+    out = re.sub(r'\n\s*\n', '\n\n', out)
+    out = re.sub(r'"$', '" ', out)
+    return out
+
+def getimagesize(url):
+    """
+    Attempts to determine an image's width and height, and returns a string
+    suitable for use in an <img> tag, or None in case of failure.
+    Requires that PIL is installed.
+
+    >>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif")
+    ... #doctest: +ELLIPSIS, +SKIP
+    'width="..." height="..."'
+
+    """
+
+    try:
+        import ImageFile
+        import urllib2
+    except ImportError:
+        return None
+
+    try:
+        p = ImageFile.Parser()
+        f = urllib2.urlopen(url)
+        while True:
+            s = f.read(1024)
+            if not s:
+                break
+            p.feed(s)
+            if p.image:
+                return 'width="%i" height="%i"' % p.image.size
+    except (IOError, ValueError):
+        return None
+
+class Textile(object):
+    hlgn = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
+    vlgn = r'[\-^~]'
+    clas = r'(?:\([^)]+\))'
+    lnge = r'(?:\[[^\]]+\])'
+    styl = r'(?:\{[^}]+\})'
+    cspn = r'(?:\\\d+)'
+    rspn = r'(?:\/\d+)'
+    a = r'(?:%s|%s)*' % (hlgn, vlgn)
+    s = r'(?:%s|%s)*' % (cspn, rspn)
+    c = r'(?:%s)*' % '|'.join([clas, styl, lnge, hlgn])
+
+    pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
+    # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
+    urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
+
+    url_schemes = ('http', 'https', 'ftp', 'mailto')
+
+    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
+    btag_lite = ('bq', 'bc', 'p')
+
+    glyph_defaults = (
+        ('txt_quote_single_open',  '&#8216;'),
+        ('txt_quote_single_close', '&#8217;'),
+        ('txt_quote_double_open',  '&#8220;'),
+        ('txt_quote_double_close', '&#8221;'),
+        ('txt_apostrophe',         '&#8217;'),
+        ('txt_prime',              '&#8242;'),
+        ('txt_prime_double',       '&#8243;'),
+        ('txt_ellipsis',           '&#8230;'),
+        ('txt_emdash',             '&#8212;'),
+        ('txt_endash',             '&#8211;'),
+        ('txt_dimension',          '&#215;'),
+        ('txt_trademark',          '&#8482;'),
+        ('txt_registered',         '&#174;'),
+        ('txt_copyright',          '&#169;'),
+    )
+
+    def __init__(self, restricted=False, lite=False, noimage=False):
+        """Store the restricted/lite/noimage mode flags and initialise empty per-instance state."""
+        self.restricted = restricted
+        self.lite = lite
+        self.noimage = noimage
+        self.get_sizes = False
+        self.fn = {}
+        self.urlrefs = {}
+        self.shelf = {}
+        self.rel = ''
+        self.html_type = 'xhtml'
+
+    def textile(self, text, rel=None, head_offset=0, html_type='xhtml'):
+        """
+        >>> import textile
+        >>> textile.textile('some textile')
+        u'\\t<p>some textile</p>'
+        """
+        self.html_type = html_type
+
+        # text = unicode(text)
+        text = _normalize_newlines(text)
+
+        if self.restricted:
+            text = self.encode_html(text, quotes=False)
+
+        if rel:
+            self.rel = ' rel="%s"' % rel
+
+        text = self.getRefs(text)
+
+        text = self.block(text, int(head_offset))
+
+        text = self.retrieve(text)
+
+        return text
+
+    def pba(self, input, element=None):
+        """
+        Parse a block-attribute string and return it as HTML attribute text ('' if none).
+
+        >>> t = Textile()
+        >>> t.pba(r'\3')
+        ''
+        >>> t.pba(r'\\3', element='td')
+        ' colspan="3"'
+        >>> t.pba(r'/4', element='td')
+        ' rowspan="4"'
+        >>> t.pba(r'\\3/4', element='td')
+        ' colspan="3" rowspan="4"'
+
+        >>> t.vAlign('^')
+        'top'
+
+        >>> t.pba('^', element='td')
+        ' style="vertical-align:top;"'
+
+        >>> t.pba('{line-height:18px}')
+        ' style="line-height:18px;"'
+
+        >>> t.pba('(foo-bar)')
+        ' class="foo-bar"'
+
+        >>> t.pba('(#myid)')
+        ' id="myid"'
+
+        >>> t.pba('(foo-bar#myid)')
+        ' class="foo-bar" id="myid"'
+
+        >>> t.pba('((((')
+        ' style="padding-left:4em;"'
+
+        >>> t.pba(')))')
+        ' style="padding-right:3em;"'
+
+        >>> t.pba('[fr]')
+        ' lang="fr"'
+
+        """
+        style = []
+        aclass = ''
+        lang = ''
+        colspan = ''
+        rowspan = ''
+        id = ''
+
+        if not input:
+            return ''
+
+        matched = input
+        if element == 'td':
+            m = re.search(r'\\(\d+)', matched)
+            if m:
+                colspan = m.group(1)
+
+            m = re.search(r'/(\d+)', matched)
+            if m:
+                rowspan = m.group(1)
+
+        if element == 'td' or element == 'tr':
+            m = re.search(r'(%s)' % self.vlgn, matched)
+            if m:
+                style.append("vertical-align:%s;" % self.vAlign(m.group(1)))
+
+        m = re.search(r'\{([^}]*)\}', matched)
+        if m:
+            style.append(m.group(1).rstrip(';') + ';')
+            matched = matched.replace(m.group(0), '')
+
+        m = re.search(r'\[([^\]]+)\]', matched, re.U)
+        if m:
+            lang = m.group(1)
+            matched = matched.replace(m.group(0), '')
+
+        m = re.search(r'\(([^()]+)\)', matched, re.U)
+        if m:
+            aclass = m.group(1)
+            matched = matched.replace(m.group(0), '')
+
+        m = re.search(r'([(]+)', matched)
+        if m:
+            style.append("padding-left:%sem;" % len(m.group(1)))
+            matched = matched.replace(m.group(0), '')
+
+        m = re.search(r'([)]+)', matched)
+        if m:
+            style.append("padding-right:%sem;" % len(m.group(1)))
+            matched = matched.replace(m.group(0), '')
+
+        m = re.search(r'(%s)' % self.hlgn, matched)
+        if m:
+            style.append("text-align:%s;" % self.hAlign(m.group(1)))
+
+        m = re.search(r'^(.*)#(.*)$', aclass)
+        if m:
+            id = m.group(2)
+            aclass = m.group(1)
+
+        if self.restricted:  # restricted mode: only lang is emitted; style/class/id/spans are dropped
+            if lang:
+                return ' lang="%s"' % lang  # interpolate the parsed value (was returned as a bare template)
+            else:
+                return ''
+
+        result = []
+        if style:
+            result.append(' style="%s"' % "".join(style))
+        if aclass:
+            result.append(' class="%s"' % aclass)
+        if lang:
+            result.append(' lang="%s"' % lang)
+        if id:
+            result.append(' id="%s"' % id)
+        if colspan:
+            result.append(' colspan="%s"' % colspan)
+        if rowspan:
+            result.append(' rowspan="%s"' % rowspan)
+        return ''.join(result)
+
+    def hasRawText(self, text):
+        """
+        checks whether the text has text not already enclosed by a block tag
+
+        >>> t = Textile()
+        >>> t.hasRawText('<p>foo bar biz baz</p>')
+        False
+
+        >>> t.hasRawText(' why yes, yes it does')
+        True
+
+        """
+        r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*</\1>', re.S).sub('', text.strip()).strip()
+        r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
+        return '' != r
+
+    def table(self, text):
+        r"""
+        >>> t = Textile()
+        >>> t.table('|one|two|three|\n|a|b|c|')
+        '\t<table>\n\t\t<tr>\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>\n\n'
+        """
+        text = text + "\n\n"
+        pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n' % {'s':self.s, 'a':self.a, 'c':self.c}, re.S|re.M|re.U)
+        return pattern.sub(self.fTable, text)
+
+    def fTable(self, match):
+        tatts = self.pba(match.group(1), 'table')
+        rows = []
+        for row in [ x for x in match.group(2).split('\n') if x]:
+            rmtch = re.search(r'^(%s%s\. )(.*)' % (self.a, self.c), row.lstrip())
+            if rmtch:
+                ratts = self.pba(rmtch.group(1), 'tr')
+                row = rmtch.group(2)
+            else:
+                ratts = ''
+
+            cells = []
+            for cell in row.split('|')[1:-1]:
+                ctyp = 'd'
+                if re.search(r'^_', cell):
+                    ctyp = "h"
+                cmtch = re.search(r'^(_?%s%s%s\. )(.*)' % (self.s, self.a, self.c), cell)
+                if cmtch:
+                    catts = self.pba(cmtch.group(1), 'td')
+                    cell = cmtch.group(2)
+                else:
+                    catts = ''
+
+                cell = self.graf(self.span(cell))
+                cells.append('\t\t\t<t%s%s>%s</t%s>' % (ctyp, catts, cell, ctyp))
+            rows.append("\t\t<tr%s>\n%s\n\t\t</tr>" % (ratts, '\n'.join(cells)))
+            cells = []
+            catts = None
+        return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, '\n'.join(rows))
+
+    def lists(self, text):
+        """
+        >>> t = Textile()
+        >>> t.lists("* one\\n* two\\n* three")
+        '\\t<ul>\\n\\t\\t<li>one</li>\\n\\t\\t<li>two</li>\\n\\t\\t<li>three</li>\\n\\t</ul>'
+        """
+        pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])' % self.c, re.U|re.M|re.S)
+        return pattern.sub(self.fList, text)
+
+    def fList(self, match):
+        text = match.group(0).split("\n")
+        result = []
+        lists = []
+        for i, line in enumerate(text):
+            try:
+                nextline = text[i+1]
+            except IndexError:
+                nextline = ''
+
+            m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S)
+            if m:
+                tl, atts, content = m.groups()
+                nl = ''
+                nm = re.search(r'^([#*]+)\s.*', nextline)
+                if nm:
+                    nl = nm.group(1)
+                if tl not in lists:
+                    lists.append(tl)
+                    atts = self.pba(atts)
+                    line = "\t<%sl%s>\n\t\t<li>%s" % (self.lT(tl), atts, self.graf(content))
+                else:
+                    line = "\t\t<li>" + self.graf(content)
+
+                if len(nl) <= len(tl):
+                    line = line + "</li>"
+                for k in reversed(lists):
+                    if len(k) > len(nl):
+                        line = line + "\n\t</%sl>" % self.lT(k)
+                        if len(k) > 1:
+                            line = line + "</li>"
+                        lists.remove(k)
+
+            result.append(line)
+        return "\n".join(result)
+
+    def lT(self, input):
+        if re.search(r'^#+', input):
+            return 'o'
+        else:
+            return 'u'
+
+    def doPBr(self, in_):
+        return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr, in_)
+
+    def doBr(self, match):  # re.sub callback for doPBr(): turn single newlines inside a <p> into line breaks
+        if self.html_type == 'html':  # both lookbehinds must hold, so a line already ending in a break is left alone
+            content = re.sub(r'(.+)(?<!<br>)(?<!<br />)\n(?![#*\s|])', '\\1<br>', match.group(3))
+        else:
+            content = re.sub(r'(.+)(?<!<br>)(?<!<br />)\n(?![#*\s|])', '\\1<br />', match.group(3))
+        return '<%s%s>%s%s' % (match.group(1), match.group(2), content, match.group(4))
+
+    def block(self, text, head_offset = 0):
+        """Split text on blank lines and render each chunk as a block; head_offset shifts h1-h6 levels.
+        >>> t = Textile()
+        >>> t.block('h1. foobar baby')
+        '\\t<h1>foobar baby</h1>'
+        """
+        if not self.lite:
+            tre = '|'.join(self.btag)
+        else:
+            tre = '|'.join(self.btag_lite)
+        text = text.split('\n\n')
+
+        tag = 'p'
+        atts = cite = graf = ext = ''
+
+        out = []
+
+        for line in text:
+            anon = False  # reset per chunk so only untagged continuation chunks are merged below
+            pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre, self.a, self.c)
+            match = re.search(pattern, line, re.S)
+            if match:
+                if ext:
+                    out.append(out.pop() + c1)
+
+                tag, atts, ext, cite, graf = match.groups()
+                h_match = re.search(r'h([1-6])', tag)
+                if h_match:
+                    head_level, = h_match.groups()
+                    tag = 'h%i' % max(1,
+                                      min(int(head_level) + head_offset,
+                                          6))
+                o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
+                                                      cite, graf)
+                # leave off c1 if this block is extended,
+                # we'll close it at the start of the next block
+
+                if ext:
+                    line = "%s%s%s%s" % (o1, o2, content, c2)
+                else:
+                    line = "%s%s%s%s%s" % (o1, o2, content, c2, c1)
+
+            else:
+                anon = True
+                if ext or not re.search(r'^\s', line):
+                    o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
+                                                          cite, line)
+                    # skip $o1/$c1 because this is part of a continuing
+                    # extended block
+                    if tag == 'p' and not self.hasRawText(content):
+                        line = content
+                    else:
+                        line = "%s%s%s" % (o2, content, c2)
+                else:
+                    line = self.graf(line)
+
+            line = self.doPBr(line)
+            if self.html_type == 'xhtml':
+                line = re.sub(r'<br>', '<br />', line)
+
+            if ext and anon:
+                out.append(out.pop() + "\n" + line)
+            else:
+                out.append(line)
+
+            if not ext:
+                tag = 'p'
+                atts = ''
+                cite = ''
+                graf = ''
+
+        if ext:
+            out.append(out.pop() + c1)
+        return '\n\n'.join(out)
+
+    def fBlock(self, tag, atts, ext, cite, content):
+        """
+        >>> t = Textile()
+        >>> t.fBlock("bq", "", None, "", "Hello BlockQuote")
+        ('\\t<blockquote>\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
+
+        >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote")
+        ('\\t<blockquote cite="http://google.com">\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
+
+        >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS
+        ('<pre>', '<code>', ..., '</code>', '</pre>')
+
+        >>> t.fBlock("h1", "", None, "", "foobar")
+        ('', '\\t<h1>', 'foobar', '</h1>', '')
+        """
+        atts = self.pba(atts)
+        o1 = o2 = c2 = c1 = ''
+
+        m = re.search(r'fn(\d+)', tag)
+        if m:
+            tag = 'p'
+            if m.group(1) in self.fn:
+                fnid = self.fn[m.group(1)]
+            else:
+                fnid = m.group(1)
+            atts = atts + ' id="fn%s"' % fnid
+            if atts.find('class=') < 0:
+                atts = atts + ' class="footnote"'
+            content = ('<sup>%s</sup>' % m.group(1)) + content
+
+        if tag == 'bq':
+            cite = self.checkRefs(cite)
+            if cite:
+                cite = ' cite="%s"' % cite
+            else:
+                cite = ''
+            o1 = "\t<blockquote%s%s>\n" % (cite, atts)
+            o2 = "\t\t<p%s>" % atts
+            c2 = "</p>"
+            c1 = "\n\t</blockquote>"
+
+        elif tag == 'bc':
+            o1 = "<pre%s>" % atts
+            o2 = "<code%s>" % atts
+            c2 = "</code>"
+            c1 = "</pre>"
+            content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
+
+        elif tag == 'notextile':
+            content = self.shelve(content)
+            o1 = o2 = ''
+            c1 = c2 = ''
+
+        elif tag == 'pre':
+            content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
+            o1 = "<pre%s>" % atts
+            o2 = c2 = ''
+            c1 = '</pre>'
+
+        else:
+            o2 = "\t<%s%s>" % (tag, atts)
+            c2 = "</%s>" % tag
+
+        content = self.graf(content)
+        return o1, o2, content, c2, c1
+
+    def footnoteRef(self, text):
+        """
+        >>> t = Textile()
+        >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS
+        'foo<sup class="footnote"><a href="#fn...">1</a></sup> '
+        """
+        return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text)
+
+    def footnoteID(self, match):
+        id, t = match.groups()
+        if id not in self.fn:
+            self.fn[id] = str(uuid.uuid4())
+        fnid = self.fn[id]
+        if not t:
+            t = ''
+        return '<sup class="footnote"><a href="#fn%s">%s</a></sup>%s' % (fnid, id, t)
+
+    def glyphs(self, text):
+        """Replace ASCII punctuation outside of HTML tags with typographic entities.
+        >>> t = Textile()
+
+        >>> t.glyphs("apostrophe's")
+        'apostrophe&#8217;s'
+
+        >>> t.glyphs("back in '88")
+        'back in &#8217;88'
+
+        >>> t.glyphs('foo ...')
+        'foo &#8230;'
+
+        >>> t.glyphs('--')
+        '&#8212;'
+
+        >>> t.glyphs('FooBar[tm]')
+        'FooBar&#8482;'
+
+        >>> t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
+        '<p><cite>Cat&#8217;s Cradle</cite> by Vonnegut</p>'
+
+        """
+         # fix: hackish
+        text = re.sub(r'"\Z', '\" ', text)
+
+        glyph_search = (
+            re.compile(r"(\w)\'(\w)"),                                      # apostrophe's
+            re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),                          # back in '88
+            re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'),                       #  single closing
+            re.compile(r"'"),                                               #  single opening (any quote left over)
+            re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'),                       #  double closing
+            re.compile(r'"'),                                               #  double opening
+            re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),        #  3+ uppercase acronym
+            re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),           #  3+ uppercase
+            re.compile(r'\b(\s{0,1})?\.{3}'),                                     #  ellipsis
+            re.compile(r'(\s?)--(\s?)'),                                    #  em dash
+            re.compile(r'\s-(?:\s|$)'),                                     #  en dash
+            re.compile(r'(\d+)( ?)x( ?)(?=\d+)'),                           #  dimension sign
+            re.compile(r'\b ?[([]TM[])]', re.I),                            #  trademark
+            re.compile(r'\b ?[([]R[])]', re.I),                             #  registered
+            re.compile(r'\b ?[([]C[])]', re.I),                             #  copyright
+         )
+
+        glyph_replace = [x % dict(self.glyph_defaults) for x in (
+            r'\1%(txt_apostrophe)s\2',           # apostrophe's
+            r'\1%(txt_apostrophe)s\2',           # back in '88
+            r'\1%(txt_quote_single_close)s',     #  single closing
+            r'%(txt_quote_single_open)s',         #  single opening
+            r'\1%(txt_quote_double_close)s',        #  double closing
+            r'%(txt_quote_double_open)s',             #  double opening
+            r'<acronym title="\2">\1</acronym>', #  3+ uppercase acronym
+            r'<span class="caps">\1</span>',     #  3+ uppercase
+            r'\1%(txt_ellipsis)s',                  #  ellipsis
+            r'\1%(txt_emdash)s\2',               #  em dash
+            r' %(txt_endash)s ',                 #  en dash
+            r'\1\2%(txt_dimension)s\3',          #  dimension sign
+            r'%(txt_trademark)s',                #  trademark
+            r'%(txt_registered)s',                #  registered
+            r'%(txt_copyright)s',                #  copyright
+        )]
+
+        result = []
+        for line in re.compile(r'(<.*?>)', re.U).split(text):
+            if not re.search(r'<.*>', line):  # leave tag fragments untouched; only rewrite the text between them
+                for s, r in zip(glyph_search, glyph_replace):
+                    line = s.sub(r, line)
+            result.append(line)
+        return ''.join(result)
+
+    def vAlign(self, input):
+        d = {'^':'top', '-':'middle', '~':'bottom'}
+        return d.get(input, '')
+
+    def hAlign(self, input):
+        d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'}
+        return d.get(input, '')
+
+    def getRefs(self, text):
+        """
+        Strip "[name]url" link-reference definitions from text, recording each in self.urlrefs.
+        """
+        pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U)
+        text = pattern.sub(self.refs, text)
+        return text
+
+    def refs(self, match):
+        flag, url = match.groups()
+        self.urlrefs[flag] = url
+        return ''
+
+    def checkRefs(self, url):
+        return self.urlrefs.get(url, url)
+
+    def isRelURL(self, url):
+        """
+        Identify relative urls.
+
+        >>> t = Textile()
+        >>> t.isRelURL("http://www.google.com/")
+        False
+        >>> t.isRelURL("/foo")
+        True
+
+        """
+        (scheme, netloc) = urlparse(url)[0:2]
+        return not scheme and not netloc
+
+    def relURL(self, url):
+        scheme = urlparse(url)[0]
+        if self.restricted and scheme and scheme not in self.url_schemes:
+            return '#'
+        return url
+
+    def shelve(self, text):
+        id = str(uuid.uuid4())
+        self.shelf[id] = text
+        return id
+
+    def retrieve(self, text):
+        """
+        >>> t = Textile()
+        >>> id = t.shelve("foobar")
+        >>> t.retrieve(id)
+        'foobar'
+        """
+        while True:
+            old = text
+            for k, v in self.shelf.items():
+                text = text.replace(k, v)
+            if text == old:
+                break
+        return text
+
+    def encode_html(self, text, quotes=True):
+        a = (
+            ('&', '&#38;'),
+            ('<', '&#60;'),
+            ('>', '&#62;')
+        )
+
+        if quotes:
+            a = a + (
+                ("'", '&#39;'),
+                ('"', '&#34;')
+            )
+
+        for k, v in a:
+            text = text.replace(k, v)
+        return text
+
+    def graf(self, text):
+        if not self.lite:
+            text = self.noTextile(text)
+            text = self.code(text)
+
+        text = self.links(text)
+
+        if not self.noimage:
+            text = self.image(text)
+
+        if not self.lite:
+            text = self.lists(text)
+            text = self.table(text)
+
+        text = self.span(text)
+        text = self.footnoteRef(text)
+        text = self.glyphs(text)
+
+        return text.rstrip('\n')
+
+    def links(self, text):
+        """
+        >>> t = Textile()
+        >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS
+        'fooobar ... and hello world ...'
+        """
+
+        punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
+
+        pattern = r'''
+            (?P<pre>    [\s\[{(]|[%s]   )?
+            "                          # start
+            (?P<atts>   %s       )
+            (?P<text>   [^"]+?   )
+            \s?
+            (?:   \(([^)]+?)\)(?=")   )?     # $title
+            ":
+            (?P<url>    (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|]   )
+            (?P<post>   [^\w\/;]*?   )
+            (?=<|\s|$)
+        ''' % (re.escape(punct), self.c)
+
+        text = re.compile(pattern, re.X).sub(self.fLink, text)
+
+        return text
+
+    def fLink(self, match):
+        pre, atts, text, title, url, post = match.groups()
+
+        if pre == None:
+            pre = ''
+            
+        # assume ) at the end of the url is not actually part of the url
+        # unless the url also contains a (
+        if url.endswith(')') and not url.find('(') > -1:
+            post = url[-1] + post
+            url = url[:-1]
+
+        url = self.checkRefs(url)
+
+        atts = self.pba(atts)
+        if title:
+            atts = atts +  ' title="%s"' % self.encode_html(title)
+
+        if not self.noimage:
+            text = self.image(text)
+
+        text = self.span(text)
+        text = self.glyphs(text)
+
+        url = self.relURL(url)
+        out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts, self.rel, text)
+        out = self.shelve(out)
+        return ''.join([pre, out, post])
+
+    def span(self, text):
+        """
+        >>> t = Textile()
+        >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
+        'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
+        """
+        qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
+        pnct = ".,\"'?!;:"
+
+        for qtag in qtags:
+            pattern = re.compile(r"""
+                (?:^|(?<=[\s>%(pnct)s])|([\]}]))
+                (%(qtag)s)(?!%(qtag)s)
+                (%(c)s)
+                (?::(\S+))?
+                ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
+                ([%(pnct)s]*)
+                %(qtag)s
+                (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
+            """ % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
+                   'selfpnct':self.pnct}, re.X)
+            text = pattern.sub(self.fSpan, text)
+        return text
+
+
+    def fSpan(self, match):  # re.sub callback for span(): render one inline-markup match as an HTML element
+        _, tag, atts, cite, content, end, _ = match.groups()
+
+        qtags = {
+            '*': 'strong',
+            '**': 'b',
+            '??': 'cite',
+            '_' : 'em',
+            '__': 'i',
+            '-' : 'del',
+            '%' : 'span',
+            '+' : 'ins',
+            '~' : 'sub',
+            '^' : 'sup'
+        }
+        tag = qtags[tag]
+        atts = self.pba(atts)
+        if cite:
+            atts = atts + ' cite="%s"' % cite  # leading space separates it from the tag name / prior attributes
+
+        content = self.span(content)  # recurse so nested inline markup is rendered too
+
+        out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
+        return out
+
+    def image(self, text):
+        """
+        >>> t = Textile()
+        >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
+        '<a href="http://jsamsa.com" class="img"><img src="/imgs/myphoto.jpg" alt="" /></a>'
+        """
+        pattern = re.compile(r"""
+            (?:[\[{])?          # pre
+            \!                 # opening !
+            (%s)               # optional style,class atts
+            (?:\. )?           # optional dot-space
+            ([^\s(!]+)         # presume this is the src
+            \s?                # optional space
+            (?:\(([^\)]+)\))?  # optional title
+            \!                 # closing
+            (?::(\S+))?        # optional href
+            (?:[\]}]|(?=\s|$)) # lookahead: space or end of string
+        """ % self.c, re.U|re.X)
+        return pattern.sub(self.fImage, text)
+
+    def fImage(self, match):
+        # (None, '', '/imgs/myphoto.jpg', None, None)
+        atts, url, title, href = match.groups()
+        atts  = self.pba(atts)
+
+        if title:
+            atts = atts + ' title="%s" alt="%s"' % (title, title)
+        else:
+            atts = atts + ' alt=""'
+            
+        if not self.isRelURL(url) and self.get_sizes:
+            size = getimagesize(url)
+            if (size):
+                atts += " %s" % size
+
+        if href:
+            href = self.checkRefs(href)
+
+        url = self.checkRefs(url)
+        url = self.relURL(url)
+
+        out = []
+        if href:
+            out.append('<a href="%s" class="img">' % href)
+        if self.html_type == 'html':
+            out.append('<img src="%s"%s>' % (url, atts))
+        else:
+            out.append('<img src="%s"%s />' % (url, atts))
+        if href: 
+            out.append('</a>')
+
+        return ''.join(out)
+
+    def code(self, text):
+        text = self.doSpecial(text, '<code>', '</code>', self.fCode)
+        text = self.doSpecial(text, '@', '@', self.fCode)
+        text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
+        return text
+
+    def fCode(self, match):
+        before, text, after = match.groups()
+        if after == None:
+            after = ''
+        # text needs to be escaped
+        if not self.restricted:
+            text = self.encode_html(text)
+        return ''.join([before, self.shelve('<code>%s</code>' % text), after])
+
+    def fPre(self, match):
+        before, text, after = match.groups()
+        if after == None:
+            after = ''
+        # text needs to be escaped, unless restricted mode already escaped the whole input in textile()
+        if not self.restricted:
+            text = self.encode_html(text)
+        return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
+
+    def doSpecial(self, text, start, end, method=None):
+        if method == None:
+            method = self.fSpecial
+        pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S)
+        return pattern.sub(method, text)
+
+    def fSpecial(self, match):
+        """
+        special blocks like notextile or code
+        """
+        before, text, after = match.groups()
+        if after == None:
+            after = ''
+        return ''.join([before, self.shelve(self.encode_html(text)), after])
+
+    def noTextile(self, text):
+        text = self.doSpecial(text, '<notextile>', '</notextile>', self.fTextile)
+        return self.doSpecial(text, '==', '==', self.fTextile)
+
+    def fTextile(self, match):
+        before, notextile, after = match.groups()
+        if after == None:
+            after = ''
+        return ''.join([before, self.shelve(notextile), after])
+
+
+def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None):
+    """
+    this function takes additional parameters:
+    head_offset - offset to apply to heading levels (default: 0)
+    html_type - 'xhtml' or 'html' style tags (default: 'xhtml')
+    """
+    return Textile().textile(text, head_offset=head_offset,
+                             html_type=html_type)
+
+def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
+    """
+    Restricted version of Textile designed for weblog comments and other
+    untrusted input.
+
+    Raw HTML is escaped.
+    Style attributes are disabled.
+    rel='nofollow' is added to external links.
+
+    When lite=True is set (the default):
+    Block tags are restricted to p, bq, and bc.
+    Lists and tables are disabled.
+    
+    When noimage=True is set (the default):
+    Image tags are disabled.
+
+    """
+    return Textile(restricted=True, lite=lite,
+                   noimage=noimage).textile(text, rel='nofollow',
+                                            html_type=html_type)
+
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index aaff8b55c0..73af3acde4 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
     preserve_spaces, detect_paragraph_type, detect_formatting_type, \
-    convert_heuristic, normalize_line_endings
+    convert_heuristic, normalize_line_endings, convert_textile
 from calibre import _ent_pat, xml_entity_to_unicode
 
 class TXTInput(InputFormatPlugin):
@@ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin):
                    'paragraph and no styling is applied.\n'
                    '* heuristic: Process using heuristics to determine formatting such '
                    'as chapter headings and italic text.\n'
+                   '* textile: Processing using textile formatting.\n'
                    '* markdown: Processing using markdown formatting. '
                    'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
         OptionRecommendation(name='preserve_spaces', recommended_value=False,
@@ -91,6 +92,13 @@ class TXTInput(InputFormatPlugin):
             except RuntimeError:
                 raise ValueError('This txt file has malformed markup, it cannot be'
                     ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
+        elif options.formatting_type == 'textile':
+            log.debug('Running text though textile conversion...')
+            try:
+                html = convert_textile(txt)
+            except RuntimeError:
+                raise ValueError('This txt file has malformed markup, it cannot be'
+                    ' converted by calibre.')
         else:
             # Determine the paragraph type of the document.
             if options.paragraph_type == 'auto':
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 6a1a106681..d0526bd9fc 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -8,6 +8,7 @@ import os, re
 
 from calibre import prepare_string_for_xml, isbytestring
 from calibre.ebooks.markdown import markdown
+from calibre.ebooks.textile import textile
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
 from calibre.ebooks.conversion.preprocess import DocAnalysis
@@ -80,6 +81,10 @@ def convert_markdown(txt, title='', disable_toc=False):
           safe_mode=False)
     return HTML_TEMPLATE % (title, md.convert(txt))
 
+def convert_textile(txt, title=''):
+    html = textile(txt, encoding='utf-8')
+    return HTML_TEMPLATE % (title, html)
+
 def normalize_line_endings(txt):
     txt = txt.replace('\r\n', '\n')
     txt = txt.replace('\r', '\n')
@@ -176,5 +181,19 @@ def detect_formatting_type(txt):
     for c in md_escapted_characters:
         if txt.count('\\'+c) > 10:
             return 'markdown'
+        
+    # Check for textile
+    # Headings (block signatures only count at the start of a line)
+    if len(re.findall(r'(?m)^h[1-6]\.', txt)) >= 5:
+        return 'textile'
+    # Block quote (anchored for the same reason, avoids e.g. "bbq.")
+    if len(re.findall(r'(?m)^bq\.', txt)) >= 5:
+        return 'textile'
+    # Images
+    if len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) >= 5:
+        return 'textile'
+    # Links
+    if len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) >= 5:
+        return 'textile'
     
     return 'heuristic'

From 1468e17c1791b472e2e7a4626e25f3c2fa5517da Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 08:35:46 -0700
Subject: [PATCH 02/55] ...

---
 src/calibre/gui2/viewer/documentview.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index 13469f5622..55abae0392 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -279,7 +279,7 @@ class Document(QWebPage): # {{{
 
     @pyqtSignature("")
     def init_hyphenate(self):
-        if self.hyphenate:
+        if self.hyphenate and getattr(self, 'loaded_lang', ''):
             self.javascript('do_hyphenation("%s")'%self.loaded_lang)
 
     def after_load(self):

From 50dc7b1f09dcd18f2b70c54571ce2b4f2e3bd008 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 09:30:22 -0700
Subject: [PATCH 03/55] Plugin handling cleanups and warning message

---
 src/calibre/gui2/preferences/plugins.py | 34 +++++++++-----
 src/calibre/gui2/preferences/plugins.ui | 60 ++-----------------------
 src/calibre/gui2/ui.py                  | 10 ++++-
 3 files changed, 34 insertions(+), 70 deletions(-)

diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py
index 2fe2b3bf01..8b4a221f56 100644
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@@ -15,7 +15,8 @@ from calibre.gui2.preferences.plugins_ui import Ui_Form
 from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin, \
                                  disable_plugin, plugin_customization, add_plugin, \
                                  remove_plugin
-from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files
+from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
+        question_dialog
 
 class PluginModel(QAbstractItemModel): # {{{
 
@@ -132,7 +133,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.toggle_plugin_button.clicked.connect(self.toggle_plugin)
         self.customize_plugin_button.clicked.connect(self.customize_plugin)
         self.remove_plugin_button.clicked.connect(self.remove_plugin)
-        self.button_plugin_browse.clicked.connect(self.find_plugin)
         self.button_plugin_add.clicked.connect(self.add_plugin)
 
     def toggle_plugin(self, *args):
@@ -149,23 +149,33 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.modify_plugin(op='remove')
 
     def add_plugin(self):
-        path = unicode(self.plugin_path.text())
-        if path and os.access(path, os.R_OK) and path.lower().endswith('.zip'):
-            add_plugin(path)
+        path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
+                filters=[(_('Plugins'), ['zip'])], all_files=False,
+                    select_only_single_file=True)
+        if not path:
+            return
+        path = path[0]
+        if path and  os.access(path, os.R_OK) and path.lower().endswith('.zip'):
+            if not question_dialog(self, _('Are you sure?'), '<p>' + \
+                    _('Installing plugins is a <b>security risk</b>. '
+                    'Plugins can contain a virus/malware. '
+                        'Only install it if you got it from a trusted source.'
+                        ' Are you sure you want to proceed?'),
+                    show_copy_button=False):
+                return
+            plugin = add_plugin(path)
             self._plugin_model.populate()
             self._plugin_model.reset()
             self.changed_signal.emit()
-            self.plugin_path.setText('')
+            info_dialog(self, _('Success'),
+                    _('Plugin <b>{0}</b> successfully installed under <b>'
+                        ' {1} plugins</b>. You may have to restart calibre '
+                        'for the plugin to take effect.').format(plugin.name, plugin.type),
+                    show=True)
         else:
             error_dialog(self, _('No valid plugin path'),
                          _('%s is not a valid plugin path')%path).exec_()
 
-    def find_plugin(self):
-        path = choose_files(self, 'choose plugin dialog', _('Choose plugin'),
-                            filters=[('Plugins', ['zip'])], all_files=False,
-                            select_only_single_file=True)
-        if path:
-            self.plugin_path.setText(path[0])
 
     def modify_plugin(self, op=''):
         index = self.plugin_view.currentIndex()
diff --git a/src/calibre/gui2/preferences/plugins.ui b/src/calibre/gui2/preferences/plugins.ui
index 8979867bbc..18f0786a66 100644
--- a/src/calibre/gui2/preferences/plugins.ui
+++ b/src/calibre/gui2/preferences/plugins.ui
@@ -72,64 +72,10 @@
     </layout>
    </item>
    <item>
-    <widget class="QGroupBox" name="groupBox_4">
-     <property name="title">
-      <string>Add new plugin</string>
+    <widget class="QPushButton" name="button_plugin_add">
+     <property name="text">
+      <string>&amp;Add a new plugin</string>
      </property>
-     <layout class="QVBoxLayout" name="verticalLayout_5">
-      <item>
-       <layout class="QHBoxLayout" name="horizontalLayout_5">
-        <item>
-         <widget class="QLabel" name="label_14">
-          <property name="text">
-           <string>Plugin &amp;file:</string>
-          </property>
-          <property name="buddy">
-           <cstring>plugin_path</cstring>
-          </property>
-         </widget>
-        </item>
-        <item>
-         <widget class="QLineEdit" name="plugin_path"/>
-        </item>
-        <item>
-         <widget class="QToolButton" name="button_plugin_browse">
-          <property name="text">
-           <string>...</string>
-          </property>
-          <property name="icon">
-           <iconset resource="../../../../resources/images.qrc">
-            <normaloff>:/images/document_open.png</normaloff>:/images/document_open.png</iconset>
-          </property>
-         </widget>
-        </item>
-       </layout>
-      </item>
-      <item>
-       <layout class="QHBoxLayout" name="horizontalLayout_4">
-        <item>
-         <spacer name="horizontalSpacer_2">
-          <property name="orientation">
-           <enum>Qt::Horizontal</enum>
-          </property>
-          <property name="sizeHint" stdset="0">
-           <size>
-            <width>40</width>
-            <height>20</height>
-           </size>
-          </property>
-         </spacer>
-        </item>
-        <item>
-         <widget class="QPushButton" name="button_plugin_add">
-          <property name="text">
-           <string>&amp;Add</string>
-          </property>
-         </widget>
-        </item>
-       </layout>
-      </item>
-     </layout>
     </widget>
    </item>
   </layout>
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index 7e22839bdf..0732ff4650 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -103,7 +103,15 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         self.gui_debug = gui_debug
         acmap = OrderedDict()
         for action in interface_actions():
-            ac = action.load_actual_plugin(self)
+            try:
+                ac = action.load_actual_plugin(self)
+            except:
+                # Ignore errors in loading user supplied plugins; skip them
+                import traceback
+                traceback.print_exc()
+                if action.plugin_path is None:
+                    raise
+                continue
             ac.plugin_path = action.plugin_path
             ac.interface_action_base_plugin = action
             if ac.name in acmap:

From 61c7b310377ac21fbd5e5497eadaf9277b2aa176 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 11 Jan 2011 16:41:09 +0000
Subject: [PATCH 04/55] Remove get_metadata cache

---
 src/calibre/gui2/ui.py                |  3 ---
 src/calibre/library/caches.py         |  4 ----
 src/calibre/library/database2.py      | 14 --------------
 src/calibre/library/field_metadata.py |  9 ---------
 4 files changed, 30 deletions(-)

diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index 7e22839bdf..0bdd155cbf 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -582,9 +582,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
             # Goes here, because if cf is valid, db is valid.
             db.prefs['field_metadata'] = db.field_metadata.all_metadata()
             db.commit_dirty_cache()
-            if DEBUG and db.gm_count > 0:
-                print 'get_metadata cache: {0:d} calls, {1:4.2f}% misses'.format(
-                        db.gm_count, (db.gm_missed*100.0)/db.gm_count)
         for action in self.iactions.values():
             if not action.shutting_down():
                 return
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 0763318912..6aef45dbbd 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -598,7 +598,6 @@ class ResultCache(SearchQueryParser): # {{{
 
     def set(self, row, col, val, row_is_id=False):
         id = row if row_is_id else self._map_filtered[row]
-        self._data[id][self.FIELD_MAP['all_metadata']] = None
         self._data[id][col] = val
 
     def get(self, row, col, row_is_id=False):
@@ -629,7 +628,6 @@ class ResultCache(SearchQueryParser): # {{{
                 self._data[id] = CacheRow(db, self.composites,
                         db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
                 self._data[id].append(db.book_on_device_string(id))
-                self._data[id].append(None)
             except IndexError:
                 return None
         try:
@@ -646,7 +644,6 @@ class ResultCache(SearchQueryParser): # {{{
             self._data[id] = CacheRow(db, self.composites,
                         db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
             self._data[id].append(db.book_on_device_string(id))
-            self._data[id].append(None)
         self._map[0:0] = ids
         self._map_filtered[0:0] = ids
 
@@ -671,7 +668,6 @@ class ResultCache(SearchQueryParser): # {{{
         for item in self._data:
             if item is not None:
                 item.append(db.book_on_device_string(item[0]))
-                item.append(None)
         self._map = [i[0] for i in self._data if i is not None]
         if field is not None:
             self.sort(field, ascending)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 138560020e..dc82825607 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -300,8 +300,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
 
         self.FIELD_MAP['ondevice'] = base+1
         self.field_metadata.set_field_record_index('ondevice', base+1, prefer_custom=False)
-        self.FIELD_MAP['all_metadata'] = base+2
-        self.field_metadata.set_field_record_index('all_metadata', base+2, prefer_custom=False)
 
         script = '''
         DROP VIEW IF EXISTS meta2;
@@ -690,19 +688,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         '''
         row = self.data._data[idx] if index_is_id else self.data[idx]
         fm = self.FIELD_MAP
-
-        self.gm_count += 1
-        mi = row[self.FIELD_MAP['all_metadata']]
-        if mi is not None:
-            if get_cover:
-                # Always get the cover, because the value can be wrong if the
-                # original mi was from the OPF
-                mi.cover = self.cover(idx, index_is_id=index_is_id, as_path=True)
-            return mi
-
-        self.gm_missed += 1
         mi = Metadata(None)
-        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
 
         aut_list = row[fm['au_map']]
         aut_list = [p.split(':::') for p in aut_list.split(':#:')]
diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py
index 676eb13d2b..2a9b7e7003 100644
--- a/src/calibre/library/field_metadata.py
+++ b/src/calibre/library/field_metadata.py
@@ -162,15 +162,6 @@ class FieldMetadata(dict):
                            'search_terms':['tags', 'tag'],
                            'is_custom':False,
                            'is_category':True}),
-            ('all_metadata',{'table':None,
-                             'column':None,
-                             'datatype':None,
-                             'is_multiple':None,
-                             'kind':'field',
-                             'name':None,
-                             'search_terms':[],
-                             'is_custom':False,
-                             'is_category':False}),
             ('author_sort',{'table':None,
                             'column':None,
                             'datatype':'text',

From f7ae68a8c97aa5448f7dbacc575582c1fbcb7030 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 09:55:36 -0700
Subject: [PATCH 05/55] After adding plugin scroll to it in the plugins list

---
 src/calibre/gui2/__init__.py            |  6 +++++-
 src/calibre/gui2/preferences/plugins.py | 21 ++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index df6ac45e5b..e699551150 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -269,10 +269,14 @@ def question_dialog(parent, title, msg, det_msg='', show_copy_button=True,
 
     return d.exec_() == yes_button
 
-def info_dialog(parent, title, msg, det_msg='', show=False):
+def info_dialog(parent, title, msg, det_msg='', show=False,
+        show_copy_button=True):
     d = MessageBox(QMessageBox.Information, title, msg, QMessageBox.Ok,
                     parent, det_msg)
     d.setIconPixmap(QPixmap(I('dialog_information.png')))
+    if not show_copy_button:
+        d.cb.setVisible(False)
+
     if show:
         return d.exec_()
     return d
diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py
index 8b4a221f56..c53c634ab4 100644
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@@ -77,6 +77,16 @@ class PluginModel(QAbstractItemModel): # {{{
                     return self.index(j, 0, parent)
         return QModelIndex()
 
+    def plugin_to_index_by_properties(self, plugin):
+        for i, category in enumerate(self.categories):
+            parent = self.index(i, 0, QModelIndex())
+            for j, p in enumerate(self._data[category]):
+                if plugin.name == p.name and plugin.type == p.type and \
+                        plugin.author == p.author and plugin.version == p.version:
+                    return self.index(j, 0, parent)
+        return QModelIndex()
+
+
     def refresh_plugin(self, plugin, rescan=False):
         if rescan:
             self.populate()
@@ -171,7 +181,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                     _('Plugin <b>{0}</b> successfully installed under <b>'
                         ' {1} plugins</b>. You may have to restart calibre '
                         'for the plugin to take effect.').format(plugin.name, plugin.type),
-                    show=True)
+                    show=True, show_copy_button=False)
+            idx = self._plugin_model.plugin_to_index_by_properties(plugin)
+            if idx.isValid():
+                self.plugin_view.scrollTo(idx,
+                        self.plugin_view.PositionAtCenter)
+                self.plugin_view.scrollTo(idx,
+                        self.plugin_view.PositionAtCenter)
         else:
             error_dialog(self, _('No valid plugin path'),
                          _('%s is not a valid plugin path')%path).exec_()
@@ -201,10 +217,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                 if plugin.do_user_config():
                     self._plugin_model.refresh_plugin(plugin)
             elif op == 'remove':
+                msg = _('Plugin {0} successfully removed').format(plugin.name)
                 if remove_plugin(plugin):
                     self._plugin_model.populate()
                     self._plugin_model.reset()
                     self.changed_signal.emit()
+                    info_dialog(self, _('Success'), msg, show=True,
+                            show_copy_button=False)
                 else:
                     error_dialog(self, _('Cannot remove builtin plugin'),
                          plugin.name + _(' cannot be removed. It is a '

From 2ea905dffd76f31f860962982a76ca997d219745 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 10:38:10 -0700
Subject: [PATCH 06/55] Fix bug that caused automatic news removal to remove
 any book that has a tag that contains the word 'news' instead of only books
 that have the tag News

---
 src/calibre/library/database2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 3b06064226..6ab003f2d5 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1373,7 +1373,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             if r is not None:
                 if (now - r[self.FIELD_MAP['timestamp']]) > delta:
                     tags = r[self.FIELD_MAP['tags']]
-                    if tags and tag in tags.lower():
+                    if tags and tag in tags.lower().split(','):
                         yield r[self.FIELD_MAP['id']]
 
     def get_next_series_num_for(self, series):

From ba22e309d0143aa39427d130cb85a604a810a8a7 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 11 Jan 2011 17:46:08 +0000
Subject: [PATCH 07/55] Fix tags_older_than

---
 src/calibre/library/database2.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index dc82825607..495cd9b685 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1373,7 +1373,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             if r is not None:
                 if (now - r[self.FIELD_MAP['timestamp']]) > delta:
                     tags = r[self.FIELD_MAP['tags']]
-                    if tags and tag in tags.lower():
+                    tags = tags.lower().split() if tags else []
+                    tags = [x.strip() for x in tags if x.strip()]
+                    if tag in tags:
                         yield r[self.FIELD_MAP['id']]
 
     def get_next_series_num_for(self, series):

From 5dcdfefceda2c4a39baf2dca2a8509a58de6c869 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Tue, 11 Jan 2011 17:49:36 +0000
Subject: [PATCH 08/55] ...

---
 src/calibre/library/database2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 495cd9b685..48c62efffe 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1373,7 +1373,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             if r is not None:
                 if (now - r[self.FIELD_MAP['timestamp']]) > delta:
                     tags = r[self.FIELD_MAP['tags']]
-                    tags = tags.lower().split() if tags else []
+                    tags = tags.lower().split(',') if tags else []
                     tags = [x.strip() for x in tags if x.strip()]
                     if tag in tags:
                         yield r[self.FIELD_MAP['id']]

From efda0e0275f2f3127a53dc619994dceba70dcc9d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 10:55:03 -0700
Subject: [PATCH 09/55] ...

---
 src/calibre/library/database2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 6ab003f2d5..aa491aff28 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1373,7 +1373,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             if r is not None:
                 if (now - r[self.FIELD_MAP['timestamp']]) > delta:
                     tags = r[self.FIELD_MAP['tags']]
-                    if tags and tag in tags.lower().split(','):
+                    if tags and tag in [x.strip() for x in
+                            tags.lower().split(',')]:
                         yield r[self.FIELD_MAP['id']]
 
     def get_next_series_num_for(self, series):

From 9eb8b031d97e6432683952c3e7bdcdd2a8117b97 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 11:14:19 -0700
Subject: [PATCH 10/55] Code to put downloaded news into the magazines category
 on the nook color. Commented out, pending testing

---
 src/calibre/devices/nook/driver.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index 987b90c748..ca05885645 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -91,3 +91,19 @@ class NOOK_COLOR(NOOK):
 
     EBOOK_DIR_MAIN = 'My Files/Books'
 
+    '''
+    def create_upload_path(self, path, mdata, fname, create_dirs=True):
+        filepath = NOOK.create_upload_path(self, path, mdata, fname,
+                create_dirs=create_dirs)
+        edm = self.EBOOK_DIR_MAIN.replace('/', os.sep)
+        npath = os.path.join(edm, _('News')) + os.sep
+        if npath in filepath:
+            filepath = filepath.replace(npath, os.path.join('My Files',
+                'Magazines')+os.sep)
+            filedir = os.path.dirname(filepath)
+            if create_dirs and not os.path.exists(filedir):
+                os.makedirs(filedir)
+
+        return filepath
+    '''
+

From b61fdf0eac17b578c73ce68c344ec8efe952239a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 13:15:29 -0700
Subject: [PATCH 11/55] Updated NYTimes

---
 resources/recipes/nytimes.recipe     | 25 +++++++++++++++++++++++++
 resources/recipes/nytimes_sub.recipe | 23 +++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe
index eaa428e731..6f80f4f85f 100644
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@@ -685,3 +685,28 @@ class NYTimes(BasicNewsRecipe):
             divTag.replaceWith(tag)
 
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        shortparagraph = ""
+        try:
+            if len(article.text_summary.strip()) == 0:
+                articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
+                if articlebodies:
+                    for articlebody in articlebodies:
+                        if articlebody:
+                            paras = articlebody.findAll('p')
+                            for p in paras:
+                                refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
+                                #account for blank paragraphs and short paragraphs by appending them to longer ones
+                                if len(refparagraph) > 0:
+                                    if len(refparagraph) > 70: #approximately one line of text
+                                        article.summary = article.text_summary = shortparagraph + refparagraph
+                                        return
+                                    else:
+                                        shortparagraph = refparagraph + " "
+                                        if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
+                                            shortparagraph = shortparagraph + "- "
+        except:
+            self.log("Error creating article descriptions")
+            return
+
diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index e56fd9cdec..8ac7c735f7 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -685,4 +685,27 @@ class NYTimes(BasicNewsRecipe):
             divTag.replaceWith(tag)
 
         return soup
+    def populate_article_metadata(self, article, soup, first):
+        shortparagraph = ""
+        try:
+            if len(article.text_summary.strip()) == 0:
+                articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
+                if articlebodies:
+                    for articlebody in articlebodies:
+                        if articlebody:
+                            paras = articlebody.findAll('p')
+                            for p in paras:
+                                refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
+                                #account for blank paragraphs and short paragraphs by appending them to longer ones
+                                if len(refparagraph) > 0:
+                                    if len(refparagraph) > 70: #approximately one line of text
+                                        article.summary = article.text_summary = shortparagraph + refparagraph
+                                        return
+                                    else:
+                                        shortparagraph = refparagraph + " "
+                                        if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
+                                            shortparagraph = shortparagraph + "- "
+        except:
+            self.log("Error creating article descriptions")
+            return
 

From bfa79729a27f6f2bb504d2137f0bc89d192e5f68 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 13:34:18 -0700
Subject: [PATCH 12/55] ...

---
 src/calibre/gui2/preferences/plugins.ui | 4 ++++
 src/calibre/gui2/shortcuts.py           | 7 ++++++-
 src/calibre/gui2/viewer/documentview.py | 7 +++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/preferences/plugins.ui b/src/calibre/gui2/preferences/plugins.ui
index 18f0786a66..83a904eb08 100644
--- a/src/calibre/gui2/preferences/plugins.ui
+++ b/src/calibre/gui2/preferences/plugins.ui
@@ -76,6 +76,10 @@
      <property name="text">
       <string>&amp;Add a new plugin</string>
      </property>
+     <property name="icon">
+      <iconset resource="../../../../resources/images.qrc">
+       <normaloff>:/images/plugins.png</normaloff>:/images/plugins.png</iconset>
+     </property>
     </widget>
    </item>
   </layout>
diff --git a/src/calibre/gui2/shortcuts.py b/src/calibre/gui2/shortcuts.py
index bdd699a69d..5e56435e10 100644
--- a/src/calibre/gui2/shortcuts.py
+++ b/src/calibre/gui2/shortcuts.py
@@ -150,7 +150,7 @@ class Delegate(QStyledItemDelegate):
         custom = []
         if editor.custom.isChecked():
             for x in ('1', '2'):
-                sc = getattr(editor, 'shortcut'+x)
+                sc = getattr(editor, 'shortcut'+x, None)
                 if sc is not None:
                     custom.append(sc)
 
@@ -266,6 +266,11 @@ class ShortcutConfig(QWidget):
         self.view.scrollTo(index)
 
 
+    @property
+    def is_editing(self):
+        return self.view.state() == self.view.EditingState
+
+
 if __name__ == '__main__':
     from calibre.gui2 import is_ok_to_use_qt
     from calibre.gui2.viewer.keys import SHORTCUTS
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index 55abae0392..4485e63373 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -120,6 +120,13 @@ class ConfigDialog(QDialog, Ui_Dialog):
 
 
     def accept(self, *args):
+        if self.shortcut_config.is_editing:
+            from calibre.gui2 import info_dialog
+            info_dialog(self, _('Still editing'),
+                    _('You are in the middle of editing a keyboard shortcut'
+                        ' first complete that, by clicking outside the '
+                        ' shortcut editing box.'), show=True)
+            return
         c = config()
         c.set('serif_family', unicode(self.serif_family.currentFont().family()))
         c.set('sans_family', unicode(self.sans_family.currentFont().family()))

From 66b870e6d89bc68c02b6c321ed9f6f963e4303ed Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 13:42:04 -0700
Subject: [PATCH 13/55] ...

---
 src/calibre/ebooks/conversion/plumber.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index b1d760ea2d..9b22fb46ec 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -88,6 +88,7 @@ class Plumber(object):
         self.ui_reporter = report_progress
         self.abort_after_input_dump = abort_after_input_dump
 
+        # Pipeline options {{{
         # Initialize the conversion options that are independent of input and
         # output formats. The input and output plugins can still disable these
         # options via recommendations.
@@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp',
     help=_('Set the book timestamp (used by the date column in calibre).')),
 
 ]
+        # }}}
 
         input_fmt = os.path.splitext(self.input)[1]
         if not input_fmt:

From 3cd9ffcec6c14cada79b989fbfa92df49db5100c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 15:08:15 -0700
Subject: [PATCH 14/55] Fix #8281 (Error when customizing builtin recipes with
 same name (e.g. The Nation))

---
 src/calibre/gui2/dialogs/user_profiles.py   | 69 ++++++++++++++++-----
 src/calibre/web/feeds/recipes/collection.py | 19 +++++-
 2 files changed, 71 insertions(+), 17 deletions(-)

diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py
index 71c9ebcd04..04c41f0c5e 100644
--- a/src/calibre/gui2/dialogs/user_profiles.py
+++ b/src/calibre/gui2/dialogs/user_profiles.py
@@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import time, os
 
 from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
-        QVariant, QInputDialog
+        QVariant
 
 from calibre.web.feeds.recipes import compile_recipe
 from calibre.web.feeds.news import AutomaticNewsRecipe
@@ -256,24 +256,61 @@ class %(classname)s(%(base_class)s):
 
     def add_builtin_recipe(self):
         from calibre.web.feeds.recipes.collection import \
-            get_builtin_recipe_by_title, get_builtin_recipe_titles
-        items = sorted(get_builtin_recipe_titles(), key=sort_key)
+            get_builtin_recipe_collection, get_builtin_recipe_by_id
+        from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
+                QListWidget, QDialogButtonBox, QSize
 
+        d = QDialog(self)
+        d.l = QVBoxLayout()
+        d.setLayout(d.l)
+        d.list = QListWidget(d)
+        d.list.doubleClicked.connect(lambda x: d.accept())
+        d.l.addWidget(d.list)
+        d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
+                Qt.Horizontal, d)
+        d.bb.accepted.connect(d.accept)
+        d.bb.rejected.connect(d.reject)
+        d.l.addWidget(d.bb)
+        d.setWindowTitle(_('Choose builtin recipe'))
+        items = []
+        for r in get_builtin_recipe_collection():
+            id_ = r.get('id', '')
+            title = r.get('title', '')
+            lang = r.get('language', '')
+            if id_ and title:
+                items.append((title + ' [%s]'%lang, id_))
 
-        title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'),
-                                     items, 0, False)
-        if ok:
-            title = unicode(title)
-            profile = get_builtin_recipe_by_title(title)
-            if self._model.has_title(title):
-                if question_dialog(self, _('Replace recipe?'),
-                    _('A custom recipe named %s already exists. Do you want to '
-                        'replace it?')%title):
-                    self._model.replace_by_title(title, profile)
-                else:
-                    return
+        items.sort(key=lambda x:sort_key(x[0]))
+        for title, id_ in items:
+            item = QListWidgetItem(title)
+            item.setData(Qt.UserRole, id_)
+            d.list.addItem(item)
+
+        d.resize(QSize(450, 400))
+        ret = d.exec_()
+        d.list.doubleClicked.disconnect()
+        if ret != d.Accepted:
+            return
+
+        items = list(d.list.selectedItems())
+        if not items:
+            return
+        item = items[-1]
+        id_ = unicode(item.data(Qt.UserRole).toString())
+        title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
+        profile = get_builtin_recipe_by_id(id_)
+        if profile is None:
+            raise Exception('Something weird happened')
+
+        if self._model.has_title(title):
+            if question_dialog(self, _('Replace recipe?'),
+                _('A custom recipe named %s already exists. Do you want to '
+                    'replace it?')%title):
+                self._model.replace_by_title(title, profile)
             else:
-                self.model.add(title, profile)
+                return
+        else:
+            self.model.add(title, profile)
 
         self.clear()
 
diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py
index a513cf3880..5dd360213b 100644
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
     br = browser()
     return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
 
-
 def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
     for x in get_builtin_recipe_collection():
         if x.get('title') == title:
@@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
                         'Failed to download recipe, using builtin version')
             return P('recipes/%s.recipe'%urn, data=True)
 
+def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
+    '''Return the data of the builtin recipe with id id_ (None if not found).'''
+    if not any(x.get('id') == id_ for x in get_builtin_recipe_collection()):
+        return None
+    urn = id_[8:]  # the recipe file name is the id minus its first 8 chars
+    if download_recipe:  # try the server copy first, else fall through
+        try:
+            if log is not None:
+                log('Trying to get latest version of recipe:', urn)
+            return download_builtin_recipe(urn)
+        except Exception:  # best effort, but let Ctrl-C/SystemExit propagate
+            if log is None:
+                import traceback
+                traceback.print_exc()
+            else:
+                log.exception('Failed to download recipe, using builtin version')
+    return P('recipes/%s.recipe'%urn, data=True)
+
 class SchedulerConfig(object):
 
     def __init__(self):

From 9585ba655c810bb9132f3d6d7299455d23d47493 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 11 Jan 2011 18:08:55 -0500
Subject: [PATCH 15/55] TXT Input: remove unnecessary try block. Rework
 markdown and textile detection.

---
 src/calibre/ebooks/txt/input.py     |  6 +----
 src/calibre/ebooks/txt/processor.py | 41 +++++++++++++----------------
 2 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 73af3acde4..0b0bd6d570 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -94,11 +94,7 @@ class TXTInput(InputFormatPlugin):
                     ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
         elif options.formatting_type == 'textile':
             log.debug('Running text though textile conversion...')
-            try:
-                html = convert_textile(txt)
-            except RuntimeError:
-                raise ValueError('This txt file has malformed markup, it cannot be'
-                    ' converted by calibre.')
+            html = convert_textile(txt)
         else:
             # Determine the paragraph type of the document.
             if options.paragraph_type == 'auto':
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index d0526bd9fc..d59fd4121a 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -162,38 +162,33 @@ def detect_paragraph_type(txt):
 
 
 def detect_formatting_type(txt):
+    markdown_count = 0
+    textile_count = 0
+    
     # Check for markdown
     # Headings
-    if len(re.findall('(?mu)^#+', txt)) >= 5:
-        return 'markdown'
-    if len(re.findall('(?mu)^=+$', txt)) >= 5:
-        return 'markdown'
-    if len(re.findall('(?mu)^-+$', txt)) >= 5:
-        return 'markdown'
+    markdown_count += len(re.findall('(?mu)^#+', txt)) 
+    markdown_count += len(re.findall('(?mu)^=+$', txt))
+    markdown_count += len(re.findall('(?mu)^-+$', txt))
     # Images
-    if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
-        return 'markdown'
+    markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
     # Links
-    if len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
-        return 'markdown'
-    # Escaped characters
-    md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
-    for c in md_escapted_characters:
-        if txt.count('\\'+c) > 10:
-            return 'markdown'
+    markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
         
     # Check for textile
     # Headings
-    if len(re.findall(r'h[1-6]\.', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
     # Block quote.
-    if len(re.findall(r'bq\.', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'(?mu)^bq\.', txt))
     # Images
-    if len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
     # Links
-    if len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) >= 5:
-        return 'textile'
+    textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
+    
+    if markdown_count > 5 or textile_count > 5:
+        if markdown_count > textile_count:
+            return 'markdown'
+        else:
+            return 'textile'
     
     return 'heuristic'

From f058f9adab9b1ae6bbc0673b1d8f5dce4550072f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 11 Jan 2011 18:56:23 -0500
Subject: [PATCH 16/55] FB2 Output: Implement #8277, Write cover to coverpage
 tag within metadata.

---
 src/calibre/ebooks/fb2/fb2ml.py | 68 +++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 4dd6e7c7ae..037a805e74 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -102,6 +102,7 @@ class FB2MLizer(object):
         metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
         metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
         metadata['id'] = None
+        metadata['cover'] = self.get_cover()
 
         author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
         if len(author_parts) == 1:
@@ -124,7 +125,8 @@ class FB2MLizer(object):
             metadata['id'] = str(uuid.uuid4()) 
 
         for key, value in metadata.items():
-            metadata[key] = prepare_string_for_xml(value)
+            if not key == 'cover':
+                metadata[key] = prepare_string_for_xml(value)
 
         return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
                 '<description>' \
@@ -136,6 +138,7 @@ class FB2MLizer(object):
                             '<last-name>%(author_last)s</last-name>' \
                         '</author>' \
                         '<book-title>%(title)s</book-title>' \
+                        '%(cover)s' \
                         '<lang>%(lang)s</lang>' \
                     '</title-info>' \
                     '<document-info>' \
@@ -154,6 +157,41 @@ class FB2MLizer(object):
     def fb2_footer(self):
         return u'</FictionBook>'
 
+    def get_cover(self):
+        '''Return the <coverpage> XML for the cover image, u'' if none.'''
+        cover_href = None
+        # Get the raster cover if it's available.
+        if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
+            id = unicode(self.oeb_book.metadata.cover[0])
+            cover_item = self.oeb_book.manifest.ids[id]
+            if cover_item.media_type in OEB_RASTER_IMAGES:
+                cover_href = cover_item.href
+            print 1
+        else:
+            # Figure out if we have a title page or a cover page
+            page_name = ''
+            if 'titlepage' in self.oeb_book.guide:
+                page_name = 'titlepage'
+            elif 'cover' in self.oeb_book.guide:
+                page_name = 'cover'
+
+            if page_name:
+                cover_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[page_name].href]
+                # Get the first image in the page
+                for img in cover_item.xpath('//img'):
+                    cover_href = cover_item.abshref(img.get('src'))
+                    print cover_href
+                    break
+                
+        if cover_href:
+            # Only write the image tag if it is in the manifest.
+            if cover_href in self.oeb_book.manifest.hrefs.keys():
+                if cover_href not in self.image_hrefs.keys():
+                    self.image_hrefs[cover_href] = '_%s.jpg' % len(self.image_hrefs.keys())
+            if cover_href in self.image_hrefs.keys():  # avoids KeyError for hrefs outside the manifest
+                return u'<coverpage><image xlink:href="#%s" /></coverpage>' % self.image_hrefs[cover_href]
+        return u''  # no cover found, or its href was never registered in image_hrefs
+
     def get_text(self):
         text = ['<body>']
         
@@ -162,23 +200,6 @@ class FB2MLizer(object):
             text.append('<section>')
             self.section_level += 1
         
-        # Insert the title page / cover into the spine if it is not already referenced.
-        title_name = u''
-        if 'titlepage' in self.oeb_book.guide:
-            title_name = 'titlepage'
-        elif 'cover' in self.oeb_book.guide:
-            title_name = 'cover'
-        if title_name:
-            title_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[title_name].href]
-            if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
-                self.oeb_book.spine.insert(0, title_item, True)
-        # Create xhtml page to reference cover image so it can be used.
-        if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
-            id = unicode(self.oeb_book.metadata.cover[0])
-            cover_item = self.oeb_book.manifest.ids[id]
-            if cover_item.media_type in OEB_RASTER_IMAGES:
-                self.insert_image_cover(cover_item.href)
-        
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to FictionBook2 XML' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
@@ -203,17 +224,6 @@ class FB2MLizer(object):
 
         return ''.join(text) + '</body>'
 
-    def insert_image_cover(self, image_href):
-        from calibre.ebooks.oeb.base import RECOVER_PARSER
-        try:
-            root = etree.fromstring(u'<html xmlns="%s"><body><img src="%s" /></body></html>' % (XHTML_NS, image_href), parser=RECOVER_PARSER)
-        except:
-            root = etree.fromstring(u'', parser=RECOVER_PARSER)
-        
-        id, href = self.oeb_book.manifest.generate('fb2_cover', 'fb2_cover.xhtml')
-        item = self.oeb_book.manifest.add(id, href, guess_type(href)[0], data=root)
-        self.oeb_book.spine.insert(0, item, True)
-
     def fb2mlize_images(self):
         '''
         This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.

From 0bab82e9b1e0dec31ca0e924fe7e1e72c9de83f6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 17:25:35 -0700
Subject: [PATCH 17/55] RTF Input: Substitute a dummy image for WMF images in
 the RTF document

---
 src/calibre/ebooks/rtf/input.py   | 28 +++++++++++++++---
 src/calibre/utils/wmf/__init__.py | 47 +++++++++++++++++++++++++++++
 src/calibre/utils/wmf/wmf.c       | 49 ++++++++++++++++++++++++++-----
 3 files changed, 112 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 5154373eda..714a5b656f 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -159,11 +159,31 @@ class RTFInput(InputFormatPlugin):
         return imap
 
     def convert_image(self, name):
-        from calibre.utils.magick import Image
-        img = Image()
-        img.open(name)
+        try:
+            return self.rasterize_wmf(name)
+        except:
+            self.log.exception('Failed to convert WMF image %r'%name)
+        return self.replace_wmf(name)
+
+    def replace_wmf(self, name):
+        from calibre.ebooks import calibre_cover
+        data = calibre_cover('Conversion of WMF images is not supported',
+            'Use Microsoft Word or OpenOffice to save this RTF file'
+            ' as HTML and convert that in calibre.', title_size=36,
+            author_size=20)
         name = name.replace('.wmf', '.jpg')
-        img.save(name)
+        with open(name, 'wb') as f:
+            f.write(data)
+        return name
+
+    def rasterize_wmf(self, name):
+        from calibre.utils.wmf import extract_raster_image
+        with open(name, 'rb') as f:
+            data = f.read()
+        data = extract_raster_image(data)
+        name = name.replace('.wmf', '.jpg')
+        with open(name, 'wb') as f:
+            f.write(data)
         return name
 
 
diff --git a/src/calibre/utils/wmf/__init__.py b/src/calibre/utils/wmf/__init__.py
index 68dfb8d2b5..cb7736e06a 100644
--- a/src/calibre/utils/wmf/__init__.py
+++ b/src/calibre/utils/wmf/__init__.py
@@ -5,5 +5,52 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import glob
+
+from calibre.constants import plugins, iswindows, filesystem_encoding
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir
+from calibre.utils.magick import Image, PixelWand
+
+class Unavailable(Exception):
+    '''Raised when the wmf plugin is missing or reports an error.'''
+
+class NoRaster(Exception):
+    '''Raised when rendering a WMF leaves no PNG file behind.'''
+
+def extract_raster_image(wmf_data):
+    '''Render wmf_data via the wmf plugin; return the first image as PNG data.'''
+    try:
+        wmf, wmf_err = plugins['wmf']
+    except KeyError:
+        raise Unavailable('libwmf not available on this platform')
+    if wmf_err:
+        raise Unavailable(wmf_err)
+
+    if iswindows:
+        import sys, os
+        appdir = sys.app_dir
+        if isinstance(appdir, unicode):
+            appdir = appdir.encode(filesystem_encoding)
+        # set_font_dir() has to be called before render() is used on windows
+        wmf.set_font_dir(os.path.join(appdir, 'wmffonts'))
+
+    with TemporaryDirectory('wmf2png') as tdir:
+        with CurrentDir(tdir):
+            # Any *.png found in the working dir afterwards came from render()
+            wmf.render(wmf_data)
+            images = sorted(glob.glob('*.png'))
+            if not images:
+                raise NoRaster('No raster images in WMF')
+            # Close the file explicitly instead of relying on refcounting
+            with open(images[0], 'rb') as f:
+                data = f.read()
+
+    im = Image()
+    im.load(data)
+    pw = PixelWand()
+    pw.color = '#ffffff'
+    im.rotate(pw, 180)  # rotated by 180, using the white pixel wand
+    return im.export('png')
 
 
diff --git a/src/calibre/utils/wmf/wmf.c b/src/calibre/utils/wmf/wmf.c
index 1f8e8a27f3..74d3ca813f 100644
--- a/src/calibre/utils/wmf/wmf.c
+++ b/src/calibre/utils/wmf/wmf.c
@@ -4,6 +4,7 @@
 
 #include <libwmf/api.h>
 #include <libwmf/svg.h>
+//#include <libwmf/gd.h>
 
 typedef struct {
     char *data;
@@ -13,7 +14,7 @@ typedef struct {
 
 //This code is taken mostly from the Abiword wmf plugin
 
-
+// Buffer read {{{
 // returns unsigned char cast to int, or EOF
 static int wmf_WMF_read(void * context) {
     char c;
@@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
 	if (info->pos == info->len)
 		return EOF;
 
-	c = info->data[pos];
+	c = info->data[info->pos];
 
 	info->pos++;
 
-	return (int)c;
+	return (int)((unsigned char)c);
 }
 
 // returns (-1) on error, else 0
@@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
 
 	return (long) info->pos;
 }
+// }}}
 
 
+// Installed as ddata->image.name: bumps the int counter behind ctxt and returns
+// "NNNN.png" in a static buffer that the next call overwrites.
+static char _png_name_buf[100];
+static char *wmf_png_name(void *ctxt) {
+    snprintf(_png_name_buf, sizeof(_png_name_buf), "%04d.png", ++(*(int*)ctxt));
+    return _png_name_buf;
+}
+
 #define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
 
 static PyObject *
@@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
 
 	unsigned int max_width  = 1600;
 	unsigned int max_height = 1200;
-	unsigned long max_flags = 0;
 
 	static const char* Default_Description = "wmf2svg";
+    int fname_counter = 0;
 
 	wmf_error_t err;
 
@@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
 	ddata->Description = (char *)Default_Description;
 
 	ddata->bbox = bbox;
+    ddata->image.context = (void *)&fname_counter;
+    ddata->image.name = wmf_png_name;
 
 	wmf_display_size(API, &disp_width, &disp_height, 96, 96);
 
@@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
 		ddata->height = (unsigned int) ceil ((double) wmf_height);
 	}
 
-	ddata->flags |= WMF_SVG_INLINE_IMAGES;
-
-	ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
+    // Needs GD
+	//ddata->flags |= WMF_SVG_INLINE_IMAGES;
+	//ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
 
     err = wmf_play(API, 0, &(bbox));
 
@@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
     return ans;
 }
 
+#ifdef _WIN32
+// Only declared here; the definition is not part of this file's visible code.
+void set_libwmf_fontdir(const char *);
+
+// set_font_dir(path): parse one string argument, hand it to
+// set_libwmf_fontdir() and return None. Returns NULL if parsing fails.
+static PyObject *
+wmf_setfontdir(PyObject *self, PyObject *args) {
+    char *path;
+    if (!PyArg_ParseTuple(args, "s", &path))
+        return NULL;
+    set_libwmf_fontdir(path);
+
+    Py_RETURN_NONE;
+}
+#endif
 
 static PyMethodDef wmf_methods[] = {
     {"render", wmf_render, METH_VARARGS,
-        "render(path) -> Render wmf as svg."
+        "render(data) -> Render wmf as svg."
     },
+#ifdef _WIN32
+    {"set_font_dir", wmf_setfontdir, METH_VARARGS,
+        "set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
+    },
+#endif
 
     {NULL}  /* Sentinel */
 };

From 06723a07483cfd59d63ffefc58ce1c53efaa5f92 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 11 Jan 2011 19:57:57 -0500
Subject: [PATCH 18/55] ...

---
 src/calibre/ebooks/fb2/fb2ml.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 037a805e74..7a618ab54a 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -166,7 +166,6 @@ class FB2MLizer(object):
             cover_item = self.oeb_book.manifest.ids[id]
             if cover_item.media_type in OEB_RASTER_IMAGES:
                 cover_href = cover_item.href
-            print 1
         else:
             # Figure out if we have a title page or a cover page
             page_name = ''
@@ -180,7 +179,6 @@ class FB2MLizer(object):
                 # Get the first image in the page
                 for img in cover_item.xpath('//img'):
                     cover_href = cover_item.abshref(img.get('src'))
-                    print cover_href
                     break
                 
         if cover_href:

From d2d65d805663ebfbfd11e723dc94c241bff30ce6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 11 Jan 2011 18:12:29 -0700
Subject: [PATCH 19/55] ...

---
 resources/recipes/tyzden.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/recipes/tyzden.recipe b/resources/recipes/tyzden.recipe
index c206244ff6..b8d7389fbe 100644
--- a/resources/recipes/tyzden.recipe
+++ b/resources/recipes/tyzden.recipe
@@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
     if (weeknum > 1):
         weeknum -= 1
 
-    title = u'.tyzden ' + str(weeknum) + '/' + str(year)
+    title = u'tyzden'
 
     base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
     base_url = base_url_path + '.html'

From dbacc79d97ce0b0b42c3d559dbc3214400f64fa7 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 09:45:50 +0000
Subject: [PATCH 20/55] Remove declarations of get_metadata cache counters

---
 src/calibre/library/database2.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index aa491aff28..d2654577b9 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         self.has_id  = self.data.has_id
         self.count   = self.data.count
 
-        # Count times get_metadata is called, and how many times in the cache
-        self.gm_count  = 0
-        self.gm_missed = 0
-
         for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
                      'publisher', 'rating', 'series', 'series_index', 'tags',
                      'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):

From bf183471eaec193d6ff345199f4f2188c82116cd Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 11:11:35 +0000
Subject: [PATCH 21/55] Fix problem in search -- the candidates parameter was
 not passed to get_dates_matches or get_numeric_matches, resulting in an
 exception.

---
 src/calibre/library/caches.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 6aef45dbbd..4168360d3a 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
             if isinstance(location, list):
                 if allow_recursion:
                     for loc in location:
-                        matches |= self.get_matches(loc, query, allow_recursion=False)
+                        matches |= self.get_matches(loc, query, candidates,
+                                                    allow_recursion=False)
                     return matches
                 raise ParseException(query, len(query), 'Recursive query group detected', self)
 
@@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
                 fm = self.field_metadata[location]
                 # take care of dates special case
                 if fm['datatype'] == 'datetime':
-                    return self.get_dates_matches(location, query.lower())
+                    return self.get_dates_matches(location, query.lower(), candidates)
 
                 # take care of numbers special case
                 if fm['datatype'] in ('rating', 'int', 'float'):
-                    return self.get_numeric_matches(location, query.lower())
+                    return self.get_numeric_matches(location, query.lower(), candidates)
 
                 # take care of the 'count' operator for is_multiples
                 if fm['is_multiple'] and \
@@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
                         query[1:1] in '=<>!':
                     vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
                             len(item[loc].split(ms)) if item[loc] is not None else 0
-                    return self.get_numeric_matches(location, query[1:], val_func=vf)
+                    return self.get_numeric_matches(location, query[1:],
+                                                    candidates, val_func=vf)
 
             # everything else, or 'all' matches
             matchkind = CONTAINS_MATCH

From b09b3fc993c7c7566e88861926ae9f22a893cd66 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 11:13:14 +0000
Subject: [PATCH 22/55] Fix problem with plugin registration. If a plugin
 disappears without being correctly removed, then building the current action
 set threw an exception. The only way around it was to hack the preferences.
 The fix simply ignores missing actions.

---
 src/calibre/gui2/preferences/toolbar.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/preferences/toolbar.py b/src/calibre/gui2/preferences/toolbar.py
index c13d956aea..26cdea19d3 100644
--- a/src/calibre/gui2/preferences/toolbar.py
+++ b/src/calibre/gui2/preferences/toolbar.py
@@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
                     dont_remove_from=set(['toolbar-device']))
         if name is None:
             return FakeAction('--- '+_('Separator')+' ---', None)
-        return gui.iactions[name]
+        try:
+            return gui.iactions[name]
+        except:
+            return None
 
     def rowCount(self, parent):
         return len(self._data)
@@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
         BaseModel.__init__(self)
         self.gprefs_name = 'action-layout-'+key
         current = gprefs[self.gprefs_name]
-        self._data =  [self.name_to_action(x, gui) for x in current]
+        self._data = [self.name_to_action(x, gui) for x in current]
+        self._data = [x for x in self._data if x is not None]
         self.key = key
         self.gui = gui
 

From 3407f7cf0dd5ad76e43c94fad891469666bd8aa7 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Wed, 12 Jan 2011 07:30:50 -0700
Subject: [PATCH 23/55] GwR add support for lists in notes field

---
 src/calibre/library/catalog.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 349800c8ba..98cc4b7ecd 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1524,19 +1524,19 @@ class EPUB_MOBI(CatalogPlugin):
                     this_title['formats'] = formats
 
                 # Add user notes to be displayed in header
-                # Special case handling for datetime fields
+                # Special case handling for datetime fields and lists
                 if self.opts.header_note_source_field:
                     field_md = self.__db.metadata_for_field(self.opts.header_note_source_field)
                     notes = self.__db.get_field(record['id'],
                                         self.opts.header_note_source_field,
                                         index_is_id=True)
-                    if notes and field_md['datatype'] == 'datetime':
-                        # Reformat date fields to match UI presentation: dd MMM YYYY
-                        notes = format_date(notes,'dd MMM yyyy')
-
                     if notes:
+                        if field_md['datatype'] == 'text' and isinstance(notes,list):
+                            notes = ' &middot; '.join(notes)
+                        elif field_md['datatype'] == 'datetime':
+                            notes = format_date(notes,'dd MMM yyyy')
                         this_title['notes'] = {'source':field_md['name'],
-                                               'content':notes}
+                                                   'content':notes}
 
                 titles.append(this_title)
 

From 34424068389e66c82156dca78b54175c114859cb Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 15:32:00 +0000
Subject: [PATCH 24/55] Add ability to highlight when searching instead of
 restrict

---
 src/calibre/gui2/layout.py         | 14 ++++++++++----
 src/calibre/gui2/library/models.py | 22 ++++++++++++++++++++--
 src/calibre/gui2/search_box.py     |  4 ++++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py
index aaaf1b0267..4009f99201 100644
--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
 from functools import partial
 
 from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
-    pyqtSignal, QToolButton, QPushButton, \
-    QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \
-    QMenu
+    pyqtSignal, QToolButton, QMenu, QCheckBox, \
+    QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
+
 
 from calibre.constants import __appname__
 from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
         x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
         l.addWidget(x)
 
-        self.search_button = QPushButton(_('&Go!'))
+        self.search_button = QToolButton()
+        self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
+        self.search_button.setText(_('&Go!'))
         l.addWidget(self.search_button)
         self.search_button.setSizePolicy(QSizePolicy.Minimum,
                 QSizePolicy.Minimum)
@@ -192,6 +194,10 @@ class SearchBar(QWidget): # {{{
         l.addWidget(x)
         x.setToolTip(_("Reset Quick Search"))
 
+        x = parent.search_highlight_only = QCheckBox()
+        x.setText(_('Highlight'))
+        l.addWidget(x)
+
         x = parent.saved_search = SavedSearchBox(self)
         x.setMaximumSize(QSize(150, 16777215))
         x.setMinimumContentsLength(15)
diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index 49cb1ce182..98e61acf33 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -10,7 +10,7 @@ from contextlib import closing
 from operator import attrgetter
 
 from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
-        QModelIndex, QVariant, QDate
+        QModelIndex, QVariant, QDate, QColor
 
 from calibre.gui2 import NONE, config, UNDEFINED_QDATE
 from calibre.utils.pyparsing import ParseException
@@ -93,6 +93,8 @@ class BooksModel(QAbstractTableModel): # {{{
         self.bool_no_icon = QIcon(I('list_remove.png'))
         self.bool_blank_icon = QIcon(I('blank.png'))
         self.device_connected = False
+        self.rows_matching = set()
+        self.highlight_only = False
         self.read_config()
 
     def change_alignment(self, colname, alignment):
@@ -229,9 +231,22 @@ class BooksModel(QAbstractTableModel): # {{{
             self.endInsertRows()
             self.count_changed()
 
+    def set_highlight_only(self, toWhat):
+        self.highlight_only = toWhat
+        self.research()
+
     def search(self, text, reset=True):
         try:
-            self.db.search(text)
+            if self.highlight_only:
+                self.db.search('')
+                if not text:
+                    self.rows_matching = set()
+                else:
+                    self.rows_matching = set(self.db.search(text,
+                                                            return_matches=True))
+            else:
+                self.rows_matching = set()
+                self.db.search(text)
         except ParseException as e:
             self.searched.emit(e.msg)
             return
@@ -651,6 +666,9 @@ class BooksModel(QAbstractTableModel): # {{{
             return NONE
         if role in (Qt.DisplayRole, Qt.EditRole):
             return self.column_to_dc_map[col](index.row())
+        elif role == Qt.BackgroundColorRole:
+            if self.id(index) in self.rows_matching:
+                return QColor('lightgreen')
         elif role == Qt.DecorationRole:
             if self.column_to_dc_decorator_map[col] is not None:
                 return self.column_to_dc_decorator_map[index.column()](index.row())
diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py
index 9f74abfc86..75d3c14ef1 100644
--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@@ -375,6 +375,7 @@ class SearchBoxMixin(object): # {{{
             unicode(self.search.toolTip())))
         self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
         self.clear_button.setStatusTip(self.clear_button.toolTip())
+        self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
 
     def focus_search_box(self, *args):
         self.search.setFocus(Qt.OtherFocusReason)
@@ -401,6 +402,9 @@ class SearchBoxMixin(object): # {{{
     def focus_to_library(self):
         self.current_view().setFocus(Qt.OtherFocusReason)
 
+    def highlight_only_changed(self, toWhat):
+        self.current_view().model().set_highlight_only(toWhat)
+
     # }}}
 
 class SavedSearchBoxMixin(object): # {{{

From 32084ebd9ad13026b1f382ccac25e152314e9484 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 08:53:26 -0700
Subject: [PATCH 25/55] Improved Expansion.com. Fixes #405 (New news feed)

---
 resources/recipes/expansion_spanish.recipe | 116 ++++++++++++---------
 1 file changed, 68 insertions(+), 48 deletions(-)

diff --git a/resources/recipes/expansion_spanish.recipe b/resources/recipes/expansion_spanish.recipe
index 31a1504eb0..f2229e90e6 100644
--- a/resources/recipes/expansion_spanish.recipe
+++ b/resources/recipes/expansion_spanish.recipe
@@ -1,59 +1,79 @@
 #!/usr/bin/env  python
-# -*- coding: utf-8 -*-
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__author__    = 'Gerardo Diez'
+__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
+description   = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
+__docformat__ = 'restructuredtext en'
+
 '''
-www.expansion.com
+expansion.es
 '''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+class Publico(BasicNewsRecipe):
+    title               =u'Expansion.com'
+    __author__      ='Gerardo Diez'
+    publisher       =u'Unidad Editorial Información Económica, S.L.'
+    category                ='finances, catalunya'
+    oldest_article      =1
+    max_articles_per_feed   =100
+    simultaneous_downloads  =10
+    cover_url       =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
+    timefmt         ='[%A, %d %B, %Y]'
+    encoding        ='latin'
+    language        ='es'
+    remove_javascript   =True
+    no_stylesheets      =True
+    keep_only_tags      =[dict(name='div', attrs={'class':['noticia primer_elemento']})]
+    remove_tags         =[
+                dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
+                dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
+                dict(name='span', attrs={'class':['comentarios']}),
+                dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
+                dict(name='div', attrs={'id':['comentarios_lectores_listado']})
+                            ]
+    feeds               =[
+                (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
+                (u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
+                (u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
+                (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
+                (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
+                (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
 
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
+                (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
+                (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
+                (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
+                (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
+                (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
+                (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
 
-class Expansion(BasicNewsRecipe):
-    title                 = 'Diario Expansion'
-    __author__            = 'Darko Miletic'
-    description           = 'Lider de informacion de mercados, economica y politica'
-    publisher             = 'expansion.com'
-    category              = 'news, politics, Spain'
-    oldest_article        = 2
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    delay                 = 1
-    encoding              = 'iso-8859-15'
-    language = 'es'
+                (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
+                (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
+                (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
+                (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
+                (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
+                (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
+                (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
+                (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
+                (u'Deporte y Negocio', u'http://estaticos.expansion.com/rss/empresasdeporte.xml'),
+                (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
+                (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
+                (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
 
-    direction             = 'ltr'
+                (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
+                (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
+                (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
 
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        ]
+                (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
+                (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
+                (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
 
-    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+                (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
+                (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
+                (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
+                (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
 
-    feeds              = [
-                            (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
-                           ,(u'Temas del dia'   , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
-                         ]
-
-
-    keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
-
-    remove_tags        = [
-                             dict(name=['object','link','script'])
-                            ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
-                         ]
-
-    remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
-
-    def preprocess_html(self, soup):
-        soup.html['dir' ] = self.direction
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mcharset)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+                (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
+                (u'Cataluña', u'http://estaticos.expansion.com/rss/catalunya.xml'),
+                (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
+                ]
 

From d21021ed8f2b4e872faf19263727e531570e4923 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 15:54:09 +0000
Subject: [PATCH 26/55] Clean up focus issues

---
 src/calibre/gui2/library/views.py | 6 +++++-
 src/calibre/gui2/search_box.py    | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index e1e9cf4456..357b48d1de 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -680,8 +680,12 @@ class BooksView(QTableView): # {{{
     def set_editable(self, editable, supports_backloading):
         self._model.set_editable(editable)
 
+    def search_proxy(self, txt):
+        self._model.search(txt)
+        self.setFocus(Qt.OtherFocusReason)
+
     def connect_to_search_box(self, sb, search_done):
-        sb.search.connect(self._model.search)
+        sb.search.connect(self.search_proxy)
         self._search_done = search_done
         self._model.searched.connect(self.search_done)
 
diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py
index 75d3c14ef1..5808a2dc46 100644
--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@@ -404,6 +404,7 @@ class SearchBoxMixin(object): # {{{
 
     def highlight_only_changed(self, toWhat):
         self.current_view().model().set_highlight_only(toWhat)
+        self.focus_to_library()
 
     # }}}
 

From 4dde6b9675ad92b16cfc9bdcfa10db8cbcc0adbc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 08:54:42 -0700
Subject: [PATCH 27/55] Fix #8263 (MSNBC recipe)

---
 resources/recipes/msnbc.recipe | 58 ++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/resources/recipes/msnbc.recipe b/resources/recipes/msnbc.recipe
index 6e2fc50aaa..f093479e2f 100644
--- a/resources/recipes/msnbc.recipe
+++ b/resources/recipes/msnbc.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 msnbc.msn.com
 '''
@@ -19,7 +19,16 @@ class MsNBC(BasicNewsRecipe):
     publisher              = 'msnbc.com'
     category               = 'news, USA, world'
     language               = 'en'
-    extra_css              = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} '
+    extra_css              = """ 
+                                body{ font-family: Georgia,Times,serif }
+                                .hide{display: none}
+                                .caption{font-family: Arial,sans-serif; font-size: x-small}
+                                .entry-summary{font-family: Arial,sans-serif}
+                                .copyright{font-size: 0.95em; font-style: italic}
+                                .source-org{font-size: small; font-family: Arial,sans-serif}
+                                img{display: block; margin-bottom: 0.5em}
+                                span.byline{display: none}
+                            """
 
     conversion_options = {
                              'comments' : description
@@ -28,14 +37,20 @@ class MsNBC(BasicNewsRecipe):
                             ,'publisher': publisher
                          }
 
-    preprocess_regexps = [
-        (re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>')
-       ,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'),
-    ]
+    remove_tags_before = dict(name='h1', attrs={'id':'headline'})
+    remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
+    keep_only_tags=[
+                      dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
+                     ,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
+                   ]
+    remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
+    
+    remove_tags      = [
+                          dict(name=['iframe','object','link','embed','meta','table'])
+                         ,dict(name='span', attrs={'class':['copyright','Linear copyright']})
+                         ,dict(name='div', attrs={'class':'social'})
+                       ]
 
-    remove_tags_before = dict(name='div', attrs={'class':'head'})
-    remove_tags_after = dict(name='div', attrs={'class':'copyright'})
-    remove_tags      = [dict(name=['iframe','object','link','script','form'])]
 
     feeds = [
                (u'US News'       , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml'      )
@@ -48,11 +63,26 @@ class MsNBC(BasicNewsRecipe):
               ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml'      )
             ]
 
-    def print_version(self, url):
-        return url + 'print/1/displaymode/1098/'
-
     def preprocess_html(self, soup):
-        for item in soup.head.findAll('div'):
-            item.extract()
+        for item in soup.body.findAll('html'):
+            item.name='div'
+        for item in soup.body.findAll('div'):
+            if item.has_key('id') and item['id'].startswith('vine-'):
+               item.extract()
+            if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
+               item.extract()            
+        for item in soup.body.findAll('img'):
+            if not item.has_key('alt'):
+               item['alt'] = 'image'
+        for item in soup.body.findAll('ol'):
+            if item.has_key('class') and item['class'].startswith('grid'):
+               item.extract()
+        for item in soup.body.findAll('span'):
+            if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
+               item.extract()
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+               tstr = alink.string
+               alink.replaceWith(tstr)    
         return soup
 

From fcbc4b331b776d1d52008be895fd672675fc6346 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 08:57:47 -0700
Subject: [PATCH 28/55] ZeroHedge by DM. Fixes #8289 (New recipe for economic
 blog Zero Hedge)

---
 resources/images/news/zerohedge.png | Bin 0 -> 3120 bytes
 resources/recipes/zerohedge.recipe  |  33 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 resources/images/news/zerohedge.png
 create mode 100644 resources/recipes/zerohedge.recipe

diff --git a/resources/images/news/zerohedge.png b/resources/images/news/zerohedge.png
new file mode 100644
index 0000000000000000000000000000000000000000..a2bc6cde143d007fed9d275221ce49a06bbc7371
GIT binary patch
literal 3120
zcmV-04A1k4P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00009a7bBm001r{
z001r{0eGc9b^rhhPiaF#P*7-ZbZ>KLZ*U+<Lqi~Na&Km7Y-Iodc-oy)XH-+^7Crag
z^g>IBfRsybQWXdwQbLP>6p<z>Aqfylh#{fb6;Z(vMMVS~$e@S=j*ftg6;Uh<iVD~V
z<RPMtgQJLw%KPDaqifc@_vX$1wbwr9tn;0-&j-K=43<bUQ8j=JsX`tR;Dg7+#^K~H
zK!FM*Z~zbpvt%K2{UZSY_<lS*D<Z%Lz5oGu(+dayz)hRLFdT>f59&ghTmgWD0l;*T
zI7<kC6aYYajzXpYKt=(8otP$50H6c_V9R4-;{Z@C0AMG7=F<Rxo%or10RUT+Ar%3j
zkpLhQWr#!oXgdI`&sK^>09Y^p6lP1rIRMx#05C~cW=H_Aw*bJ-5DT&Z2n+x)QHX^p
z00esgV8|mQcmRZ%02D^@S3L16t`O%c004NIvOKvYIYoh62rY33S640`D9%Y2D-<?i
z0%4j!F2Z@488U%158(66005wo6%pWr^Zj_v4zAA5HjcIqUoGmt2LB>rV&neh&#Q1i
z007~1e$oCcFS8neI|hJl{-P!B1ZZ9hpmq0)X0i`JwE&>$+E?>%_<lS*MWK+n+1cgf
z<k(8YLR(?VSAG6x!e78w{cQPuJpA|d;J)G{fihizM+Erb!p!tcr5w+a34~(Y=8s4G
zw+sLL9n&JjNn*KJDiq^U5^;`1nvC-@r6P$!k}1U{(*I=Q-z@tBKHoI}uxdU5dyy@u
zU1J0GOD7Ombim^G008p4Z^6_k2m^p<gW=D2|L;HjN1!DDfM!XOaR2~bL?kX$%CkSm
z2mk;?pn)o|K^yeJ7%adB9Ki+L!3+FgHiSYX#KJ-lLJDMn9CBbOtb#%)hRv`YDqt_v
zKpix|QD}yfa1JiQRk#j4a1Z)n2%f<xynzV>LC6RbVIkUx0b+_+BaR3cnT7Zv!AJxW
zizFb)h!jyGOOZ85F;a?DAXP{m@;!0_Ifq<Ex{*7`05XF7hP+2Hl!3BQJ=6@fL%FCo
z8iYoo3(#bAF`ADSpqtQgv>H8(HlgRxt7s3}k3K`kFu>>-2Q$QMFfPW!La{h336o>X
zu_CMttHv6zR;&ZNiS=X8v3CR#fknUxHUxJ<AYmRsNLWl*PS{AOARHt#5!wki2?K;t
z!Y3k=s7tgax)J%r7-BLphge7~Bi0g+6E6^Zh(p9TBoc{3GAFr^0!gu?RMHaCM$&Fl
zBk3%un>0uoBa_M6WNWeqIg~6QE69c9o#eyhGvpiOA@W-aonk<7r1(?fC{oI5N*U!4
z<uv66WtcKSRim0x-Ke2d5jBrmLam{;Qm;{ms1r1GnmNsb7D-E`t)i9F8fX`2_i3-_
zbh;7Ul^#x)&{xvS=|||7=mYe33=M`AgU5(xC>fg=2N-7=cNnjjOr{yriy6mMFgG#l
znCF=fnQv8CDz++o6_Lscl}eQ+l^ZHARH>?_s@|##Rr6KLRFA1%Q+=*RRWnoLsR`7U
zt5vF<Q0r40Q)j6=sE4X&sBct1q<&fbi3VB2Ov6t@q*0);U*o*SAPZv|vv@2aYYnT0
zb%8a+Cb7-ge0D0knEf5Qi#@8Tp*ce{N;6lpQuCB%KL_KOarm5cP6_8Ir<e17iry6O
zDdH&`rZh~sF=bq9s+O0QSgS~@QL9Jmy*94xr=6y~MY~!1fet~(N+(<=M`w@D1)b+p
z*;C!83a1uLJv#NSE~;y#8=<>IcfW3@?wFpwUVxrVZ>QdQz32KIeJ}k~{cZZE^+ya?
z2D1z#2HOnI7(B%_ac?{wFUQ;QQA1tBKtrWrm0_3Rgps+?Jfqb{jYbcQX~taRB;#$y
zZN{S}1|}gUOHJxc?wV3fxuz+mJ4`!F$IZ;mqRrNsHJd##*D~ju=bP7?-?v~|cv>vB
zsJ6IeNwVZxrdjT`yl#bBIa#GxRa#xMMy;K#CDyyGyQdMSxlWT#tDe?p!?5wT$+oGt
z8L;Kp2HUQ-ZMJ=3XJQv;x5ci*?vuTfeY$;({XGW_huIFR9a<fJbF^|4I#xQ~n$Dc=
zKYhjYmgz5NSkDm8*fZm{6U!;YX`NG>(?@3)XSs8O^N5RyOM=TTmp(3=8^+zpz2r)C
z^>JO{deZfso3oq3?Wo(Y?l$ge?uXo;%ru`Vo>?<<(8I_>;8Eq#KMS9gFl*neeosSB
zfoHYnBQIkwkyowPu(zdms`p{<7e4kra-ZWq<2*OsGTvEV%s0Td$hXT+!*8Bnh2KMe
zBmZRodjHV?r+_5^X9J0WL4jKW`}lf%A-|44I@@LTvf1rHjG(ze6+w@Jt%Bvjts!X0
z?2xS?_ve_-k<Mujg;0Lz*3buG=3$G&ehepthlN*$KaOySSQ^nWmo<0M+(UEUMEXRQ
zMBbZcF;6+KElM>iKB_KiJlZ$9G`c^=E@oNG)mWWaNo-3TIW8)$Hg0Ub-~8?KhvJ>$
z3*&nim@mj(aCxE5!t{lw7O5^0EIO7zOo&c6l<+|iDySBWCGrz@C5{St!X3hAA}`T4
z(TLbXTq+(;@<=L8dXnssyft|w#WSTW<++3>sgS%(4NTpeI-VAqb|7ssJvzNHgOZVu
zaYCvgO_R1~>SyL=cFU|~g|hy|Zi}}s9+d~lYqOB71z9Z$wnC=pR9Yz4DhIM>Wmjgu
z&56o6maCpC&F##y%G;1PobR9i?GnNg;gYtchD%p19a!eQtZF&3JaKv33gZ<8D~47E
ztUS1iwkmDaPpj=$m#%)jCVEY4fnLGNg2A-`YwHVD3gv};>)hAvT~AmqS>Lr``i7kw
zJ{5_It`yrBmlc25DBO7E8;5VoznR>Ww5hAaxn$2~(q`%A-YuS64wkBy=9dm`4cXeX
z4c}I@?e+FW+b@^RDBHV(wnMq2zdX3SWv9u`%{xC-q*U}&`cyXV(%rRT*Z6MH?i+i&
z_B8C(+grT%{XWUQ+f@NoP1R=AW&26{v-dx)iK^-Nmiuj8txj!m?Z*Ss1N{dh4z}01
z)YTo*JycSU)+_5r4#yw9{+;i4Ee$peRgIj+;v;ZGdF1K$3E%e~4LaI(jC-u%2h$&R
z9cLXcYC@Xwnns&bn)_Q~Te?roKGD|d-g^8;+aC{{G(1^(O7m37Y1-+6)01cN&y1aw
zoqc{T`P^XJqPBbIW6s}d4{z_f5Om?vMgNQEJG?v2T=KYd^0M3I6IZxbny)%vZR&LD
zJpPl@Psh8QyPB@KTx+@RdcC!KX7}kEo;S|j^u2lU7XQ}Oo;f|;z4Ll+_r>@1-xl3|
zawq-H%e&ckC+@AhPrP6BK<z=<L*0kfKU@CX*zeqbYQT4(^U>T#_XdT7&;F71j}Joy
zkC~6lh7E@6o;W@^IpRNZ{ptLtL(gQ-CY~4mqW;US7Zxvm_|@yz&e53Bp_lTPlfP|z
zrTyx_>lv@x#=^!PzR7qqF<$gm`|ZJZ+;<)Cqu&ot<a{81DF0~rvGr5Xr~8u`lav1h
z1DNytV>2z=0000WV@Og>004R=004l4008;_004mL004C`008P>0026e000+nl3&F}
z00047Nkl<Zc-m~zKS-8Q6vy$85oi)YM5*ZTYUa>TLpTWjTf)X@)Le=-HxW^BOWd5A
zj9f$ob$78LQUX_3d39*0KoHSNv|g9<%H{DMxSVq?=lPy<p8Kn*R4Up_tqS%UWB7qb
zXvGpn@v~aAcd+k<>o$)?%wPZ=Sin#K`_tkgU<U7^Xf^IG@D?8<|BQwl;0Y%2B~H;M
zl5gJU0OR4e<n0h&BOXL&3V4Q>5jUZf0X)Y4i0jdl0=nYz6wP>%0lX?q+y*xzc^LjL
zjxmLQxF21A$FpukO<kNT!0qt0v1>~}PNHiq?!<&uz)=b~OF)g-#Ph-mYiJJ`?4|#p
zoD|*-)fH_$*0~kkw^P6t{v?2$4z_Wgz;;u>4ptIDon`>v@gU;aoRB6$wlJLmbmM8L
zwVVU|LT|()_=K9sheL%Q((dUiRP7t?VGeK5hl6<5AuJcix*PyDC`uG!WXfa!0000<
KMNUMnLSTYHtj}-&

literal 0
HcmV?d00001

diff --git a/resources/recipes/zerohedge.recipe b/resources/recipes/zerohedge.recipe
new file mode 100644
index 0000000000..09f62e5b52
--- /dev/null
+++ b/resources/recipes/zerohedge.recipe
@@ -0,0 +1,33 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.zerohedge.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ZeroHedge(BasicNewsRecipe):
+    title                  = 'Zero Hedge'
+    __author__             = 'Darko Miletic'
+    description            = 'On a long enough timeline the survival rate for everyone drops to zero'
+    oldest_article         = 10
+    max_articles_per_feed  = 100
+    no_stylesheets         = True
+    use_embedded_content   = True
+    encoding               = 'utf8'
+    publisher              = 'zero hedge'
+    category               = 'news, USA, world, economy, politics'
+    language               = 'en'
+    masthead_url           = 'http://www.zerohedge.com/themes/newsflash/logo.png'
+    publication_type       = 'blog'
+    extra_css              = 'body{ font-family: sans-serif }'
+
+    conversion_options = {
+                             'comments' : description
+                            ,'tags'     : category
+                            ,'language' : language
+                            ,'publisher': publisher
+                         }
+
+
+    feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')]

From e3bbb4a0dec720a0b511f5a0299dd8297161cba0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 09:11:02 -0700
Subject: [PATCH 29/55] Pass candidates to search helpers, drop gm counters

---
 src/calibre/library/caches.py    | 10 ++++++----
 src/calibre/library/database2.py |  4 ----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 6aef45dbbd..4168360d3a 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
             if isinstance(location, list):
                 if allow_recursion:
                     for loc in location:
-                        matches |= self.get_matches(loc, query, allow_recursion=False)
+                        matches |= self.get_matches(loc, query, candidates,
+                                                    allow_recursion=False)
                     return matches
                 raise ParseException(query, len(query), 'Recursive query group detected', self)
 
@@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
                 fm = self.field_metadata[location]
                 # take care of dates special case
                 if fm['datatype'] == 'datetime':
-                    return self.get_dates_matches(location, query.lower())
+                    return self.get_dates_matches(location, query.lower(), candidates)
 
                 # take care of numbers special case
                 if fm['datatype'] in ('rating', 'int', 'float'):
-                    return self.get_numeric_matches(location, query.lower())
+                    return self.get_numeric_matches(location, query.lower(), candidates)
 
                 # take care of the 'count' operator for is_multiples
                 if fm['is_multiple'] and \
@@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
                         query[1:1] in '=<>!':
                     vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
                             len(item[loc].split(ms)) if item[loc] is not None else 0
-                    return self.get_numeric_matches(location, query[1:], val_func=vf)
+                    return self.get_numeric_matches(location, query[1:],
+                                                    candidates, val_func=vf)
 
             # everything else, or 'all' matches
             matchkind = CONTAINS_MATCH
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index aa491aff28..d2654577b9 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         self.has_id  = self.data.has_id
         self.count   = self.data.count
 
-        # Count times get_metadata is called, and how many times in the cache
-        self.gm_count  = 0
-        self.gm_missed = 0
-
         for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
                      'publisher', 'rating', 'series', 'series_index', 'tags',
                      'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):

From 40b05944c2911e1d1ca5345d0dfc64b99be5d5ea Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 09:12:56 -0700
Subject: [PATCH 30/55] Ignore missing actions in toolbar preferences

---
 src/calibre/gui2/preferences/toolbar.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/preferences/toolbar.py b/src/calibre/gui2/preferences/toolbar.py
index c13d956aea..26cdea19d3 100644
--- a/src/calibre/gui2/preferences/toolbar.py
+++ b/src/calibre/gui2/preferences/toolbar.py
@@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
                     dont_remove_from=set(['toolbar-device']))
         if name is None:
             return FakeAction('--- '+_('Separator')+' ---', None)
-        return gui.iactions[name]
+        try:
+            return gui.iactions[name]
+        except:
+            return None
 
     def rowCount(self, parent):
         return len(self._data)
@@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
         BaseModel.__init__(self)
         self.gprefs_name = 'action-layout-'+key
         current = gprefs[self.gprefs_name]
-        self._data =  [self.name_to_action(x, gui) for x in current]
+        self._data = [self.name_to_action(x, gui) for x in current]
+        self._data = [x for x in self._data if x is not None]
         self.key = key
         self.gui = gui
 

From d92c03de7d18bca07dff9c7247591708304b6802 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 09:21:20 -0700
Subject: [PATCH 31/55] Updated MIT Technology Review

---
 resources/recipes/technology_review.recipe | 23 +++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/resources/recipes/technology_review.recipe b/resources/recipes/technology_review.recipe
index cc8f13733e..e7cc6700d7 100644
--- a/resources/recipes/technology_review.recipe
+++ b/resources/recipes/technology_review.recipe
@@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
     def get_article_url(self, article):
         return article.get('guid', article.get('id', None))
 
-
     def print_version(self, url):
         baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
         split1 = string.split(url,"/")
@@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
         split2= string.split(xxx,"/")
         s =  baseurl + split2[0]
         return s
+
+
+    def postprocess_html(self, soup, first_fetch):
+        '''Strip page chrome and unwrap links in the printer friendly page.
+
+        The third argument (unused here) must not be named ``True``: that
+        shadows the builtin, and soup.find(True, ...) would then receive the
+        caller's flag instead of the "match any tag" marker.
+        Returns the same soup object, modified in place.
+        '''
+        # remove picture, close button and banner advertisement
+        for css_class in ('header', 'close', 'bannerad'):
+            tag = soup.find(True, {'class': css_class})
+            if tag is not None:  # find() returns None when nothing matches
+                tag.replaceWith("")
+
+        #thanks kiklop74!  This code removes all links from the text
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)
+
+        return soup

From ccc0c02c67dceca4dcfe527bf080bf08161d456b Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 16:43:47 +0000
Subject: [PATCH 32/55] Add arbitrary python function evaluation to formatter

---
 src/calibre/utils/formatter.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index 40760bf91b..0b5f1d1f52 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -18,6 +18,24 @@ class _Parser(object):
     LEX_NUM = 4
     LEX_EOF = 5
 
+    def _python(self, func):
+        '''Exec template source ``func``; return its evaluate(kwargs) result
+        (numbers/str as unicode, lists comma-joined). NOTE(review): runs code.'''
+        namespace = {}  # private namespace; avoids shadowing builtin locals()
+        exec(func, namespace)  # same as "exec func in namespace" on Python 2
+        if 'evaluate' not in namespace:
+            self.error('no evaluate function in python')
+        try:
+            result = namespace['evaluate'](self.parent.kwargs)
+            if isinstance(result, list):
+                result = ','.join(result)
+            elif isinstance(result, (float, int, str)):
+                result = unicode(result)
+            return result
+        except Exception as e:
+            detail = getattr(e, 'msg', None) or repr(e)  # most lack .msg
+            self.error('python function threw exception: ' + detail)
+
     def _strcmp(self, x, y, lt, eq, gt):
         v = strcmp(x, y)
         if v < 0:
@@ -79,6 +97,7 @@ class _Parser(object):
             'field'    : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
             'multiply' : (2, partial(_math, op='*')),
             'print'    : (-1, _print),
+            'python'   : (1, _python),
             'strcat'   : (-1, _concat),
             'strcmp'   : (5, _strcmp),
             'substr'   : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
                 (r'\'.*?((?<!\\)\')',   lambda x,t: (3, t[1:-1])),
                 (r'\n#.*?(?=\n)',       None),
                 (r'\s',                 None)
-        ])
+        ], flags=re.DOTALL)
 
     def _eval_program(self, val, prog):
         # keep a cache of the lex'ed program under the theory that re-lexing

From 7e878120270381b77cb34f732109219f39ecce66 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 17:18:06 +0000
Subject: [PATCH 33/55] Remember the state of the highlight_only check box.
 Scroll to first matching row.

---
 src/calibre/gui2/library/models.py | 12 +++++++++---
 src/calibre/gui2/library/views.py  |  2 ++
 src/calibre/gui2/search_box.py     |  4 ++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index 98e61acf33..38a4b28744 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -94,6 +94,7 @@ class BooksModel(QAbstractTableModel): # {{{
         self.bool_blank_icon = QIcon(I('blank.png'))
         self.device_connected = False
         self.rows_matching = set()
+        self.lowest_row_matching = None
         self.highlight_only = False
         self.read_config()
 
@@ -233,7 +234,8 @@ class BooksModel(QAbstractTableModel): # {{{
 
     def set_highlight_only(self, toWhat):
         self.highlight_only = toWhat
-        self.research()
+        if self.last_search:
+            self.research()
 
     def search(self, text, reset=True):
         try:
@@ -241,11 +243,15 @@ class BooksModel(QAbstractTableModel): # {{{
                 self.db.search('')
                 if not text:
                     self.rows_matching = set()
+                    self.lowest_row_matching = None
                 else:
-                    self.rows_matching = set(self.db.search(text,
-                                                            return_matches=True))
+                    self.rows_matching = self.db.search(text, return_matches=True)
+                    if self.rows_matching:
+                        self.lowest_row_matching = self.db.row(self.rows_matching[0])
+                    self.rows_matching = set(self.rows_matching)
             else:
                 self.rows_matching = set()
+                self.lowest_row_matching = None
                 self.db.search(text)
         except ParseException as e:
             self.searched.emit(e.msg)
diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index 357b48d1de..07c3cc21e4 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -682,6 +682,8 @@ class BooksView(QTableView): # {{{
 
     def search_proxy(self, txt):
         self._model.search(txt)
+        if self._model.lowest_row_matching:
+            self.scroll_to_row(self._model.lowest_row_matching)
         self.setFocus(Qt.OtherFocusReason)
 
     def connect_to_search_box(self, sb, search_done):
diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py
index 5808a2dc46..e4073a01c9 100644
--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@@ -16,6 +16,7 @@ from calibre.gui2 import config
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
 from calibre.gui2.dialogs.search import SearchDialog
+from calibre.utils.config import dynamic
 from calibre.utils.search_query_parser import saved_searches
 from calibre.utils.icu import sort_key
 
@@ -376,6 +377,8 @@ class SearchBoxMixin(object): # {{{
         self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
         self.clear_button.setStatusTip(self.clear_button.toolTip())
         self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
+        self.search_highlight_only.setChecked(
+                            dynamic.get('search_highlight_only', False))
 
     def focus_search_box(self, *args):
         self.search.setFocus(Qt.OtherFocusReason)
@@ -403,6 +406,7 @@ class SearchBoxMixin(object): # {{{
         self.current_view().setFocus(Qt.OtherFocusReason)
 
     def highlight_only_changed(self, toWhat):
+        dynamic.set('search_highlight_only', toWhat)
         self.current_view().model().set_highlight_only(toWhat)
         self.focus_to_library()
 

From 2fca19878162b49847ae3a855c7a4d4d95822c19 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 12 Jan 2011 17:28:12 +0000
Subject: [PATCH 34/55] Select the row when scrolling to it

---
 src/calibre/gui2/library/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index 07c3cc21e4..e72d0c32a1 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -683,7 +683,7 @@ class BooksView(QTableView): # {{{
     def search_proxy(self, txt):
         self._model.search(txt)
         if self._model.lowest_row_matching:
-            self.scroll_to_row(self._model.lowest_row_matching)
+            self.select_rows([self._model.lowest_row_matching], using_ids=False)
         self.setFocus(Qt.OtherFocusReason)
 
     def connect_to_search_box(self, sb, search_done):

From d7ea545df60327df0e16416aacac33aab6df21c2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 10:35:05 -0700
Subject: [PATCH 35/55] Fix Wired Daily

---
 resources/recipes/wired_daily.recipe | 53 ++++++++++++++++++----------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/resources/recipes/wired_daily.recipe b/resources/recipes/wired_daily.recipe
index f06d28796e..df59c7c826 100644
--- a/resources/recipes/wired_daily.recipe
+++ b/resources/recipes/wired_daily.recipe
@@ -2,8 +2,10 @@
 __license__   = 'GPL v3'
 __docformat__ = 'restructuredtext en'
 
+import re
 
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.chardet import xml_to_unicode
 
 class Wired_Daily(BasicNewsRecipe):
 
@@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
 
     no_stylesheets = True
 
+    preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
+        '<head></head>')]
+
     remove_tags_before = dict(name='div', id='content')
-    remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
-        'footer', 'advertisement', 'blog_subscription_unit',
-        'brightcove_component']),
-        {'class':'entryActions'},
-        dict(name=['noscript', 'script'])]
+    remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
+        'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
+        'outerWrapper', 'inf_widget']),
+        {'class':['entryActions', 'advertisement', 'entryTags']},
+        dict(name=['noscript', 'script']),
+        dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
+        {'class':lambda x: x and x.startswith('contentjump')},
+        dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
+
 
     feeds = [
         ('Top News', 'http://feeds.wired.com/wired/index'),
-        ('Culture', 'http://feeds.wired.com/wired/culture'),
-        ('Software', 'http://feeds.wired.com/wired/software'),
-        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
-        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
-        ('Cars', 'http://feeds.wired.com/wired/cars'),
-        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
-        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
-        ('Science', 'http://feeds.wired.com/wired/science'),
-        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
-        ('Politics', 'http://feeds.wired.com/wired/politics'),
-        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
-        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
+        ('Product Reviews',
+            'http://www.wired.com/reviews/feeds/latestProductsRss'),
+        ('Autopia', 'http://www.wired.com/autopia/feed/'),
+        ('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
+        ('Epicenter', 'http://www.wired.com/epicenter/feed/'),
+        ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
+        ('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
+        ('Playbook', 'http://www.wired.com/playbook/feed/'),
+        ('Rawfile', 'http://www.wired.com/rawfile/feed/'),
+        ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
+        ('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
+        ('Underwire', 'http://www.wired.com/underwire/feed/'),
+        ('Web Monkey', 'http://www.webmonkey.com/feed/'),
+        ('Science', 'http://www.wired.com/wiredscience/feed/'),
         ]
 
+    def populate_article_metadata(self, article, soup, first):
+        if article.text_summary:  # rewrite the summary; skip empty ones
+            article.text_summary = xml_to_unicode(article.text_summary,
+                    resolve_entities=True)[0]  # [0]: first item of the result
+
     def print_version(self, url):
-        return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
-
+        return url + '/all/1'
 

From 0785b9cf92ea6b5f2fb41b005c0dfeb8e2a9f6db Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 11:17:20 -0700
Subject: [PATCH 36/55] Fix #8218 (Bulk Edit window too tall for screen)

---
 src/calibre/gui2/dialogs/metadata_bulk.py |   10 +-
 src/calibre/gui2/dialogs/metadata_bulk.ui | 1445 +++++++++++----------
 2 files changed, 736 insertions(+), 719 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index e1ee4327f3..5ea8f00148 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
 from calibre.ebooks.metadata.book.base import composite_formatter
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.gui2.custom_column_widgets import populate_metadata_page
-from calibre.gui2 import error_dialog
+from calibre.gui2 import error_dialog, ResizableDialog
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.utils.config import dynamic
 from calibre.utils.titlecase import titlecase
@@ -49,7 +49,7 @@ def get_cover_data(path):
 
 
 
-class MyBlockingBusy(QDialog):
+class MyBlockingBusy(QDialog): # {{{
 
     do_one_signal = pyqtSignal()
 
@@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
         self.current_index += 1
         self.do_one_signal.emit()
 
+    # }}}
 
-class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
+class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
 
     s_r_functions = {       ''              : lambda x: x,
                             _('Lower Case') : lambda x: icu_lower(x),
@@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
                         ]
 
     def __init__(self, window, rows, model, tab):
-        QDialog.__init__(self, window)
+        ResizableDialog.__init__(self, window)
         Ui_MetadataBulkDialog.__init__(self)
-        self.setupUi(self)
         self.model = model
         self.db = model.db
         self.ids = [self.db.id(r) for r in rows]
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui
index 41858b099b..5c0f1ec78f 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.ui
+++ b/src/calibre/gui2/dialogs/metadata_bulk.ui
@@ -6,8 +6,8 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>752</width>
-    <height>633</height>
+    <width>819</width>
+    <height>650</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -17,8 +17,8 @@
    <iconset resource="../../../../resources/images.qrc">
     <normaloff>:/images/edit_input.png</normaloff>:/images/edit_input.png</iconset>
   </property>
-  <layout class="QVBoxLayout" name="verticalLayout">
-   <item>
+  <layout class="QGridLayout" name="gridLayout_2">
+   <item row="0" column="0">
     <widget class="QLabel" name="box_title">
      <property name="text">
       <string/>
@@ -28,818 +28,836 @@
      </property>
     </widget>
    </item>
-   <item>
-    <layout class="QVBoxLayout">
-     <property name="spacing">
-      <number>6</number>
+   <item row="1" column="0">
+    <widget class="QScrollArea" name="scrollArea">
+     <property name="frameShape">
+      <enum>QFrame::NoFrame</enum>
      </property>
-     <property name="margin">
+     <property name="lineWidth">
       <number>0</number>
      </property>
-     <item>
-      <widget class="QTabWidget" name="central_widget">
-       <property name="currentIndex">
+     <property name="widgetResizable">
+      <bool>true</bool>
+     </property>
+     <widget class="QWidget" name="scrollAreaWidgetContents">
+      <property name="geometry">
+       <rect>
+        <x>0</x>
+        <y>0</y>
+        <width>811</width>
+        <height>589</height>
+       </rect>
+      </property>
+      <layout class="QVBoxLayout" name="verticalLayout_2">
+       <property name="margin">
         <number>0</number>
        </property>
-       <widget class="QWidget" name="tabWidgetPage1">
-        <attribute name="title">
-         <string>&amp;Basic metadata</string>
-        </attribute>
-        <layout class="QGridLayout" name="gridLayout">
-         <item row="0" column="0">
-          <widget class="QLabel" name="label_2">
-           <property name="text">
-            <string>&amp;Author(s): </string>
-           </property>
-           <property name="alignment">
-            <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-           </property>
-           <property name="buddy">
-            <cstring>authors</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="1" column="1">
-          <widget class="QCheckBox" name="auto_author_sort">
-           <property name="text">
-            <string>A&amp;utomatically set author sort</string>
-           </property>
-          </widget>
-         </item>
-         <item row="2" column="0">
-          <widget class="QLabel" name="label_8">
-           <property name="text">
-            <string>Author s&amp;ort: </string>
-           </property>
-           <property name="alignment">
-            <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-           </property>
-           <property name="buddy">
-            <cstring>author_sort</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="2" column="1" colspan="2">
-          <widget class="EnLineEdit" name="author_sort">
-           <property name="toolTip">
-            <string>Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.</string>
-           </property>
-          </widget>
-         </item>
-         <item row="3" column="0">
-          <widget class="QLabel" name="label_6">
-           <property name="text">
-            <string>&amp;Rating:</string>
-           </property>
-           <property name="alignment">
-            <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-           </property>
-           <property name="buddy">
-            <cstring>rating</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="3" column="1" colspan="2">
-          <widget class="QSpinBox" name="rating">
-           <property name="toolTip">
-            <string>Rating of this book. 0-5 stars</string>
-           </property>
-           <property name="whatsThis">
-            <string>Rating of this book. 0-5 stars</string>
-           </property>
-           <property name="buttonSymbols">
-            <enum>QAbstractSpinBox::PlusMinus</enum>
-           </property>
-           <property name="specialValueText">
-            <string>No change</string>
-           </property>
-           <property name="suffix">
-            <string> stars</string>
-           </property>
-           <property name="minimum">
-            <number>-1</number>
-           </property>
-           <property name="maximum">
-            <number>5</number>
-           </property>
-           <property name="value">
-            <number>-1</number>
-           </property>
-          </widget>
-         </item>
-         <item row="4" column="0">
-          <widget class="QLabel" name="label_3">
-           <property name="text">
-            <string>&amp;Publisher: </string>
-           </property>
-           <property name="alignment">
-            <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-           </property>
-           <property name="buddy">
-            <cstring>publisher</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="4" column="1" colspan="2">
-          <widget class="EnComboBox" name="publisher">
-           <property name="editable">
-            <bool>true</bool>
-           </property>
-          </widget>
-         </item>
-         <item row="5" column="0">
-          <widget class="QLabel" name="label_4">
-           <property name="text">
-            <string>Add ta&amp;gs: </string>
-           </property>
-           <property name="alignment">
-            <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-           </property>
-           <property name="buddy">
-            <cstring>tags</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="5" column="1">
-          <widget class="TagsLineEdit" name="tags">
-           <property name="toolTip">
-            <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
-           </property>
-          </widget>
-         </item>
-         <item row="5" column="2">
-          <widget class="QToolButton" name="tag_editor_button">
-           <property name="toolTip">
-            <string>Open Tag Editor</string>
-           </property>
-           <property name="text">
-            <string>Open Tag Editor</string>
-           </property>
-           <property name="icon">
-            <iconset resource="../../../../resources/images.qrc">
-             <normaloff>:/images/chapters.png</normaloff>:/images/chapters.png</iconset>
-           </property>
-          </widget>
-         </item>
-         <item row="6" column="0">
-          <widget class="QLabel" name="label">
-           <property name="text">
-            <string>&amp;Remove tags:</string>
-           </property>
-           <property name="buddy">
-            <cstring>remove_tags</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="6" column="1">
-          <widget class="TagsLineEdit" name="remove_tags">
-           <property name="toolTip">
-            <string>Comma separated list of tags to remove from the books. </string>
-           </property>
-          </widget>
-         </item>
-         <item row="6" column="2">
-          <widget class="QCheckBox" name="remove_all_tags">
-           <property name="toolTip">
-            <string>Check this box to remove all tags from the books.</string>
-           </property>
-           <property name="text">
-            <string>Remove all</string>
-           </property>
-          </widget>
-         </item>
-         <item row="7" column="0">
-          <widget class="QLabel" name="label_7">
-           <property name="text">
-            <string>&amp;Series:</string>
-           </property>
-           <property name="textFormat">
-            <enum>Qt::PlainText</enum>
-           </property>
-           <property name="alignment">
-            <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-           </property>
-           <property name="buddy">
-            <cstring>series</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="7" column="1">
-          <layout class="QHBoxLayout" name="HLayout_34">
-           <item>
-            <widget class="EnComboBox" name="series">
+       <item>
+        <widget class="QTabWidget" name="central_widget">
+         <property name="currentIndex">
+          <number>0</number>
+         </property>
+         <widget class="QWidget" name="tabWidgetPage1">
+          <attribute name="title">
+           <string>&amp;Basic metadata</string>
+          </attribute>
+          <layout class="QGridLayout" name="gridLayout">
+           <item row="0" column="0">
+            <widget class="QLabel" name="label_2">
+             <property name="text">
+              <string>&amp;Author(s): </string>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>authors</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="1" column="1">
+            <widget class="QCheckBox" name="auto_author_sort">
+             <property name="text">
+              <string>A&amp;utomatically set author sort</string>
+             </property>
+            </widget>
+           </item>
+           <item row="2" column="0">
+            <widget class="QLabel" name="label_8">
+             <property name="text">
+              <string>Author s&amp;ort: </string>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>author_sort</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="2" column="1" colspan="2">
+            <widget class="EnLineEdit" name="author_sort">
              <property name="toolTip">
-              <string>List of known series. You can add new series.</string>
+              <string>Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.</string>
+             </property>
+            </widget>
+           </item>
+           <item row="3" column="0">
+            <widget class="QLabel" name="label_6">
+             <property name="text">
+              <string>&amp;Rating:</string>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>rating</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="3" column="1" colspan="2">
+            <widget class="QSpinBox" name="rating">
+             <property name="toolTip">
+              <string>Rating of this book. 0-5 stars</string>
              </property>
              <property name="whatsThis">
-              <string>List of known series. You can add new series.</string>
+              <string>Rating of this book. 0-5 stars</string>
              </property>
+             <property name="buttonSymbols">
+              <enum>QAbstractSpinBox::PlusMinus</enum>
+             </property>
+             <property name="specialValueText">
+              <string>No change</string>
+             </property>
+             <property name="suffix">
+              <string> stars</string>
+             </property>
+             <property name="minimum">
+              <number>-1</number>
+             </property>
+             <property name="maximum">
+              <number>5</number>
+             </property>
+             <property name="value">
+              <number>-1</number>
+             </property>
+            </widget>
+           </item>
+           <item row="4" column="0">
+            <widget class="QLabel" name="label_3">
+             <property name="text">
+              <string>&amp;Publisher: </string>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>publisher</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="4" column="1" colspan="2">
+            <widget class="EnComboBox" name="publisher">
              <property name="editable">
               <bool>true</bool>
              </property>
-             <property name="insertPolicy">
-              <enum>QComboBox::InsertAlphabetically</enum>
+            </widget>
+           </item>
+           <item row="5" column="0">
+            <widget class="QLabel" name="label_4">
+             <property name="text">
+              <string>Add ta&amp;gs: </string>
              </property>
-             <property name="sizeAdjustPolicy">
-              <enum>QComboBox::AdjustToContents</enum>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>tags</cstring>
              </property>
             </widget>
            </item>
-           <item>
-            <widget class="QCheckBox" name="clear_series">
+           <item row="5" column="1">
+            <widget class="TagsLineEdit" name="tags">
              <property name="toolTip">
-              <string>If checked, the series will be cleared</string>
+              <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
+             </property>
+            </widget>
+           </item>
+           <item row="5" column="2">
+            <widget class="QToolButton" name="tag_editor_button">
+             <property name="toolTip">
+              <string>Open Tag Editor</string>
              </property>
              <property name="text">
-              <string>Clear series</string>
+              <string>Open Tag Editor</string>
+             </property>
+             <property name="icon">
+              <iconset resource="../../../../resources/images.qrc">
+               <normaloff>:/images/chapters.png</normaloff>:/images/chapters.png</iconset>
              </property>
             </widget>
            </item>
-           <item>
-            <spacer name="HSpacer_344">
-             <property name="orientation">
-              <enum>Qt::Horizontal</enum>
+           <item row="6" column="0">
+            <widget class="QLabel" name="label">
+             <property name="text">
+              <string>&amp;Remove tags:</string>
              </property>
-             <property name="sizeHint" stdset="0">
-              <size>
-               <width>20</width>
-               <height>0</height>
-              </size>
+             <property name="buddy">
+              <cstring>remove_tags</cstring>
              </property>
-            </spacer>
+            </widget>
            </item>
-          </layout>
-         </item>
-         <item row="8" column="1" colspan="2">
-          <layout class="QHBoxLayout" name="HLayout_3">
-           <item>
-            <widget class="QCheckBox" name="autonumber_series">
+           <item row="6" column="1">
+            <widget class="TagsLineEdit" name="remove_tags">
              <property name="toolTip">
-              <string>If not checked, the series number for the books will be set to 1.
+              <string>Comma separated list of tags to remove from the books. </string>
+             </property>
+            </widget>
+           </item>
+           <item row="6" column="2">
+            <widget class="QCheckBox" name="remove_all_tags">
+             <property name="toolTip">
+              <string>Check this box to remove all tags from the books.</string>
+             </property>
+             <property name="text">
+              <string>Remove all</string>
+             </property>
+            </widget>
+           </item>
+           <item row="7" column="0">
+            <widget class="QLabel" name="label_7">
+             <property name="text">
+              <string>&amp;Series:</string>
+             </property>
+             <property name="textFormat">
+              <enum>Qt::PlainText</enum>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>series</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="7" column="1">
+            <layout class="QHBoxLayout" name="HLayout_34">
+             <item>
+              <widget class="EnComboBox" name="series">
+               <property name="toolTip">
+                <string>List of known series. You can add new series.</string>
+               </property>
+               <property name="whatsThis">
+                <string>List of known series. You can add new series.</string>
+               </property>
+               <property name="editable">
+                <bool>true</bool>
+               </property>
+               <property name="insertPolicy">
+                <enum>QComboBox::InsertAlphabetically</enum>
+               </property>
+               <property name="sizeAdjustPolicy">
+                <enum>QComboBox::AdjustToContents</enum>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QCheckBox" name="clear_series">
+               <property name="toolTip">
+                <string>If checked, the series will be cleared</string>
+               </property>
+               <property name="text">
+                <string>Clear series</string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <spacer name="HSpacer_344">
+               <property name="orientation">
+                <enum>Qt::Horizontal</enum>
+               </property>
+               <property name="sizeHint" stdset="0">
+                <size>
+                 <width>20</width>
+                 <height>0</height>
+                </size>
+               </property>
+              </spacer>
+             </item>
+            </layout>
+           </item>
+           <item row="8" column="1" colspan="2">
+            <layout class="QHBoxLayout" name="HLayout_3">
+             <item>
+              <widget class="QCheckBox" name="autonumber_series">
+               <property name="toolTip">
+                <string>If not checked, the series number for the books will be set to 1.
 If checked, selected books will be automatically numbered, in the order
 you selected them. So if you selected Book A and then Book B,
 Book A will have series number 1 and Book B series number 2.</string>
-             </property>
-             <property name="text">
-              <string>Automatically number books in this series</string>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <widget class="QCheckBox" name="series_numbering_restarts">
-             <property name="enabled">
-              <bool>false</bool>
-             </property>
-             <property name="toolTip">
-              <string>Series will normally be renumbered from the highest number in the database
+               </property>
+               <property name="text">
+                <string>Automatically number books in this series</string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QCheckBox" name="series_numbering_restarts">
+               <property name="enabled">
+                <bool>false</bool>
+               </property>
+               <property name="toolTip">
+                <string>Series will normally be renumbered from the highest number in the database
 for that series. Checking this box will tell calibre to start numbering
 from the value in the box</string>
+               </property>
+               <property name="text">
+                <string>Force numbers to start with </string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QSpinBox" name="series_start_number">
+               <property name="enabled">
+                <bool>false</bool>
+               </property>
+               <property name="minimum">
+                <number>1</number>
+               </property>
+               <property name="maximum">
+                <number>990000</number>
+               </property>
+               <property name="value">
+                <number>1</number>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <spacer name="HSpacer_34">
+               <property name="orientation">
+                <enum>Qt::Horizontal</enum>
+               </property>
+               <property name="sizeHint" stdset="0">
+                <size>
+                 <width>20</width>
+                 <height>10</height>
+                </size>
+               </property>
+              </spacer>
+             </item>
+            </layout>
+           </item>
+           <item row="9" column="0">
+            <widget class="QLabel" name="label_5">
+             <property name="text">
+              <string>Remove &amp;format:</string>
+             </property>
+             <property name="buddy">
+              <cstring>remove_format</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="9" column="1">
+            <widget class="QComboBox" name="remove_format"/>
+           </item>
+           <item row="0" column="1">
+            <widget class="EnComboBox" name="authors">
+             <property name="editable">
+              <bool>true</bool>
+             </property>
+            </widget>
+           </item>
+           <item row="11" column="0" colspan="2">
+            <widget class="QCheckBox" name="swap_title_and_author">
+             <property name="text">
+              <string>&amp;Swap title and author</string>
+             </property>
+            </widget>
+           </item>
+           <item row="12" column="0" colspan="2">
+            <widget class="QCheckBox" name="change_title_to_title_case">
+             <property name="toolTip">
+              <string>Force the title to be in title case. If both this and swap authors are checked,
+title and author are swapped before the title case is set</string>
              </property>
              <property name="text">
-              <string>Force numbers to start with </string>
+              <string>Change title to title case</string>
              </property>
             </widget>
            </item>
-           <item>
-            <widget class="QSpinBox" name="series_start_number">
-             <property name="enabled">
-              <bool>false</bool>
-             </property>
-             <property name="minimum">
-              <number>1</number>
-             </property>
-             <property name="maximum">
-              <number>990000</number>
-             </property>
-             <property name="value">
-              <number>1</number>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <spacer name="HSpacer_34">
-             <property name="orientation">
-              <enum>Qt::Horizontal</enum>
-             </property>
-             <property name="sizeHint" stdset="0">
-              <size>
-               <width>20</width>
-               <height>10</height>
-              </size>
-             </property>
-            </spacer>
-           </item>
-          </layout>
-         </item>
-         <item row="9" column="0">
-          <widget class="QLabel" name="label_5">
-           <property name="text">
-            <string>Remove &amp;format:</string>
-           </property>
-           <property name="buddy">
-            <cstring>remove_format</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="9" column="1">
-          <widget class="QComboBox" name="remove_format"/>
-         </item>
-         <item row="0" column="1">
-          <widget class="EnComboBox" name="authors">
-           <property name="editable">
-            <bool>true</bool>
-           </property>
-          </widget>
-         </item>
-         <item row="11" column="0" colspan="2">
-          <widget class="QCheckBox" name="swap_title_and_author">
-           <property name="text">
-            <string>&amp;Swap title and author</string>
-           </property>
-          </widget>
-         </item>
-         <item row="12" column="0" colspan="2">
-          <widget class="QCheckBox" name="change_title_to_title_case">
-           <property name="toolTip">
-            <string>Force the title to be in title case. If both this and swap authors are checked,
-title and author are swapped before the title case is set</string>
-           </property>
-           <property name="text">
-            <string>Change title to title case</string>
-           </property>
-          </widget>
-         </item>
-         <item row="10" column="0" colspan="2">
-          <widget class="QCheckBox" name="remove_conversion_settings">
-           <property name="toolTip">
-            <string>Remove stored conversion settings for the selected books.
+           <item row="10" column="0" colspan="2">
+            <widget class="QCheckBox" name="remove_conversion_settings">
+             <property name="toolTip">
+              <string>Remove stored conversion settings for the selected books.
 
 Future conversion of these books will use the default settings.</string>
-           </property>
-           <property name="text">
-            <string>Remove &amp;stored conversion settings for the selected books</string>
-           </property>
-          </widget>
-         </item>
-         <item row="14" column="0" colspan="3">
-          <spacer name="verticalSpacer_2">
-           <property name="orientation">
-            <enum>Qt::Vertical</enum>
-           </property>
-           <property name="sizeHint" stdset="0">
-            <size>
-             <width>20</width>
-             <height>40</height>
-            </size>
-           </property>
-          </spacer>
-         </item>
-         <item row="13" column="0" colspan="3">
-          <widget class="QGroupBox" name="groupBox">
-           <property name="title">
-            <string>Change &amp;cover</string>
-           </property>
-           <layout class="QHBoxLayout" name="horizontalLayout">
-            <item>
-             <widget class="QRadioButton" name="cover_generate">
-              <property name="text">
-               <string>&amp;Generate default cover</string>
-              </property>
-             </widget>
-            </item>
-            <item>
-             <widget class="QRadioButton" name="cover_remove">
-              <property name="text">
-               <string>&amp;Remove cover</string>
-              </property>
-             </widget>
-            </item>
-            <item>
-             <widget class="QRadioButton" name="cover_from_fmt">
-              <property name="text">
-               <string>Set from &amp;ebook file(s)</string>
-              </property>
-             </widget>
-            </item>
-           </layout>
-          </widget>
-         </item>
-        </layout>
-       </widget>
-       <widget class="QWidget" name="tab">
-        <attribute name="title">
-         <string>&amp;Custom metadata</string>
-        </attribute>
-       </widget>
-       <widget class="QWidget" name="tabWidgetPage3">
-        <attribute name="title">
-         <string>&amp;Search and replace</string>
-        </attribute>
-        <layout class="QGridLayout" name="vargrid">
-         <property name="sizeConstraint">
-          <enum>QLayout::SetMinimumSize</enum>
-         </property>
-         <item row="1" column="0" colspan="3">
-          <widget class="QLabel" name="s_r_heading">
-           <property name="wordWrap">
-            <bool>true</bool>
-           </property>
-           <property name="openExternalLinks">
-            <bool>true</bool>
-           </property>
-          </widget>
-         </item>
-         <item row="2" column="0">
-          <widget class="QLabel" name="filler">
-           <property name="text">
-            <string/>
-           </property>
-          </widget>
-         </item>
-         <item row="3" column="0">
-          <widget class="QLabel" name="xlabel_21">
-           <property name="text">
-            <string>Search &amp;field:</string>
-           </property>
-           <property name="buddy">
-            <cstring>search_field</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="3" column="1">
-          <widget class="QComboBox" name="search_field">
-           <property name="toolTip">
-            <string>The name of the field that you want to search</string>
-           </property>
-          </widget>
-         </item>
-         <item row="3" column="2">
-          <layout class="QHBoxLayout" name="HLayout_3">
-           <item>
-            <widget class="QLabel" name="xlabel_24">
+             </property>
              <property name="text">
-              <string>Search &amp;mode:</string>
-             </property>
-             <property name="buddy">
-              <cstring>search_mode</cstring>
+              <string>Remove &amp;stored conversion settings for the selected books</string>
              </property>
             </widget>
            </item>
-           <item>
-            <widget class="QComboBox" name="search_mode">
-             <property name="toolTip">
-              <string>Choose whether to use basic text matching or advanced regular expression matching</string>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <spacer name="HSpacer_2">
+           <item row="14" column="0" colspan="3">
+            <spacer name="verticalSpacer_2">
              <property name="orientation">
-              <enum>Qt::Horizontal</enum>
+              <enum>Qt::Vertical</enum>
              </property>
              <property name="sizeHint" stdset="0">
               <size>
                <width>20</width>
-               <height>10</height>
+               <height>40</height>
               </size>
              </property>
             </spacer>
            </item>
-          </layout>
-         </item>
-         <item row="4" column="0">
-          <widget class="QLabel" name="template_label">
-           <property name="text">
-            <string>Te&amp;mplate:</string>
-           </property>
-           <property name="buddy">
-            <cstring>s_r_template</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="4" column="1">
-          <widget class="HistoryLineEdit" name="s_r_template">
-           <property name="sizePolicy">
-            <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
-             <horstretch>100</horstretch>
-             <verstretch>0</verstretch>
-            </sizepolicy>
-           </property>
-           <property name="toolTip">
-            <string>Enter a template to be used as the source for the search/replace</string>
-           </property>
-          </widget>
-         </item>
-         <item row="5" column="0">
-          <widget class="QLabel" name="xlabel_2">
-           <property name="text">
-            <string>&amp;Search for:</string>
-           </property>
-           <property name="buddy">
-            <cstring>search_for</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="5" column="1">
-          <widget class="HistoryLineEdit" name="search_for">
-           <property name="sizePolicy">
-            <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
-             <horstretch>100</horstretch>
-             <verstretch>0</verstretch>
-            </sizepolicy>
-           </property>
-           <property name="toolTip">
-            <string>Enter the what you are looking for, either plain text or a regular expression, depending on the mode</string>
-           </property>
-          </widget>
-         </item>
-         <item row="5" column="2">
-          <widget class="QCheckBox" name="case_sensitive">
-           <property name="toolTip">
-            <string>Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored</string>
-           </property>
-           <property name="text">
-            <string>Cas&amp;e sensitive</string>
-           </property>
-           <property name="checked">
-            <bool>true</bool>
-           </property>
-          </widget>
-         </item>
-         <item row="6" column="0">
-          <widget class="QLabel" name="xlabel_4">
-           <property name="text">
-            <string>&amp;Replace with:</string>
-           </property>
-           <property name="buddy">
-            <cstring>replace_with</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="6" column="1">
-          <widget class="HistoryLineEdit" name="replace_with">
-           <property name="toolTip">
-            <string>The replacement text. The matched search text will be replaced with this string</string>
-           </property>
-          </widget>
-         </item>
-         <item row="6" column="2">
-          <layout class="QHBoxLayout" name="verticalLayout">
-           <item>
-            <widget class="QLabel" name="label_41">
-             <property name="text">
-              <string>&amp;Apply function after replace:</string>
-             </property>
-             <property name="buddy">
-              <cstring>replace_func</cstring>
+           <item row="13" column="0" colspan="3">
+            <widget class="QGroupBox" name="groupBox">
+             <property name="title">
+              <string>Change &amp;cover</string>
              </property>
+             <layout class="QHBoxLayout" name="horizontalLayout">
+              <item>
+               <widget class="QRadioButton" name="cover_generate">
+                <property name="text">
+                 <string>&amp;Generate default cover</string>
+                </property>
+               </widget>
+              </item>
+              <item>
+               <widget class="QRadioButton" name="cover_remove">
+                <property name="text">
+                 <string>&amp;Remove cover</string>
+                </property>
+               </widget>
+              </item>
+              <item>
+               <widget class="QRadioButton" name="cover_from_fmt">
+                <property name="text">
+                 <string>Set from &amp;ebook file(s)</string>
+                </property>
+               </widget>
+              </item>
+             </layout>
             </widget>
            </item>
-           <item>
-            <widget class="QComboBox" name="replace_func">
-             <property name="toolTip">
-              <string>Specify how the text is to be processed after matching and replacement. In character mode, the entire
-field is processed. In regular expression mode, only the matched text is processed</string>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <spacer name="HSpacer_1">
-             <property name="orientation">
-              <enum>Qt::Horizontal</enum>
-             </property>
-             <property name="sizeHint" stdset="0">
-              <size>
-               <width>20</width>
-               <height>10</height>
-              </size>
-             </property>
-            </spacer>
-           </item>
           </layout>
-         </item>
-         <item row="7" column="0">
-          <widget class="QLabel" name="destination_field_label">
-           <property name="text">
-            <string>&amp;Destination field:</string>
+         </widget>
+         <widget class="QWidget" name="tab">
+          <attribute name="title">
+           <string>&amp;Custom metadata</string>
+          </attribute>
+         </widget>
+         <widget class="QWidget" name="tabWidgetPage3">
+          <attribute name="title">
+           <string>&amp;Search and replace</string>
+          </attribute>
+          <layout class="QGridLayout" name="vargrid">
+           <property name="sizeConstraint">
+            <enum>QLayout::SetMinimumSize</enum>
            </property>
-           <property name="buddy">
-            <cstring>destination_field</cstring>
-           </property>
-          </widget>
-         </item>
-         <item row="7" column="1">
-          <widget class="QComboBox" name="destination_field">
-           <property name="toolTip">
-            <string>The field that the text will be put into after all replacements.
-If blank, the source field is used if the field is modifiable</string>
-           </property>
-          </widget>
-         </item>
-         <item row="7" column="2">
-          <layout class="QHBoxLayout" name="verticalLayout">
-           <item>
-            <widget class="QLabel" name="replace_mode_label">
+           <item row="1" column="0" colspan="3">
+            <widget class="QLabel" name="s_r_heading">
+             <property name="wordWrap">
+              <bool>true</bool>
+             </property>
+             <property name="openExternalLinks">
+              <bool>true</bool>
+             </property>
+            </widget>
+           </item>
+           <item row="2" column="0">
+            <widget class="QLabel" name="filler">
              <property name="text">
-              <string>M&amp;ode:</string>
+              <string/>
+             </property>
+            </widget>
+           </item>
+           <item row="3" column="0">
+            <widget class="QLabel" name="xlabel_21">
+             <property name="text">
+              <string>Search &amp;field:</string>
              </property>
              <property name="buddy">
-              <cstring>replace_mode</cstring>
+              <cstring>search_field</cstring>
              </property>
             </widget>
            </item>
-           <item>
-            <widget class="QComboBox" name="replace_mode">
+           <item row="3" column="1">
+            <widget class="QComboBox" name="search_field">
              <property name="toolTip">
-              <string>Specify how the text should be copied into the destination.</string>
+              <string>The name of the field that you want to search</string>
              </property>
             </widget>
            </item>
-           <item>
-            <widget class="QCheckBox" name="comma_separated">
+           <item row="3" column="2">
+            <layout class="QHBoxLayout" name="HLayout_3">
+             <item>
+              <widget class="QLabel" name="xlabel_24">
+               <property name="text">
+                <string>Search &amp;mode:</string>
+               </property>
+               <property name="buddy">
+                <cstring>search_mode</cstring>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QComboBox" name="search_mode">
+               <property name="toolTip">
+                <string>Choose whether to use basic text matching or advanced regular expression matching</string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <spacer name="HSpacer_2">
+               <property name="orientation">
+                <enum>Qt::Horizontal</enum>
+               </property>
+               <property name="sizeHint" stdset="0">
+                <size>
+                 <width>20</width>
+                 <height>10</height>
+                </size>
+               </property>
+              </spacer>
+             </item>
+            </layout>
+           </item>
+           <item row="4" column="0">
+            <widget class="QLabel" name="template_label">
+             <property name="text">
+              <string>Te&amp;mplate:</string>
+             </property>
+             <property name="buddy">
+              <cstring>s_r_template</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="4" column="1">
+            <widget class="HistoryLineEdit" name="s_r_template">
+             <property name="sizePolicy">
+              <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
+               <horstretch>100</horstretch>
+               <verstretch>0</verstretch>
+              </sizepolicy>
+             </property>
              <property name="toolTip">
-              <string>Specifies whether result items should be split into multiple values or
-left as single values. This option has the most effect when the source field is
-not multiple and the destination field is multiple</string>
+              <string>Enter a template to be used as the source for the search/replace</string>
+             </property>
+            </widget>
+           </item>
+           <item row="5" column="0">
+            <widget class="QLabel" name="xlabel_2">
+             <property name="text">
+              <string>&amp;Search for:</string>
+             </property>
+             <property name="buddy">
+              <cstring>search_for</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="5" column="1">
+            <widget class="HistoryLineEdit" name="search_for">
+             <property name="sizePolicy">
+              <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
+               <horstretch>100</horstretch>
+               <verstretch>0</verstretch>
+              </sizepolicy>
+             </property>
+             <property name="toolTip">
+              <string>Enter what you are looking for, either plain text or a regular expression, depending on the mode</string>
+             </property>
+            </widget>
+           </item>
+           <item row="5" column="2">
+            <widget class="QCheckBox" name="case_sensitive">
+             <property name="toolTip">
+              <string>Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored</string>
              </property>
              <property name="text">
-              <string>Split &amp;result</string>
+              <string>Cas&amp;e sensitive</string>
              </property>
              <property name="checked">
               <bool>true</bool>
              </property>
             </widget>
            </item>
-           <item>
-            <spacer name="zHSpacer_1">
-             <property name="orientation">
-              <enum>Qt::Horizontal</enum>
-             </property>
-             <property name="sizeHint" stdset="0">
-              <size>
-               <width>20</width>
-               <height>10</height>
-              </size>
-             </property>
-            </spacer>
-           </item>
-          </layout>
-         </item>
-         <item row="8" column="1" colspan="2">
-          <layout class="QHBoxLayout" name="horizontalLayout_21">
-           <item>
-            <spacer name="HSpacer_347">
-             <property name="orientation">
-              <enum>Qt::Horizontal</enum>
-             </property>
-             <property name="sizeHint" stdset="0">
-              <size>
-               <width>20</width>
-               <height>0</height>
-              </size>
-             </property>
-            </spacer>
-           </item>
-           <item>
-            <widget class="QLabel" name="xlabel_412">
+           <item row="6" column="0">
+            <widget class="QLabel" name="xlabel_4">
              <property name="text">
-              <string>For multiple-valued fields, sho&amp;w</string>
+              <string>&amp;Replace with:</string>
              </property>
              <property name="buddy">
-              <cstring>results_count</cstring>
+              <cstring>replace_with</cstring>
              </property>
             </widget>
            </item>
-           <item>
-            <widget class="QSpinBox" name="results_count">
-             <property name="enabled">
-              <bool>true</bool>
-             </property>
-             <property name="minimum">
-              <number>1</number>
-             </property>
-             <property name="maximum">
-              <number>999</number>
-             </property>
-             <property name="value">
-              <number>999</number>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <widget class="QLabel" name="xlabel_412">
-             <property name="text">
-              <string>values starting a&amp;t</string>
-             </property>
-             <property name="buddy">
-              <cstring>starting_from</cstring>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <widget class="QSpinBox" name="starting_from">
-             <property name="enabled">
-              <bool>true</bool>
-             </property>
-             <property name="minimum">
-              <number>1</number>
-             </property>
-             <property name="maximum">
-              <number>999</number>
-             </property>
-             <property name="value">
-              <number>1</number>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <widget class="QLabel" name="xlabel_41">
-             <property name="text">
-              <string>with values separated b&amp;y</string>
-             </property>
-             <property name="buddy">
-              <cstring>multiple_separator</cstring>
-             </property>
-            </widget>
-           </item>
-           <item>
-            <widget class="QLineEdit" name="multiple_separator">
+           <item row="6" column="1">
+            <widget class="HistoryLineEdit" name="replace_with">
              <property name="toolTip">
-              <string>Used when displaying test results to separate values in multiple-valued fields</string>
+              <string>The replacement text. The matched search text will be replaced with this string</string>
              </property>
             </widget>
            </item>
-          </layout>
-         </item>
-         <item row="9" column="0" colspan="4">
-          <widget class="QScrollArea" name="scrollArea11">
-           <property name="frameShape">
-            <enum>QFrame::NoFrame</enum>
-           </property>
-           <property name="widgetResizable">
-            <bool>true</bool>
-           </property>
-           <widget class="QWidget" name="gridLayoutWidget_2">
-            <property name="geometry">
-             <rect>
-              <x>0</x>
-              <y>0</y>
-              <width>726</width>
-              <height>334</height>
-             </rect>
-            </property>
-            <layout class="QGridLayout" name="testgrid">
-             <item row="7" column="1">
-              <widget class="QLabel" name="xlabel_3">
+           <item row="6" column="2">
+            <layout class="QHBoxLayout" name="verticalLayout">
+             <item>
+              <widget class="QLabel" name="label_41">
                <property name="text">
-                <string>Test text</string>
+                <string>&amp;Apply function after replace:</string>
+               </property>
+               <property name="buddy">
+                <cstring>replace_func</cstring>
                </property>
               </widget>
              </item>
-             <item row="7" column="2">
-              <widget class="QLabel" name="xlabel_3">
-               <property name="text">
-                <string>Test result</string>
+             <item>
+              <widget class="QComboBox" name="replace_func">
+               <property name="toolTip">
+                <string>Specify how the text is to be processed after matching and replacement. In character mode, the entire
+field is processed. In regular expression mode, only the matched text is processed</string>
                </property>
               </widget>
              </item>
-             <item row="8" column="0">
-              <widget class="QLabel" name="label_31">
-               <property name="text">
-                <string>Your test:</string>
-               </property>
-              </widget>
-             </item>
-             <item row="8" column="1">
-              <widget class="HistoryLineEdit" name="test_text"/>
-             </item>
-             <item row="8" column="2">
-              <widget class="QLineEdit" name="test_result"/>
-             </item>
-             <item row="25" column="0" colspan="2">
-              <spacer name="verticalSpacer_2">
+             <item>
+              <spacer name="HSpacer_1">
                <property name="orientation">
-                <enum>Qt::Vertical</enum>
+                <enum>Qt::Horizontal</enum>
                </property>
                <property name="sizeHint" stdset="0">
                 <size>
                  <width>20</width>
-                 <height>5</height>
+                 <height>10</height>
                 </size>
                </property>
               </spacer>
              </item>
             </layout>
-           </widget>
-          </widget>
-         </item>
-        </layout>
-       </widget>
-      </widget>
-     </item>
-    </layout>
+           </item>
+           <item row="7" column="0">
+            <widget class="QLabel" name="destination_field_label">
+             <property name="text">
+              <string>&amp;Destination field:</string>
+             </property>
+             <property name="buddy">
+              <cstring>destination_field</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="7" column="1">
+            <widget class="QComboBox" name="destination_field">
+             <property name="toolTip">
+              <string>The field that the text will be put into after all replacements.
+If blank, the source field is used if the field is modifiable</string>
+             </property>
+            </widget>
+           </item>
+           <item row="7" column="2">
+            <layout class="QHBoxLayout" name="verticalLayout">
+             <item>
+              <widget class="QLabel" name="replace_mode_label">
+               <property name="text">
+                <string>M&amp;ode:</string>
+               </property>
+               <property name="buddy">
+                <cstring>replace_mode</cstring>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QComboBox" name="replace_mode">
+               <property name="toolTip">
+                <string>Specify how the text should be copied into the destination.</string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QCheckBox" name="comma_separated">
+               <property name="toolTip">
+                <string>Specifies whether result items should be split into multiple values or
+left as single values. This option has the most effect when the source field is
+not multiple and the destination field is multiple</string>
+               </property>
+               <property name="text">
+                <string>Split &amp;result</string>
+               </property>
+               <property name="checked">
+                <bool>true</bool>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <spacer name="zHSpacer_1">
+               <property name="orientation">
+                <enum>Qt::Horizontal</enum>
+               </property>
+               <property name="sizeHint" stdset="0">
+                <size>
+                 <width>20</width>
+                 <height>10</height>
+                </size>
+               </property>
+              </spacer>
+             </item>
+            </layout>
+           </item>
+           <item row="8" column="1" colspan="2">
+            <layout class="QHBoxLayout" name="horizontalLayout_21">
+             <item>
+              <spacer name="HSpacer_347">
+               <property name="orientation">
+                <enum>Qt::Horizontal</enum>
+               </property>
+               <property name="sizeHint" stdset="0">
+                <size>
+                 <width>20</width>
+                 <height>0</height>
+                </size>
+               </property>
+              </spacer>
+             </item>
+             <item>
+              <widget class="QLabel" name="xlabel_412">
+               <property name="text">
+                <string>For multiple-valued fields, sho&amp;w</string>
+               </property>
+               <property name="buddy">
+                <cstring>results_count</cstring>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QSpinBox" name="results_count">
+               <property name="enabled">
+                <bool>true</bool>
+               </property>
+               <property name="minimum">
+                <number>1</number>
+               </property>
+               <property name="maximum">
+                <number>999</number>
+               </property>
+               <property name="value">
+                <number>999</number>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QLabel" name="xlabel_412">
+               <property name="text">
+                <string>values starting a&amp;t</string>
+               </property>
+               <property name="buddy">
+                <cstring>starting_from</cstring>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QSpinBox" name="starting_from">
+               <property name="enabled">
+                <bool>true</bool>
+               </property>
+               <property name="minimum">
+                <number>1</number>
+               </property>
+               <property name="maximum">
+                <number>999</number>
+               </property>
+               <property name="value">
+                <number>1</number>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QLabel" name="xlabel_41">
+               <property name="text">
+                <string>with values separated b&amp;y</string>
+               </property>
+               <property name="buddy">
+                <cstring>multiple_separator</cstring>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QLineEdit" name="multiple_separator">
+               <property name="toolTip">
+                <string>Used when displaying test results to separate values in multiple-valued fields</string>
+               </property>
+              </widget>
+             </item>
+            </layout>
+           </item>
+           <item row="9" column="0" colspan="4">
+            <widget class="QScrollArea" name="scrollArea11">
+             <property name="frameShape">
+              <enum>QFrame::NoFrame</enum>
+             </property>
+             <property name="widgetResizable">
+              <bool>true</bool>
+             </property>
+             <widget class="QWidget" name="gridLayoutWidget_2">
+              <property name="geometry">
+               <rect>
+                <x>0</x>
+                <y>0</y>
+                <width>197</width>
+                <height>60</height>
+               </rect>
+              </property>
+              <layout class="QGridLayout" name="testgrid">
+               <item row="7" column="1">
+                <widget class="QLabel" name="xlabel_3">
+                 <property name="text">
+                  <string>Test text</string>
+                 </property>
+                </widget>
+               </item>
+               <item row="7" column="2">
+                <widget class="QLabel" name="xlabel_3">
+                 <property name="text">
+                  <string>Test result</string>
+                 </property>
+                </widget>
+               </item>
+               <item row="8" column="0">
+                <widget class="QLabel" name="label_31">
+                 <property name="text">
+                  <string>Your test:</string>
+                 </property>
+                </widget>
+               </item>
+               <item row="8" column="1">
+                <widget class="HistoryLineEdit" name="test_text"/>
+               </item>
+               <item row="8" column="2">
+                <widget class="QLineEdit" name="test_result"/>
+               </item>
+               <item row="25" column="0" colspan="2">
+                <spacer name="verticalSpacer_2">
+                 <property name="orientation">
+                  <enum>Qt::Vertical</enum>
+                 </property>
+                 <property name="sizeHint" stdset="0">
+                  <size>
+                   <width>20</width>
+                   <height>5</height>
+                  </size>
+                 </property>
+                </spacer>
+               </item>
+              </layout>
+             </widget>
+            </widget>
+           </item>
+          </layout>
+         </widget>
+        </widget>
+       </item>
+      </layout>
+     </widget>
+    </widget>
    </item>
-   <item>
+   <item row="2" column="0">
     <widget class="QDialogButtonBox" name="button_box">
      <property name="orientation">
       <enum>Qt::Horizontal</enum>
@@ -893,7 +911,6 @@ not multiple and the destination field is multiple</string>
   <tabstop>swap_title_and_author</tabstop>
   <tabstop>change_title_to_title_case</tabstop>
   <tabstop>button_box</tabstop>
-  <tabstop>central_widget</tabstop>
   <tabstop>search_field</tabstop>
   <tabstop>search_mode</tabstop>
   <tabstop>s_r_template</tabstop>

From 173e7eb9720c88fec4947c170ddd24d9d795d9b6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 11:19:02 -0700
Subject: [PATCH 37/55] ...

---
 src/calibre/gui2/dialogs/metadata_bulk.ui | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui
index 5c0f1ec78f..9240cd1af8 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.ui
+++ b/src/calibre/gui2/dialogs/metadata_bulk.ui
@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>819</width>
+    <width>850</width>
     <height>650</height>
    </rect>
   </property>
@@ -44,7 +44,7 @@
        <rect>
         <x>0</x>
         <y>0</y>
-        <width>811</width>
+        <width>842</width>
         <height>589</height>
        </rect>
       </property>

From 08d7a000c95eb91e9d31eb6af27a74868f6851b1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 11:40:46 -0700
Subject: [PATCH 38/55] Prevent custom columns from overriding the information
 shown in the book details panel

---
 src/calibre/gui2/library/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index 38a4b28744..eea452c238 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -358,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
             name, val = mi.format_field(key)
             if mi.metadata_for_field(key)['datatype'] == 'comments':
                 name += ':html'
-            if val:
+            if val and name not in data:
                 data[name] = val
+
         return data
 
 

From 1381fe91605155fe3e8e8b418992087b71cbf756 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 12:45:09 -0700
Subject: [PATCH 39/55] ...

---
 src/calibre/__init__.py         | 12 ++++++++++++
 src/calibre/gui2/viewer/main.py |  3 ++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 2585b5d081..a4f7439405 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
                         obj = obj.decode('utf-8')
     return obj
 
+def as_unicode(obj, enc=preferred_encoding):  # best-effort coercion of any object to unicode
+    if not isbytestring(obj):
+        try:
+            obj = unicode(obj)
+        except Exception:  # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
+            try:
+                obj = str(obj)
+            except Exception:
+                obj = repr(obj)  # last resort when both conversions fail
+    return force_unicode(obj, enc=enc)
+
+
 
 def human_readable(size):
     """ Convert a size in bytes into a human readable form """
diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index 25f69b1558..6468cd88c6 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
 from calibre.ebooks.metadata import MetaInformation
 from calibre.customize.ui import available_input_formats
 from calibre.gui2.viewer.dictionary import Lookup
+from calibre import as_unicode
 
 class TOCItem(QStandardItem):
 
@@ -632,7 +633,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
             else:
                 r = getattr(worker.exception, 'reason', worker.exception)
                 error_dialog(self, _('Could not open ebook'),
-                        unicode(r), det_msg=worker.traceback, show=True)
+                        as_unicode(r), det_msg=worker.traceback, show=True)
             self.close_progress_indicator()
         else:
             self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])

From f0447ea1cd29ff4405309251f3437949999970a0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 12:45:41 -0700
Subject: [PATCH 40/55] ...

---
 src/calibre/gui2/dialogs/metadata_single.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index c2588f57a8..a4e8bb6972 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -823,7 +823,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                                 if book.series_index is not None:
                                     self.series_index.setValue(book.series_index)
                         if book.has_cover:
-                            if d.opt_auto_download_cover.isChecked() and book.has_cover:
+                            if d.opt_auto_download_cover.isChecked():
                                 self.fetch_cover()
                             else:
                                 self.fetch_cover_button.setFocus(Qt.OtherFocusReason)

From a9c671efaf42cd533bd485e5243c12114f01a8f8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 12:50:02 -0700
Subject: [PATCH 41/55] ...

---
 resources/recipes/msnbc.recipe | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/resources/recipes/msnbc.recipe b/resources/recipes/msnbc.recipe
index f093479e2f..6e58585341 100644
--- a/resources/recipes/msnbc.recipe
+++ b/resources/recipes/msnbc.recipe
@@ -4,7 +4,6 @@ __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 msnbc.msn.com
 '''
 
-import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 class MsNBC(BasicNewsRecipe):
@@ -19,7 +18,7 @@ class MsNBC(BasicNewsRecipe):
     publisher              = 'msnbc.com'
     category               = 'news, USA, world'
     language               = 'en'
-    extra_css              = """ 
+    extra_css              = """
                                 body{ font-family: Georgia,Times,serif }
                                 .hide{display: none}
                                 .caption{font-family: Arial,sans-serif; font-size: x-small}
@@ -44,7 +43,7 @@ class MsNBC(BasicNewsRecipe):
                      ,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
                    ]
     remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
-    
+
     remove_tags      = [
                           dict(name=['iframe','object','link','embed','meta','table'])
                          ,dict(name='span', attrs={'class':['copyright','Linear copyright']})
@@ -70,7 +69,7 @@ class MsNBC(BasicNewsRecipe):
             if item.has_key('id') and item['id'].startswith('vine-'):
                item.extract()
             if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
-               item.extract()            
+               item.extract()
         for item in soup.body.findAll('img'):
             if not item.has_key('alt'):
                item['alt'] = 'image'
@@ -83,6 +82,6 @@ class MsNBC(BasicNewsRecipe):
         for alink in soup.findAll('a'):
             if alink.string is not None:
                tstr = alink.string
-               alink.replaceWith(tstr)    
+               alink.replaceWith(tstr)
         return soup
 

From 24e60cc35778d6699e8f90d5a1308e93d75c0a86 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 12:55:15 -0700
Subject: [PATCH 42/55] Remove code duplication

---
 src/calibre/ebooks/txt/processor.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 3702bbfabe..e1979063c0 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -1,4 +1,8 @@
 # -*- coding: utf-8 -*-
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
 
 '''
 Read content from txt file.
@@ -10,10 +14,7 @@ from calibre import prepare_string_for_xml, isbytestring
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
 from calibre.ebooks.conversion.preprocess import DocAnalysis
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
+from calibre.utils.cleantext import clean_ascii_chars
 
 HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
 
@@ -33,9 +34,7 @@ def clean_txt(txt):
     # Remove excessive line breaks.
     txt = re.sub('\n{3,}', '\n\n', txt)
     #remove ASCII invalid chars : 0 to 8 and 11-14 to 24
-    chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
-    illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
-    txt = illegal_chars.sub('', txt)
+    txt = clean_ascii_chars(txt)
 
     return txt
 

From 0c685dcfe0d3407b877f3df63589a21a07ee4285 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 13:03:08 -0700
Subject: [PATCH 43/55] BibTeX catalog: Add support for custom columns

---
 src/calibre/gui2/catalog/catalog_bibtex.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/calibre/gui2/catalog/catalog_bibtex.py b/src/calibre/gui2/catalog/catalog_bibtex.py
index 5030cf6ec8..7b7739bb46 100644
--- a/src/calibre/gui2/catalog/catalog_bibtex.py
+++ b/src/calibre/gui2/catalog/catalog_bibtex.py
@@ -27,14 +27,17 @@ class PluginWidget(QWidget, Ui_Form):
     def __init__(self, parent=None):
         QWidget.__init__(self, parent)
         self.setupUi(self)
-        from calibre.library.catalog import FIELDS
-        self.all_fields = []
-        for x in FIELDS :
-            if x != 'all':
-                self.all_fields.append(x)
-                QListWidgetItem(x, self.db_fields)
 
     def initialize(self, name, db): #not working properly to update
+        from calibre.library.catalog import FIELDS
+
+        self.all_fields = [x for x in FIELDS if x != 'all']
+        #add custom columns
+        self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
+        #populate
+        for x in self.all_fields:
+            QListWidgetItem(x, self.db_fields)
+
         self.name = name
         fields = gprefs.get(name+'_db_fields', self.all_fields)
         # Restore the activated db_fields from last use

From be5519221ef0d99aa5aa4731a854d6075a131094 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 13:04:26 -0700
Subject: [PATCH 44/55] ...

---
 src/calibre/utils/cleantext.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py
index b4afe7576d..938960df93 100644
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2010, sengian <sengian1@gmail.com>'
 __docformat__ = 'restructuredtext en'
 
-import re
+import re, htmlentitydefs
 
 _ascii_pat = None
 
@@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None):
         pat = re.compile(u'|'.join(map(unichr, charlist)))
     return pat.sub('', txt)
 
+##
+# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
+# Removes HTML or XML character references and entities from a text string.
+#
+# @param text The HTML (or XML) source text.
+# @return The plain text, as a Unicode string, if necessary.
+
+def unescape(text, rm=False, rchar=u''):  # rm: replace references that cannot be resolved with rchar
+    def fixup(m, rm=rm, rchar=rchar):
+        text = m.group(0)
+        if text[:2] == "&#":
+            # character reference
+            try:
+                if text[:3] == "&#x":
+                    return unichr(int(text[3:-1], 16))
+                else:
+                    return unichr(int(text[2:-1]))
+            except (ValueError, OverflowError):  # not a number, or outside the range unichr accepts
+                pass
+        else:
+            # named entity
+            try:
+                return unichr(htmlentitydefs.name2codepoint[text[1:-1]])  # resolved: keep it even when rm is set
+            except KeyError:
+                pass
+        if rm:
+            return rchar #replace by char
+        return text # leave as is
+    return re.sub(r"&#?\w+;", fixup, text)

From ee98918a0c1d4464f2c79c492aa5d1cdf19ec0a9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 13:17:56 -0700
Subject: [PATCH 45/55] RTF metadata: Add support for publisher and tags. Fixes
 #6657 (RTF metadata)

---
 src/calibre/ebooks/metadata/rtf.py | 90 +++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 34 deletions(-)

diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py
index ad41125575..c20d880a2f 100644
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -10,7 +10,8 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
 title_pat    = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
 author_pat   = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
 comment_pat  = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
-category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
+tags_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
+publisher_pat = re.compile(r'\{\\info.*?\{\\manager(.*?)(?<!\\)\}', re.DOTALL)
 
 def get_document_info(stream):
     """
@@ -82,61 +83,73 @@ def decode(raw, codec):
 
 def get_metadata(stream):
     """ Return metadata as a L{MetaInfo} object """
-    title, author, comment, category = None, None, None, None
     stream.seek(0)
     if stream.read(5) != r'{\rtf':
-        return MetaInformation(None, None)
+        return MetaInformation(_('Unknown'))
     block = get_document_info(stream)[0]
     if not block:
-        return MetaInformation(None, None)
+        return MetaInformation(_('Unknown'))
 
     stream.seek(0)
     cpg = detect_codepage(stream)
     stream.seek(0)
 
     title_match = title_pat.search(block)
-    if title_match:
+    if title_match is not None:
         title = decode(title_match.group(1).strip(), cpg)
+    else:
+        title = _('Unknown')
     author_match = author_pat.search(block)
-    if author_match:
+    if author_match is not None:
         author = decode(author_match.group(1).strip(), cpg)
-    comment_match = comment_pat.search(block)
-    if comment_match:
-        comment = decode(comment_match.group(1).strip(), cpg)
-    category_match = category_pat.search(block)
-    if category_match:
-        category = decode(category_match.group(1).strip(), cpg)
-    mi = MetaInformation(title, author)
+    else:
+        author = None
+    mi = MetaInformation(title)
     if author:
         mi.authors = string_to_authors(author)
-    mi.comments = comment
-    mi.category = category
+
+    comment_match = comment_pat.search(block)
+    if comment_match is not None:
+        comment = decode(comment_match.group(1).strip(), cpg)
+        mi.comments = comment
+    tags_match = tags_pat.search(block)
+    if tags_match is not None:
+        tags = decode(tags_match.group(1).strip(), cpg)  # \category is written as a ', ' joined string
+        mi.tags = [t.strip() for t in tags.split(',') if t.strip()]  # tags are a list, not a string
+    publisher_match = publisher_pat.search(block)
+    if publisher_match is not None:
+        publisher = decode(publisher_match.group(1).strip(), cpg)
+        mi.publisher = publisher
+
     return mi
 
-
 def create_metadata(stream, options):
-    md = r'{\info'
+    md = [r'{\info']
     if options.title:
         title = options.title.encode('ascii', 'ignore')
-        md += r'{\title %s}'%(title,)
+        md.append(r'{\title %s}'%(title,))
     if options.authors:
         au = options.authors
         if not isinstance(au, basestring):
             au = u', '.join(au)
         author = au.encode('ascii', 'ignore')
-        md += r'{\author %s}'%(author,)
-    if options.get('category', None):
-        category = options.category.encode('ascii', 'ignore')
-        md += r'{\category %s}'%(category,)
+        md.append(r'{\author %s}'%(author,))
     comp = options.comment if hasattr(options, 'comment') else options.comments
     if comp:
         comment = comp.encode('ascii', 'ignore')
-        md += r'{\subject %s}'%(comment,)
-    if len(md) > 6:
-        md += '}'
+        md.append(r'{\subject %s}'%(comment,))
+    if options.publisher:
+        publisher = options.publisher.encode('ascii', 'ignore')
+        md.append(r'{\manager %s}'%(publisher,))
+    if options.tags:
+        tags = u', '.join(options.tags)
+        tags = tags.encode('ascii', 'ignore')
+        md.append(r'{\category %s}'%(tags,))
+    if len(md) > 1:
+        md.append('}')  # close the {\info group only when at least one field was added
         stream.seek(0)
         src   = stream.read()
-        ans = src[:6] + md + src[6:]
+        ans = src[:6] + ''.join(md) + src[6:]  # byte-string join: avoids implicit ASCII decoding of src
         stream.seek(0)
         stream.write(ans)
 
@@ -156,7 +169,7 @@ def set_metadata(stream, options):
 
         base_pat = r'\{\\name(.*?)(?<!\\)\}'
         title = options.title
-        if title != None:
+        if title is not None:
             title = title.encode('ascii', 'replace')
             pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
             if pat.search(src):
@@ -164,7 +177,7 @@ def set_metadata(stream, options):
             else:
                 src = add_metadata_item(src, 'title', title)
         comment = options.comments
-        if comment != None:
+        if comment is not None:
             comment = comment.encode('ascii', 'replace')
             pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
             if pat.search(src):
@@ -172,7 +185,7 @@ def set_metadata(stream, options):
             else:
                 src = add_metadata_item(src, 'subject', comment)
         author = options.authors
-        if author != None:
+        if author is not None:
             author =  ', '.join(author)
             author = author.encode('ascii', 'ignore')
             pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
@@ -180,14 +193,23 @@ def set_metadata(stream, options):
                 src = pat.sub(r'{\\author ' + author + r'}', src)
             else:
                 src = add_metadata_item(src, 'author', author)
-        category = options.get('category', None)
-        if category != None:
-            category = category.encode('ascii', 'replace')
+        tags = options.tags
+        if tags is not None:
+            tags =  ', '.join(tags)
+            tags = tags.encode('ascii', 'replace')
             pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
             if pat.search(src):
-                src = pat.sub(r'{\\category ' + category + r'}', src)
+                src = pat.sub(r'{\\category ' + tags + r'}', src)
             else:
-                src = add_metadata_item(src, 'category', category)
+                src = add_metadata_item(src, 'category', tags)
+        publisher = options.publisher
+        if publisher is not None:
+            publisher = publisher.encode('ascii', 'replace')
+            pat = re.compile(base_pat.replace('name', 'manager'), re.DOTALL)
+            if pat.search(src):
+                src = pat.sub(r'{\\manager ' + publisher + r'}', src)
+            else:
+                src = add_metadata_item(src, 'manager', publisher)
         stream.seek(pos + olen)
         after = stream.read()
         stream.seek(pos)

From a52f649aab66f97c04afad6617ad8874025ca700 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 13:25:17 -0700
Subject: [PATCH 46/55] ...

---
 src/calibre/trac/bzr_commit_plugin.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py
index df6bf699d1..6c36115cae 100644
--- a/src/calibre/trac/bzr_commit_plugin.py
+++ b/src/calibre/trac/bzr_commit_plugin.py
@@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
             suffix = 'The fix will be in the next release.'
         action = action+'ed'
         msg = '%s in branch %s. %s'%(action, nick, suffix)
+        msg = msg.replace('Fixesed', 'Fixed')
         server = xmlrpclib.ServerProxy(url)
         server.ticket.update(int(bug), msg,
                              {'status':'closed', 'resolution':'fixed'},

From 2f08bc5086d5928a59a0c4075e344441be44f4ff Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 13:44:15 -0700
Subject: [PATCH 47/55] Fix db2.get_metadata.formats

---
 src/calibre/library/database2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index d2654577b9..5f66297322 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -706,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         formats = row[fm['formats']]
         if not formats:
             formats = None
+        else:
+            formats = formats.split(',')
         mi.formats = formats
         tags = row[fm['tags']]
         if tags:

From 3e9e655674586063be99f3014c4c933e856970b4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 17:21:46 -0700
Subject: [PATCH 48/55] ImageMagick: When identifying an image don't read the
 whole image

---
 src/calibre/utils/magick/draw.py  |  5 ++++-
 src/calibre/utils/magick/magick.c | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py
index c03a8660c8..ad4b681b43 100644
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@@ -92,7 +92,10 @@ def identify_data(data):
     or raises an Exception if data is not an image.
     '''
     img = Image()
-    img.load(data)
+    if hasattr(img, 'identify'):
+        img.identify(data)
+    else:
+        img.load(data)
     width, height = img.size
     fmt = img.format
     return (width, height, fmt)
diff --git a/src/calibre/utils/magick/magick.c b/src/calibre/utils/magick/magick.c
index fd9563529a..869b77c736 100644
--- a/src/calibre/utils/magick/magick.c
+++ b/src/calibre/utils/magick/magick.c
@@ -456,6 +456,26 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
 
 // }}}
 
+// Image.identify {{{
+static PyObject *
+magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) {
+    const char *data;
+    Py_ssize_t dlen;
+    MagickBooleanType res;
+
+    NULL_CHECK(NULL)
+    if (!PyArg_ParseTuple(args, "s#", &data, &dlen)) return NULL;
+    // Ping the blob: identify the image without reading the whole image
+    res = MagickPingImageBlob(self->wand, data, dlen);
+
+    if (!res)
+        return magick_set_exception(self->wand);
+
+    Py_RETURN_NONE;
+}
+
+// }}}
+
 // Image.open {{{
 static PyObject *
 magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
@@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = {
     {"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS,
     "Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."},
 
+    {"identify", (PyCFunction)magick_Image_identify, METH_VARARGS,
+     "Identify an image from a byte buffer (string)"
+    },
+
     {"load", (PyCFunction)magick_Image_load, METH_VARARGS,
      "Load an image from a byte buffer (string)"
     },

From 7fde6cbead97f614fe7be51aa2b0f66f823d140d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 17:23:53 -0700
Subject: [PATCH 49/55] RTF Input: Various code cleanups. Go back to trying to
 handle unicode mappings without pre-processing (Fixes #8171 (Unsupported
 charsets ie non ascii in RTF)). Fix bug in handling super/sub scripts.

---
 resources/templates/rtf.xsl                   |   4 +-
 src/calibre/ebooks/rtf/input.py               | 101 ++++----
 src/calibre/ebooks/rtf2xml/ParseRtf.py        |  85 +++---
 src/calibre/ebooks/rtf2xml/check_brackets.py  |  42 +--
 src/calibre/ebooks/rtf2xml/check_encoding.py  |  33 ++-
 src/calibre/ebooks/rtf2xml/combine_borders.py |  35 +--
 src/calibre/ebooks/rtf2xml/convert_to_tags.py |  55 +++-
 src/calibre/ebooks/rtf2xml/copy.py            |  14 +-
 .../ebooks/rtf2xml/default_encoding.py        | 179 +++++++++----
 src/calibre/ebooks/rtf2xml/delete_info.py     | 132 +++++-----
 src/calibre/ebooks/rtf2xml/footnote.py        | 132 +++++-----
 src/calibre/ebooks/rtf2xml/get_char_map.py    |  15 +-
 src/calibre/ebooks/rtf2xml/hex_2_utf8.py      |  96 ++++---
 src/calibre/ebooks/rtf2xml/inline.py          |  86 +++---
 src/calibre/ebooks/rtf2xml/line_endings.py    |  56 ++--
 src/calibre/ebooks/rtf2xml/pict.py            | 113 ++++----
 src/calibre/ebooks/rtf2xml/process_tokens.py  | 176 ++++++-------
 .../ebooks/rtf2xml/replace_illegals.py        |  20 +-
 src/calibre/ebooks/rtf2xml/tokenize.py        | 245 ++++++++++++------
 19 files changed, 904 insertions(+), 715 deletions(-)

diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl
index ea1fc71172..6db1c0388d 100644
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@@ -287,7 +287,7 @@
                 <xsl:value-of select="count(preceding::rtf:footnote) + 1"/>
                 <xsl:text>]</xsl:text>
             </xsl:when>
-            <xsl:when test="(@superscript = 'true')">
+            <xsl:when test="(@superscript)">
                 <xsl:element name="sup">
                     <xsl:element name="span">
                         <xsl:attribute name="class">
@@ -297,7 +297,7 @@
                     </xsl:element>
                 </xsl:element>
             </xsl:when>
-            <xsl:when test="(@underscript = 'true')">
+            <xsl:when test="(@underscript or @subscript)">
                 <xsl:element name="sub">
                     <xsl:element name="span">
                         <xsl:attribute name="class">
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 714a5b656f..ba13668eb7 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -77,7 +77,15 @@ class RTFInput(InputFormatPlugin):
 
     def generate_xml(self, stream):
         from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
-        ofile = 'out.xml'
+        ofile = 'dataxml.xml'
+        run_lev, debug_dir = 1, None
+        if getattr(self.opts, 'debug_pipeline', None) is not None:
+            try:
+                os.mkdir('rtfdebug')  # if this fails, debug mode stays off
+                debug_dir = 'rtfdebug'
+                run_lev = 4
+            except OSError:
+                pass
         parser = ParseRtf(
             in_file    = stream,
             out_file   = ofile,
@@ -115,43 +123,45 @@ class RTFInput(InputFormatPlugin):
 
             # Write or do not write paragraphs. Default is 0.
             empty_paragraphs = 1,
+
+            #debug
+            deb_dir = debug_dir,
+            run_level = run_lev,
         )
         parser.parse_rtf()
-        ans = open('out.xml').read()
-        os.remove('out.xml')
-        return ans
+        with open(ofile, 'rb') as f:
+            return f.read()
 
     def extract_images(self, picts):
+        import imghdr
         self.log('Extracting images...')
 
+        with open(picts, 'rb') as f:
+            raw = f.read()
+        picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
+        hex = re.compile(r'[^a-fA-F0-9]')
+        encs = [hex.sub('', pict) for pict in picts]
+
         count = 0
-        raw = open(picts, 'rb').read()
-        starts = []
-        for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
-            starts.append(match.start(1))
-
         imap = {}
-
-        for start in starts:
-            pos, bc = start, 1
-            while bc > 0:
-                if raw[pos] == '}': bc -= 1
-                elif raw[pos] == '{': bc += 1
-                pos += 1
-            pict = raw[start:pos+1]
-            enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
+        for enc in encs:
             if len(enc) % 2 == 1:
                 enc = enc[:-1]
             data = enc.decode('hex')
+            fmt = imghdr.what(None, data)
+            if fmt is None:
+                fmt = 'wmf'
             count += 1
-            name = (('%4d'%count).replace(' ', '0'))+'.wmf'
-            open(name, 'wb').write(data)
+            name = '%04d.%s' % (count, fmt)
+            with open(name, 'wb') as f:
+                f.write(data)
             imap[count] = name
             #open(name+'.hex', 'wb').write(enc)
         return self.convert_images(imap)
 
     def convert_images(self, imap):
-        for count, val in imap.items():
+        self.default_img = None
+        for count, val in imap.iteritems():
             try:
                 imap[count] = self.convert_image(val)
             except:
@@ -159,6 +169,8 @@ class RTFInput(InputFormatPlugin):
         return imap
 
     def convert_image(self, name):
+        if not name.endswith('.wmf'):
+            return name
         try:
             return self.rasterize_wmf(name)
         except:
@@ -167,16 +179,18 @@ class RTFInput(InputFormatPlugin):
 
     def replace_wmf(self, name):
         from calibre.ebooks import calibre_cover
-        data = calibre_cover('Conversion of WMF images is not supported',
+        if self.default_img is None:
+            self.default_img = calibre_cover('Conversion of WMF images is not supported',
             'Use Microsoft Word or OpenOffice to save this RTF file'
             ' as HTML and convert that in calibre.', title_size=36,
             author_size=20)
         name = name.replace('.wmf', '.jpg')
         with open(name, 'wb') as f:
-            f.write(data)
+            f.write(self.default_img)
         return name
 
     def rasterize_wmf(self, name):
+        raise ValueError('Conversion of WMF images not supported')
         from calibre.utils.wmf import extract_raster_image
         with open(name, 'rb') as f:
             data = f.read()
@@ -212,27 +226,27 @@ class RTFInput(InputFormatPlugin):
         css += '\n'+'\n'.join(font_size_classes)
         css += '\n' +'\n'.join(color_classes)
 
-        for cls, val in border_styles.items():
+        for cls, val in border_styles.iteritems():
             css += '\n\n.%s {\n%s\n}'%(cls, val)
 
         with open('styles.css', 'ab') as f:
             f.write(css)
 
-    def preprocess(self, fname):
-        self.log('\tPreprocessing to convert unicode characters')
-        try:
-            data = open(fname, 'rb').read()
-            from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
-            tokenizer = RtfTokenizer(data)
-            tokens = RtfTokenParser(tokenizer.tokens)
-            data = tokens.toRTF()
-            fname = 'preprocessed.rtf'
-            with open(fname, 'wb') as f:
-                f.write(data)
-        except:
-            self.log.exception(
-            'Failed to preprocess RTF to convert unicode sequences, ignoring...')
-        return fname
+    # def preprocess(self, fname):
+        # self.log('\tPreprocessing to convert unicode characters')
+        # try:
+            # data = open(fname, 'rb').read()
+            # from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
+            # tokenizer = RtfTokenizer(data)
+            # tokens = RtfTokenParser(tokenizer.tokens)
+            # data = tokens.toRTF()
+            # fname = 'preprocessed.rtf'
+            # with open(fname, 'wb') as f:
+                # f.write(data)
+        # except:
+            # self.log.exception(
+            # 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
+        # return fname
 
     def convert_borders(self, doc):
         border_styles = []
@@ -269,17 +283,14 @@ class RTFInput(InputFormatPlugin):
         self.log = log
         self.log('Converting RTF to XML...')
         #Name of the preprocesssed RTF file
-        fname = self.preprocess(stream.name)
+        # fname = self.preprocess(stream.name)
         try:
-            xml = self.generate_xml(fname)
+            xml = self.generate_xml(stream.name)
         except RtfInvalidCodeException, e:
+            self.log.exception('Unable to parse RTF')  # keep the traceback in the log
             raise ValueError(_('This RTF file has a feature calibre does not '
             'support. Convert it to HTML first and then try it.\n%s')%e)
 
-        '''dataxml = open('dataxml.xml', 'w')
-        dataxml.write(xml)
-        dataxml.close'''
-
         d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
         if d:
             imap = {}
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 7b89407f79..cdd9a3d088 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -17,7 +17,8 @@
 #########################################################################
 # $Revision: 1.41 $
 # $Date: 2006/03/24 23:50:07 $
-import sys,os
+import sys, os
+
 from calibre.ebooks.rtf2xml import headings_to_sections, \
     line_endings, footnote, fields_small, default_encoding, \
     make_lists, preamble_div, header, colors, group_borders, \
@@ -90,7 +91,6 @@ class ParseRtf:
                 out_file = '',
                 out_dir = None,
                 dtd = '',
-                #debug = 0, #why? calibre
                 deb_dir = None,
                 convert_symbol = None,
                 convert_wingdings = None,
@@ -107,6 +107,7 @@ class ParseRtf:
                 no_dtd = 0,
                 char_data = '',
                 ):
+
         """
         Requires:
         'file' --file to parse
@@ -119,12 +120,11 @@ class ParseRtf:
             script tries to output to directory where is script is exectued.)
             'deb_dir' --debug directory. If a debug_dir is provided, the script
             will copy each run through as a file to examine in the debug_dir
-            'perl_script'--use perl to make tokens. This runs just a bit faster.
-            (I will probably phase this out.)
             'check_brackets' -- make sure the brackets match up after each run
             through a file. Only for debugging.
         Returns: Nothing
         """
+
         self.__file = in_file
         self.__out_file = out_file
         self.__out_dir = out_dir
@@ -132,7 +132,7 @@ class ParseRtf:
         self.__dtd_path = dtd
         self.__check_file(in_file,"file_to_parse")
         self.__char_data = char_data
-        self.__debug_dir = deb_dir #self.__debug_dir = debug calibre
+        self.__debug_dir = deb_dir
         self.__check_dir(self.__temp_dir)
         self.__copy = self.__check_dir(self.__debug_dir)
         self.__convert_caps = convert_caps
@@ -155,25 +155,24 @@ class ParseRtf:
         if hasattr(the_file, 'read'): return
         if the_file == None:
             if type == "file_to_parse":
-                message = "You must provide a file for the script to work"
-            msg = message
+                msg = "\nYou must provide a file for the script to work"
             raise RtfInvalidCodeException, msg
         elif os.path.exists(the_file):
             pass # do nothing
         else:
-            message = "The file '%s' cannot be found" % the_file
-            msg = message
+            msg = "\nThe file '%s' cannot be found" % the_file
             raise RtfInvalidCodeException, msg
+
     def __check_dir(self, the_dir):
         """Check to see if directory exists"""
         if not the_dir :
             return
         dir_exists = os.path.isdir(the_dir)
         if not dir_exists:
-            message = "%s is not a directory" % the_dir
-            msg = message
+            msg = "\n%s is not a directory" % the_dir
             raise RtfInvalidCodeException, msg
         return 1
+
     def parse_rtf(self):
         """
         Parse the file by calling on other classes.
@@ -194,13 +193,14 @@ class ParseRtf:
             copy_obj.set_dir(self.__debug_dir)
             copy_obj.remove_files()
             copy_obj.copy_file(self.__temp_file, "original_file")
-        # new as of 2005-08-02. Do I want this?
+        # Create the checker used to verify that brackets stay balanced
         if self.__debug_dir or self.__run_level > 2:
             self.__check_brack_obj = check_brackets.CheckBrackets\
             (file = self.__temp_file,
                 bug_handler = RtfInvalidCodeException,
                     )
-        # convert Macintosh line endings to Unix line endings
+        #convert Macintosh and Windows line endings to Unix line endings
+        #why do this if you don't wb after?
         line_obj = line_endings.FixLineEndings(
                 in_file = self.__temp_file,
                 bug_handler = RtfInvalidCodeException,
@@ -208,13 +208,13 @@ class ParseRtf:
                 run_level = self.__run_level,
                 replace_illegals = self.__replace_illegals,
                 )
-        return_value = line_obj.fix_endings()
+        return_value = line_obj.fix_endings() #calibre return what?
         self.__return_code(return_value)
         tokenize_obj = tokenize.Tokenize(
                 bug_handler = RtfInvalidCodeException,
                 in_file = self.__temp_file,
                 copy = self.__copy,
-                run_level = self.__run_level,)
+                run_level = self.__run_level)
         tokenize_obj.tokenize()
         process_tokens_obj = process_tokens.ProcessTokens(
             in_file = self.__temp_file,
@@ -230,12 +230,25 @@ class ParseRtf:
                 os.remove(self.__temp_file)
             except OSError:
                 pass
+            #Check to see if the file is correctly encoded
+            encode_obj = default_encoding.DefaultEncoding(
+            in_file = self.__temp_file,
+            run_level = self.__run_level,
+            bug_handler = RtfInvalidCodeException,
+            check_raw = True,
+            )
+            platform, code_page, default_font_num = encode_obj.find_default_encoding()
             check_encoding_obj = check_encoding.CheckEncoding(
-                bug_handler = RtfInvalidCodeException,
-                    )
-            check_encoding_obj.check_encoding(self.__file)
-            sys.stderr.write('File "%s" does not appear to be RTF.\n' % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
-            raise InvalidRtfException, msg
+                    bug_handler = RtfInvalidCodeException,
+                        )
+            enc = encode_obj.get_codepage()
+            if enc != 'mac_roman':
+                enc = 'cp' + enc
+            if check_encoding_obj.check_encoding(self.__file, enc):
+                file_name = self.__file if isinstance(self.__file, str) \
+                                    else self.__file.encode('utf-8')
+                msg = 'File %s does not appear to be correctly encoded.\n' % file_name
+                raise InvalidRtfException, msg
         delete_info_obj = delete_info.DeleteInfo(
             in_file = self.__temp_file,
             copy = self.__copy,
@@ -508,6 +521,7 @@ class ParseRtf:
                 indent = self.__indent,
                 run_level = self.__run_level,
                 no_dtd = self.__no_dtd,
+                encoding = encode_obj.get_codepage(),
                 bug_handler = RtfInvalidCodeException,
                 )
         tags_obj.convert_to_tags()
@@ -520,35 +534,28 @@ class ParseRtf:
         output_obj.output()
         os.remove(self.__temp_file)
         return self.__exit_level
+
     def __bracket_match(self, file_name):
         if self.__run_level > 2:
             good_br, msg =  self.__check_brack_obj.check_brackets()
             if good_br:
                 pass
-                # sys.stderr.write( msg + ' in ' + file_name + "\n")
+                #sys.stderr.write( msg + ' in ' + file_name + "\n")
             else:
-                msg += msg +  " in file '" + file_name + "'\n"
+                msg = '%s in file %s\n' % (msg, file_name)
                 raise RtfInvalidCodeException, msg
+
     def __return_code(self, num):
-        if num == None:
-            return
-        if int(num) > self.__exit_level:
-            self.__exit_level = num
+        if num is None:  # nothing to record
+            return
+        if int(num) > self.__exit_level:  # keep the highest code seen so far
+            self.__exit_level = num
+
     def __make_temp_file(self,file):
         """Make a temporary file to parse"""
         write_file="rtf_write_file"
         read_obj = file if hasattr(file, 'read') else open(file,'r')
-        write_obj = open(write_file, 'w')
-        line = "dummy"
-        while line:
-            line = read_obj.read(1000)
-            write_obj.write(line )
-        write_obj.close()
+        with open(write_file, 'wb') as write_obj:
+            for line in read_obj:
+                write_obj.write(line)
         return write_file
-    """
-mi<tg<open______<style-sheet\n
-mi<tg<close_____<style-sheet\n
-mi<tg<open-att__<footnote<num>1\n
-mi<tg<empty-att_<page-definition<margin>33\n
-mi<tg<empty_____<para\n
-"""
diff --git a/src/calibre/ebooks/rtf2xml/check_brackets.py b/src/calibre/ebooks/rtf2xml/check_brackets.py
index 418469467d..361cc034e0 100755
--- a/src/calibre/ebooks/rtf2xml/check_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/check_brackets.py
@@ -24,38 +24,38 @@ class CheckBrackets:
         self.__ob_count = 0
         self.__cb_count = 0
         self.__open_bracket_num = []
+
     def open_brack(self, line):
         num = line[-5:-1]
         self.__open_bracket_num.append(num)
         self.__bracket_count += 1
+
     def close_brack(self, line):
         num = line[-5:-1]
-        ##self.__open_bracket_num.append(num)
         try:
             last_num = self.__open_bracket_num.pop()
         except:
-            return 0
+            return False
         if num != last_num:
-            return 0
+            return False
         self.__bracket_count -= 1
-        return 1
+        return True
+
     def check_brackets(self):
-        read_obj = open(self.__file, 'r')
-        line = 'dummy'
         line_count = 0
-        while line:
-            line_count += 1
-            line = read_obj.readline()
-            self.__token_info = line[:16]
-            if self.__token_info == 'ob<nu<open-brack':
-                self.open_brack(line)
-            if self.__token_info == 'cb<nu<clos-brack':
-                right_count = self.close_brack(line)
-                if not right_count:
-                    return (0, "closed bracket doesn't match, line %s" % line_count)
-        read_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                line_count += 1
+                self.__token_info = line[:16]
+                if self.__token_info == 'ob<nu<open-brack':
+                    self.open_brack(line)
+                if self.__token_info == 'cb<nu<clos-brack':
+                    if not self.close_brack(line):
+                        return (False, "closed bracket doesn't match, line %s" % line_count)
+
         if self.__bracket_count != 0:
-            msg = 'At end of file open and closed brackets don\'t match\n'
-            msg = msg + 'total number of brackets is %s' % self.__bracket_count
-            return (0, msg)
-        return (1, "brackets match!")
+            msg = ('At end of file open and closed brackets don\'t match\n' \
+                        'total number of brackets is %s') % self.__bracket_count
+            return (False, msg)
+        return (True, "Brackets match!")
+
diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py
index f6810e4909..0f52320aea 100755
--- a/src/calibre/ebooks/rtf2xml/check_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/check_encoding.py
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
 import sys
+
 class CheckEncoding:
+
     def __init__(self, bug_handler):
         self.__bug_handler = bug_handler
+
     def __get_position_error(self, line, encoding, line_num):
         char_position = 0
         for char in line:
@@ -12,21 +15,23 @@ class CheckEncoding:
             except UnicodeError, msg:
                 sys.stderr.write('line: %s char: %s\n' %  (line_num, char_position))
                 sys.stderr.write(str(msg) + '\n')
-    def check_encoding(self, path, encoding='us-ascii'):
-        read_obj = open(path, 'r')
-        line_to_read = 1
+
+    def check_encoding(self, path, encoding='us-ascii', verbose=True):
         line_num = 0
-        while line_to_read:
-            line_num += 1
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            try:
-                line.decode(encoding)
-            except UnicodeError:
-                if len(line) < 1000:
-                    self.__get_position_error(line, encoding, line_num)
-                else:
-                    sys.stderr.write('line: %d has bad encoding\n'%line_num)
+        with open(path, 'r') as read_obj:
+            for line in read_obj:
+                line_num += 1
+                try:
+                    line.decode(encoding)
+                except UnicodeError:
+                    if verbose:
+                        if len(line) < 1000:
+                            self.__get_position_error(line, encoding, line_num)
+                        else:
+                            sys.stderr.write('line: %d has bad encoding\n' % line_num)
+                    return True
+        return False
+
 if __name__ == '__main__':
     check_encoding_obj = CheckEncoding()
     check_encoding_obj.check_encoding(sys.argv[1])
diff --git a/src/calibre/ebooks/rtf2xml/combine_borders.py b/src/calibre/ebooks/rtf2xml/combine_borders.py
index 71cd822e30..eaf09d0842 100755
--- a/src/calibre/ebooks/rtf2xml/combine_borders.py
+++ b/src/calibre/ebooks/rtf2xml/combine_borders.py
@@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class CombineBorders:
     """Combine borders in RTF tokens to make later processing easier"""
     def __init__(self,
@@ -32,28 +34,31 @@ class CombineBorders:
         self.__state = 'default'
         self.__bord_pos = 'default'
         self.__bord_att = []
+
     def found_bd(self, line):
         #cw<bd<bor-t-r-vi
         self.__state = 'border'
         self.__bord_pos = line[6:16]
+
     def __default_func(self, line):
         #cw<bd<bor-t-r-vi
         if self.__first_five == 'cw<bd':
             self.found_bd(line)
             return ''
         return line
+
     def end_border(self, line, write_obj):
-        joiner = "|"
-        border_string = joiner.join(self.__bord_att)
+        border_string = "|".join(self.__bord_att)
         self.__bord_att = []
         write_obj.write('cw<bd<%s<nu<%s\n' % (self.__bord_pos,
-        border_string))
+                                                border_string))
         self.__state = 'default'
         self.__bord_string = ''
         if self.__first_five == 'cw<bd':
             self. found_bd(line)
         else:
             write_obj.write(line)
+
     def add_to_border_desc(self, line):
         #cw<bt<bdr-hair__<nu<true
         #cw<bt<bdr-linew<nu<0.50
@@ -65,26 +70,22 @@ class CombineBorders:
         else:
             num = ':' + num
         self.__bord_att.append(border_desc + num)
+
     def __border_func(self, line, write_obj):
         if self.__first_five != 'cw<bt':
             self.end_border(line, write_obj)
         else:
             self.add_to_border_desc(line)
+
     def combine_borders(self):
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__write_to, 'w')
-        line_to_read = 'dummy'
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__first_five = line[0:5]
-            if self.__state == 'border':
-                self.__border_func(line, write_obj)
-            else:
-                to_print = self.__default_func(line)
-                write_obj.write(to_print)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as write_obj:
+                for line in read_obj:
+                    self.__first_five = line[0:5]
+                    if self.__state == 'border':
+                        self.__border_func(line, write_obj)
+                    else:
+                        write_obj.write(self.__default_func(line))
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "combine_borders.data")
diff --git a/src/calibre/ebooks/rtf2xml/convert_to_tags.py b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
index ab54c0cbc3..6927537474 100755
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@@ -1,6 +1,9 @@
-import os, tempfile
-from calibre.ebooks.rtf2xml import copy
+import os, tempfile, sys
+
+from calibre.ebooks.rtf2xml import copy, check_encoding
+
 public_dtd = 'rtf2xml1.0.dtd'
+
 class ConvertToTags:
     """
     Convert file to XML
@@ -10,6 +13,7 @@ class ConvertToTags:
             bug_handler,
             dtd_path,
             no_dtd,
+            encoding,
             indent = None,
             copy = None,
             run_level = 1,
@@ -29,9 +33,14 @@ class ConvertToTags:
         self.__copy = copy
         self.__dtd_path = dtd_path
         self.__no_dtd = no_dtd
+        if encoding != 'mac_roman':
+            self.__encoding = 'cp' + encoding
+        else:
+            self.__encoding = 'mac_roman'
         self.__indent = indent
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
+
     def __initiate_values(self):
         """
         Set values, including those for the dictionary.
@@ -61,6 +70,7 @@ class ConvertToTags:
         'tx<ut<__________'  :   self.__text_func,
         'mi<tg<empty_____'  :   self.__empty_func,
         }
+
     def __open_func(self, line):
         """
         Print the opening tag and newlines when needed.
@@ -73,6 +83,7 @@ class ConvertToTags:
         if info in self.__two_new_line:
             self.__write_extra_new_line()
         self.__write_obj.write('<%s>' % info)
+
     def __empty_func(self, line):
         """
         Print out empty tag and newlines when needed.
@@ -85,10 +96,11 @@ class ConvertToTags:
             self.__write_new_line()
         if info in self.__two_new_line:
             self.__write_extra_new_line()
+
     def __open_att_func(self, line):
         """
         Process lines for open tags that have attributes.
-        The important infor is between [17:-1]. Take this info and split it
+        The important info is between [17:-1]. Take this info and split it
         with the delimeter '<'. The first token in this group is the element
         name. The rest are attributes, separated fromt their values by '>'. So
         read each token one at a time, and split them by '>'.
@@ -119,6 +131,7 @@ class ConvertToTags:
             self.__write_new_line()
         if element_name in self.__two_new_line:
             self.__write_extra_new_line()
+
     def __empty_att_func(self, line):
         """
         Same as the __open_att_func, except a '/' is placed at the end of the tag.
@@ -143,6 +156,7 @@ class ConvertToTags:
             self.__write_new_line()
         if element_name in self.__two_new_line:
             self.__write_extra_new_line()
+
     def __close_func(self, line):
         """
         Print out the closed tag and new lines, if appropriate.
@@ -156,6 +170,7 @@ class ConvertToTags:
             self.__write_new_line()
         if info in self.__two_new_line:
             self.__write_extra_new_line()
+
     def __text_func(self, line):
         """
         Simply print out the information between [17:-1]
@@ -163,6 +178,7 @@ class ConvertToTags:
         #tx<nu<__________<Normal;
         # change this!
         self.__write_obj.write(line[17:-1])
+
     def __write_extra_new_line(self):
         """
         Print out extra new lines if the new lines have not exceeded two. If
@@ -172,8 +188,10 @@ class ConvertToTags:
             return
         if self.__new_line < 2:
             self.__write_obj.write('\n')
+
     def __default_func(self, line):
         pass
+
     def __write_new_line(self):
         """
         Print out a new line if a new line has not already been printed out.
@@ -183,11 +201,23 @@ class ConvertToTags:
         if not self.__new_line:
             self.__write_obj.write('\n')
             self.__new_line += 1
+
     def __write_dec(self):
         """
         Write the XML declaration at the top of the document.
         """
-        self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
+        #keep maximum compatibility with previous version
+        check_encoding_obj = check_encoding.CheckEncoding(
+                    bug_handler=self.__bug_handler)
+
+        if not check_encoding_obj.check_encoding(self.__file, verbose=False):
+            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
+        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
+            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
+        else:
+            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
+            sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
+                    ' hope for the best')
         self.__new_line = 0
         self.__write_new_line()
         if self.__no_dtd:
@@ -207,6 +237,7 @@ class ConvertToTags:
             )
         self.__new_line = 0
         self.__write_new_line()
+
     def convert_to_tags(self):
         """
         Read in the file one line at a time. Get the important info, between
@@ -222,18 +253,14 @@ class ConvertToTags:
             an empty tag function.
             """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
         self.__write_obj = open(self.__write_to, 'w')
         self.__write_dec()
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__token_info)
-            if action != None:
-                action(line)
-        read_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                self.__token_info = line[:16]
+                action = self.__state_dict.get(self.__token_info)
+                if action is not None:
+                    action(line)
         self.__write_obj.close()
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
diff --git a/src/calibre/ebooks/rtf2xml/copy.py b/src/calibre/ebooks/rtf2xml/copy.py
index ff029c1841..1b620b9fbf 100755
--- a/src/calibre/ebooks/rtf2xml/copy.py
+++ b/src/calibre/ebooks/rtf2xml/copy.py
@@ -23,6 +23,7 @@ class Copy:
     def __init__(self, bug_handler, file = None, deb_dir = None, ):
         self.__file = file
         self.__bug_handler = bug_handler
+
     def set_dir(self, deb_dir):
         """Set the temporary directory to write files to"""
         if deb_dir is None:
@@ -33,19 +34,11 @@ class Copy:
             message = "%(deb_dir)s is not a directory" % vars()
             raise self.__bug_handler , message
         Copy.__dir = deb_dir
+
     def remove_files(self ):
         """Remove files from directory"""
         self.__remove_the_files(Copy.__dir)
-        """
-        list_of_files = os.listdir(Copy.__dir)
-        list_of_files = os.listdir(the_dir)
-        for file in list_of_files:
-            rem_file = os.path.join(Copy.__dir,file)
-            if os.path.isdir(rem_file):
-                self.remove_files(rem_file)
-            else:
-                os.remove(rem_file)
-        """
+
     def __remove_the_files(self, the_dir):
         """Remove files from directory"""
         list_of_files = os.listdir(the_dir)
@@ -58,6 +51,7 @@ class Copy:
                     os.remove(rem_file)
                 except OSError:
                     pass
+
     def copy_file(self, file, new_file):
         """
         Copy the file to a new name
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index b932b465d0..53887e0d90 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -1,61 +1,142 @@
 #########################################################################
 #                                                                       #
-#                                                                       #
 #   copyright 2002 Paul Henry Tremblay                                  #
 #                                                                       #
-#   This program is distributed in the hope that it will be useful,     #
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
-#   General Public License for more details.                            #
-#                                                                       #
-#   You should have received a copy of the GNU General Public License   #
-#   along with this program; if not, write to the Free Software         #
-#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
-#   02111-1307 USA                                                      #
-#                                                                       #
-#                                                                       #
 #########################################################################
+
+'''
+Code pages as listed in the RTF 1.9.1 specification:
+    437	United States IBM
+    708	Arabic (ASMO 708)
+    709	Arabic (ASMO 449+, BCON V4)
+    710	Arabic (transparent Arabic)
+    711	Arabic (Nafitha Enhanced)
+    720	Arabic (transparent ASMO)
+    819	Windows 3.1 (United States and Western Europe)
+    850	IBM multilingual
+    852	Eastern European
+    860	Portuguese
+    862	Hebrew
+    863	French Canadian
+    864	Arabic
+    865	Norwegian
+    866	Soviet Union
+    874	Thai
+    932	Japanese
+    936	Simplified Chinese
+    949	Korean
+    950	Traditional Chinese
+    1250	Eastern European
+    1251	Cyrillic
+    1252	Western European
+    1253	Greek
+    1254	Turkish
+    1255	Hebrew
+    1256	Arabic
+    1257	Baltic
+    1258	Vietnamese
+    1361	Johab
+    10000	MAC Roman
+    10001	MAC Japan
+    10004	MAC Arabic
+    10005	MAC Hebrew
+    10006	MAC Greek
+    10007	MAC Cyrillic
+    10029	MAC Latin2
+    10081	MAC Turkish
+    57002	Devanagari
+    57003	Bengali
+    57004	Tamil
+    57005	Telugu
+    57006	Assamese
+    57007	Oriya
+    57008	Kannada
+    57009	Malayalam
+    57010	Gujarati
+    57011	Punjabi
+'''
+import re
+
 class DefaultEncoding:
     """
     Find the default encoding for the doc
     """
-    def __init__(self, in_file, bug_handler, run_level = 1,):
-        """
-        Required:
-            'file'
-        Returns:
-            nothing
-            """
+    def __init__(self, in_file, bug_handler, run_level = 1, check_raw = False):
         self.__file = in_file
         self.__bug_handler = bug_handler
+        self.__platform = 'Windows'
+        self.__default_num = 'not-defined'
+        self.__code_page = '1252'
+        self.__datafetched = False
+        self.__fetchraw = check_raw
+
     def find_default_encoding(self):
-        platform = 'Windows'
-        default_num = 'not-defined'
-        code_page = 'ansicpg1252'
-        read_obj = open(self.__file, 'r')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            if self.__token_info == 'mi<mk<rtfhed-end':
-                break
-            if self.__token_info == 'cw<ri<ansi-codpg':
-                #cw<ri<ansi-codpg<nu<10000
-                num = line[20:-1]
-                if not num:
-                    num = '1252'
-                code_page = 'ansicpg' + num
-            if self.__token_info == 'cw<ri<macintosh_':
-                platform = 'Macintosh'
-            if self.__token_info == 'cw<ri<deflt-font':
-                default_num = line[20:-1]
-                #cw<ri<deflt-font<nu<0
-            #action = self.__state_dict.get(self.__state)
-            #if action == None:
-                #print self.__state
-            #action(line)
-        read_obj.close()
-        if platform == 'Macintosh':
-            code_page = 'mac_roman'
-        return platform, code_page, default_num
+        if not self.__datafetched:
+            self._encoding()
+            self.__datafetched = True
+        if self.__platform == 'Macintosh':
+            code_page = self.__code_page
+        else:
+            code_page = 'ansicpg' + self.__code_page
+        return self.__platform, code_page, self.__default_num
+
+    def get_codepage(self):
+        if not self.__datafetched:
+            self._encoding()
+            self.__datafetched = True
+        return self.__code_page
+
+    def get_platform(self):
+        if not self.__datafetched:
+            self._encoding()
+            self.__datafetched = True
+        return self.__platform
+
+    def _encoding(self):
+        with open(self.__file, 'r') as read_obj:
+            if not self.__fetchraw:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    if self.__token_info == 'mi<mk<rtfhed-end':
+                        break
+                    if self.__token_info == 'cw<ri<ansi-codpg':
+                        #cw<ri<ansi-codpg<nu<10000
+                        self.__code_page = line[20:-1] if int(line[20:-1]) \
+                                            else '1252'
+                    if self.__token_info == 'cw<ri<macintosh_':
+                        self.__platform = 'Macintosh'
+                        self.__code_page = 'mac_roman'
+                    elif self.__token_info == 'cw<ri<pc________':
+                        self.__platform = 'IBMPC'
+                        self.__code_page = '437'
+                    elif self.__token_info == 'cw<ri<pca_______':
+                        self.__platform = 'OS/2'
+                        self.__code_page = '850'
+                    if self.__token_info == 'cw<ri<deflt-font':
+                        self.__default_num = line[20:-1]
+                        #cw<ri<deflt-font<nu<0
+            else:
+                fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
+                fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
+                for line in read_obj:
+                    if fenccp.search(line):
+                        cp = fenccp.search(line).group(1)
+                        if not int(cp):
+                            self.__code_page = cp
+                        break
+                    if fenc.search(line):
+                        enc = fenc.search(line).group(1)
+                        if enc == 'mac':
+                            self.__code_page = 'mac_roman'
+                        elif enc == 'pc':
+                            self.__code_page = '437'
+                        elif enc == 'pca':
+                            self.__code_page = '850'
+
+# if __name__ == '__main__':
+    # encode_obj = DefaultEncoding(
+            # in_file = sys.argv[1],
+            # bug_handler = Exception,
+            # check_raw = True,
+            # )
+    # print encode_obj.get_codepage()
diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py
index f79caa3aae..fed47b1e75 100755
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class DeleteInfo:
     """Delelet unecessary destination groups"""
     def __init__(self,
@@ -29,17 +31,18 @@ class DeleteInfo:
         self.__bug_handler = bug_handler
         self.__copy = copy
         self.__write_to = tempfile.mktemp()
-        self.__bracket_count=0
+        self.__bracket_count= 0
         self.__ob_count = 0
         self.__cb_count = 0
-        self.__after_asterisk = 0
-        self.__delete = 0
+        # self.__after_asterisk = False
+        # self.__delete = 0
         self.__initiate_allow()
         self.__ob = 0
-        self.__write_cb = 0
+        self.__write_cb = False
         self.__run_level = run_level
-        self.__found_delete = 0
-        self.__list = 0
+        self.__found_delete = False
+        # self.__list = False
+
     def __initiate_allow(self):
         """
         Initiate a list of destination groups which should be printed out.
@@ -66,9 +69,10 @@ class DeleteInfo:
         self.__state_dict = {
             'default'           : self.__default_func,
             'after_asterisk'    : self.__asterisk_func,
-            'delete'           : self.__delete_func,
+            'delete'            : self.__delete_func,
             'list'              : self.__list_func,
         }
+
     def __default_func(self,line):
         """Handle lines when in no special state. Look for an asterisk to
         begin a special state. Otherwise, print out line."""
@@ -81,27 +85,29 @@ class DeleteInfo:
             if self.__ob:
                 self.__write_obj.write(self.__ob)
             self.__ob = line
-            return 0
+            return False
         else:
             # write previous bracket, since didn't fine asterisk
             if self.__ob:
                 self.__write_obj.write(self.__ob)
                 self.__ob = 0
-            return 1
+            return True
+
     def __delete_func(self,line):
         """Handle lines when in delete state. Don't print out lines
         unless the state has ended."""
         if self.__delete_count == self.__cb_count:
             self.__state = 'default'
             if self.__write_cb:
-                self.__write_cb = 0
-                return 1
-            return 0
+                self.__write_cb = True
+                return True
+            return False
+
     def __asterisk_func(self,line):
         """
         Determine whether to delete info in group
         Note on self.__cb flag.
-        If you find that you are in a delete group, and the preivous
+        If you find that you are in a delete group, and the previous
         token in not an open bracket (self.__ob = 0), that means
         that the delete group is nested inside another acceptable
         detination group. In this case, you have alrady written
@@ -110,21 +116,21 @@ class DeleteInfo:
         """
         # Test for {\*}, in which case don't enter
         # delete state
-        self.__after_asterisk = 0 # only enter this function once
-        self.__found_delete = 1
+        # self.__after_asterisk = False # only enter this function once
+        self.__found_delete = True
         if self.__token_info == 'cb<nu<clos-brack':
             if self.__delete_count == self.__cb_count:
                 self.__state = 'default'
                 self.__ob = 0
                 # changed this because haven't printed out start
-                return 0
+                return False
             else:
                 # not sure what happens here!
                 # believe I have a '{\*}
                 if self.__run_level > 3:
                     msg = 'flag problem\n'
                     raise self.__bug_handler, msg
-                return 1
+                return True
         elif self.__token_info in self.__allowable :
             if self.__ob:
                 self.__write_obj.write(self.__ob)
@@ -132,85 +138,81 @@ class DeleteInfo:
                 self.__state = 'default'
             else:
                 pass
-            return 1
+            return True
         elif self.__token_info == 'cw<ls<list______':
             self.__ob = 0
             self.__found_list_func(line)
         elif self.__token_info in self.__not_allowable:
             if not self.__ob:
-                self.__write_cb = 1
+                self.__write_cb = True
             self.__ob = 0
             self.__state = 'delete'
             self.__cb_count = 0
-            return 0
+            return False
         else:
             if self.__run_level > 5:
-                msg = 'After an asterisk, and found neither an allowable or non-allowble token\n'
-                msg += 'token is "%s"\n' % self.__token_info
-                raise self.__bug_handler
+                msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\
+                            token is "%s"\n') % self.__token_info
+                raise self.__bug_handler, msg
             if not self.__ob:
-                self.__write_cb = 1
+                self.__write_cb = True
             self.__ob = 0
             self.__state = 'delete'
             self.__cb_count = 0
-            return 0
+            return False
+
     def __found_list_func(self, line):
         """
         print out control words in this group
         """
         self.__state = 'list'
+
     def __list_func(self, line):
         """
         Check to see if the group has ended.
-        Return 1 for all control words.
-        Return 0 otherwise.
+        Return True for all control words.
+        Return False otherwise.
         """
         if self.__delete_count == self.__cb_count and self.__token_info ==\
             'cb<nu<clos-brack':
             self.__state = 'default'
             if self.__write_cb:
-                self.__write_cb = 0
-                return 1
-            return 0
+                self.__write_cb = False
+                return True
+            return False
         elif line[0:2] == 'cw':
-            return 1
+            return True
         else:
-            return 0
+            return False
+
     def delete_info(self):
         """Main method for handling other methods. Read one line in at
-        a time, and determine wheter to print the line based on the state."""
-        line_to_read = 'dummy'
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        while line_to_read:
-            #ob<nu<open-brack<0001
-            to_print =1
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__cb_count = line[-5:-1]
-            action = self.__state_dict.get(self.__state)
-            if not action:
-                sys.stderr.write('No action in dictionary state is "%s" \n'
-                        % self.__state)
-            to_print = action(line)
-            """
-            if self.__after_asterisk:
-                to_print = self.__asterisk_func(line)
-            elif self.__list:
-                self.__in_list_func(line)
-            elif self.__delete:
-                to_print = self.__delete_func(line)
-            else:
-                to_print = self.__default_func(line)
-            """
-            if to_print:
-                self.__write_obj.write(line)
-        self.__write_obj.close()
-        read_obj.close()
+        a time, and determine whether to print the line based on the state."""
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    #ob<nu<open-brack<0001
+                    to_print = True
+                    self.__token_info = line[:16]
+                    if self.__token_info == 'ob<nu<open-brack':
+                        self.__ob_count = line[-5:-1]
+                    if self.__token_info == 'cb<nu<clos-brack':
+                        self.__cb_count = line[-5:-1]
+                    action = self.__state_dict.get(self.__state)
+                    if not action:
+                        sys.stderr.write(_('No action in dictionary state is "%s" \n')
+                                % self.__state)
+                    to_print = action(line)
+                    # if self.__after_asterisk:
+                        # to_print = self.__asterisk_func(line)
+                    # elif self.__list:
+                        # self.__in_list_func(line)
+                    # elif self.__delete:
+                        # to_print = self.__delete_func(line)
+                    # else:
+                        # to_print = self.__default_func(line)
+                    if to_print:
+                        self.__write_obj.write(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "delete_info.data")
diff --git a/src/calibre/ebooks/rtf2xml/footnote.py b/src/calibre/ebooks/rtf2xml/footnote.py
index a596ca73f6..6ac12f65e6 100755
--- a/src/calibre/ebooks/rtf2xml/footnote.py
+++ b/src/calibre/ebooks/rtf2xml/footnote.py
@@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class Footnote:
     """
     Two public methods are available. The first separates all of the
@@ -35,6 +37,7 @@ class Footnote:
         self.__copy = copy
         self.__write_to = tempfile.mktemp()
         self.__found_a_footnote = 0
+
     def __first_line_func(self, line):
         """
         Print the tag info for footnotes.  Check whether footnote is an
@@ -47,6 +50,7 @@ class Footnote:
             self.__write_to_foot_obj.write(
             'mi<tg<open-att__<footnote<num>%s\n' % self.__footnote_count)
         self.__first_line = 0
+
     def __in_footnote_func(self, line):
         """Handle all tokens that are part of footnote"""
         if self.__first_line:
@@ -68,6 +72,7 @@ class Footnote:
             'mi<mk<footnt-clo\n')
         else:
             self.__write_to_foot_obj.write(line)
+
     def __found_footnote(self, line):
         """ Found a footnote"""
         self.__found_a_footnote = 1
@@ -81,6 +86,7 @@ class Footnote:
         'mi<mk<footnt-ind<%04d\n' % self.__footnote_count)
         self.__write_to_foot_obj.write(
         'mi<mk<footnt-ope<%04d\n' % self.__footnote_count)
+
     def __default_sep(self, line):
         """Handle all tokens that are not footnote tokens"""
         if self.__token_info == 'cw<nt<footnote__':
@@ -91,6 +97,7 @@ class Footnote:
             self.__write_obj.write(
                 'tx<nu<__________<%s\n' % num
             )
+
     def __initiate_sep_values(self):
         """
         initiate counters for separate_footnotes method.
@@ -102,6 +109,7 @@ class Footnote:
         self.__in_footnote = 0
         self.__first_line = 0 #have not processed the first line of footnote
         self.__footnote_count = 0
+
     def separate_footnotes(self):
         """
         Separate all the footnotes in an RTF file and put them at the bottom,
@@ -111,58 +119,50 @@ class Footnote:
         bottom of the main file.
         """
         self.__initiate_sep_values()
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
         self.__footnote_holder = tempfile.mktemp()
-        self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            # keep track of opening and closing brackets
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__cb_count = line[-5:-1]
-            # In the middle of footnote text
-            if self.__in_footnote:
-                self.__in_footnote_func(line)
-            # not in the middle of footnote text
-            else:
-                self.__default_sep(line)
-        self.__write_obj.close()
-        read_obj.close()
-        self.__write_to_foot_obj.close()
-        read_obj = open(self.__footnote_holder, 'r')
-        write_obj = open(self.__write_to, 'a')
-        write_obj.write(
-        'mi<mk<sect-close\n'
-        'mi<mk<body-close\n'
-        'mi<tg<close_____<section\n'
-        'mi<tg<close_____<body\n'
-        'mi<tg<close_____<doc\n'
-        'mi<mk<footnt-beg\n')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        write_obj.write(
-        'mi<mk<footnt-end\n')
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        # keep track of opening and closing brackets
+                        if self.__token_info == 'ob<nu<open-brack':
+                            self.__ob_count = line[-5:-1]
+                        if self.__token_info == 'cb<nu<clos-brack':
+                            self.__cb_count = line[-5:-1]
+                        # In the middle of footnote text
+                        if self.__in_footnote:
+                            self.__in_footnote_func(line)
+                        # not in the middle of footnote text
+                        else:
+                            self.__default_sep(line)
+        with open(self.__footnote_holder, 'r') as read_obj:
+            with open(self.__write_to, 'a') as write_obj:
+                write_obj.write(
+                    'mi<mk<sect-close\n'
+                    'mi<mk<body-close\n'
+                    'mi<tg<close_____<section\n'
+                    'mi<tg<close_____<body\n'
+                    'mi<tg<close_____<doc\n'
+                    'mi<mk<footnt-beg\n')
+                for line in read_obj:
+                    write_obj.write(line)
+                write_obj.write(
+                'mi<mk<footnt-end\n')
         os.remove(self.__footnote_holder)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "footnote_separate.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
+
     def update_info(self, file, copy):
         """
         Unused method
         """
         self.__file = file
         self.__copy = copy
+
     def __get_foot_body_func(self, line):
         """
         Process lines in main body and look for beginning of footnotes.
@@ -172,6 +172,7 @@ class Footnote:
             self.__state = 'foot'
         else:
             self.__write_obj.write(line)
+
     def __get_foot_foot_func(self, line):
         """
         Copy footnotes from bottom of file to a separate, temporary file.
@@ -180,6 +181,7 @@ class Footnote:
             self.__state = 'body'
         else:
             self.__write_to_foot_obj.write(line)
+
     def __get_footnotes(self):
         """
         Private method to remove footnotes from main file.  Read one line from
@@ -188,21 +190,16 @@ class Footnote:
         These two functions do the work of separating the footnotes form the
         body.
         """
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
-            # self.__write_to = "footnote_info.data"
-        self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            self.__token_info = line[:16]
-            if self.__state == 'body':
-                self.__get_foot_body_func(line)
-            elif self.__state == 'foot':
-                self.__get_foot_foot_func(line)
-        read_obj.close()
-        self.__write_obj.close()
-        self.__write_to_foot_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        if self.__state == 'body':
+                            self.__get_foot_body_func(line)
+                        elif self.__state == 'foot':
+                            self.__get_foot_foot_func(line)
+
     def __get_foot_from_temp(self, num):
         """
         Private method for joining footnotes to body. This method reads from
@@ -213,9 +210,7 @@ class Footnote:
         look_for = 'mi<mk<footnt-ope<' + num + '\n'
         found_foot = 0
         string_to_return = ''
-        line = 1
-        while line:
-            line = self.__read_from_foot_obj.readline()
+        for line in self.__read_from_foot_obj:
             if found_foot:
                 if line == 'mi<mk<footnt-clo\n':
                     return string_to_return
@@ -223,6 +218,7 @@ class Footnote:
             else:
                 if line == look_for:
                     found_foot = 1
+
     def __join_from_temp(self):
         """
         Private method for rejoining footnotes to body.  Read from the
@@ -232,16 +228,14 @@ class Footnote:
         print out to the third file.
         If no footnote marker is found, simply print out the token (line).
         """
-        self.__read_from_foot_obj = open(self.__footnote_holder, 'r')
-        read_obj = open(self.__write_to, 'r')
-        self.__write_obj = open(self.__write_to2, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if line[:16] == 'mi<mk<footnt-ind':
-                line = self.__get_foot_from_temp(line[17:-1])
-            self.__write_obj.write(line)
-        read_obj.close()
+        with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj:
+            with open(self.__write_to, 'r') as read_obj:
+                with open(self.__write_to2, 'w') as self.__write_obj:
+                    for line in read_obj:
+                        if line[:16] == 'mi<mk<footnt-ind':
+                            line = self.__get_foot_from_temp(line[17:-1])
+                        self.__write_obj.write(line)
+
     def join_footnotes(self):
         """
         Join the footnotes from the bottom of the file and put them in their
@@ -258,8 +252,8 @@ class Footnote:
         self.__state = 'body'
         self.__get_footnotes()
         self.__join_from_temp()
-        self.__write_obj.close()
-        self.__read_from_foot_obj.close()
+        # self.__write_obj.close()
+        # self.__read_from_foot_obj.close()
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to2, "footnote_joined.data")
diff --git a/src/calibre/ebooks/rtf2xml/get_char_map.py b/src/calibre/ebooks/rtf2xml/get_char_map.py
index db307b19d6..fb3ef28b4f 100755
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@@ -43,27 +43,28 @@ class GetCharMap:
     def get_char_map(self, map):
         if map == 'ansicpg0':
             map = 'ansicpg1250'
-        found_map = 0
+        if map in ('ansicpg10000', '10000'):
+            map = 'mac_roman'
+        found_map = False
         map_dict = {}
         self.__char_file.seek(0)
-        for line in self.__char_file.readlines():
+        for line in self.__char_file:
             if not line.strip(): continue
             begin_element = '<%s>' % map;
             end_element = '</%s>' % map
             if not found_map:
                 if begin_element in line:
-                    found_map = 1
+                    found_map = True
             else:
                 if end_element in line:
                     break
                 fields = line.split(':')
                 fields[1].replace('\\colon', ':')
                 map_dict[fields[1]] = fields[3]
-            
-        
+
+
         if not found_map:
-            msg = 'no map found\n'
-            msg += 'map is "%s"\n'%(map,)
+            msg = 'no map found\nmap is "%s"\n'%(map,)
             raise self.__bug_handler, msg
         return map_dict
 
diff --git a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
index d67dce30d2..ba85174845 100755
--- a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
+++ b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
@@ -54,10 +54,10 @@ class Hex2Utf8:
             'convert_to_caps'--wether to convert caps to utf-8
         Returns:
             nothing
-            """
+        """
         self.__file = in_file
         self.__copy = copy
-        if area_to_convert != 'preamble' and area_to_convert != 'body':
+        if area_to_convert not in ('preamble', 'body'):
             msg = (
             'Developer error! Wrong flag.\n'
             'in module "hex_2_utf8.py\n'
@@ -79,7 +79,8 @@ class Hex2Utf8:
         self.__write_to = tempfile.mktemp()
         self.__bug_handler = bug_handler
         self.__invalid_rtf_handler = invalid_rtf_handler
-    def update_values(  self,
+
+    def update_values(self,
                         file,
                         area_to_convert,
                         char_file,
@@ -132,6 +133,7 @@ class Hex2Utf8:
         # self.__convert_symbol = 0
         # self.__convert_wingdings = 0
         # self.__convert_zapf = 0
+
     def __initiate_values(self):
         """
         Required:
@@ -191,6 +193,7 @@ class Hex2Utf8:
             'body'          :       self.__body_func,
             'mi<mk<body-open_'  :   self.__found_body_func,
             'tx<hx<__________'  :   self.__hex_text_func,
+            # 'tx<nu<__________'  :   self.__text_func,
             }
         self.__body_state_dict = {
             'preamble'      :       self.__preamble_for_body_func,
@@ -209,6 +212,7 @@ class Hex2Utf8:
         }
         self.__caps_list = ['false']
         self.__font_list = ['not-defined']
+
     def __hex_text_func(self, line):
         """
         Required:
@@ -218,12 +222,12 @@ class Hex2Utf8:
             token is in the dictionary, then check if the value starts with a
             "&". If it does, then tag the result as utf text. Otherwise, tag it
             as normal text.
-            If the nex_num is not in the dictionary, then a mistake has been
+            If the hex_num is not in the dictionary, then a mistake has been
             made.
             """
         hex_num = line[17:-1]
         converted = self.__current_dict.get(hex_num)
-        if converted != None:
+        if converted is not None:
             # tag as utf-8
             if converted[0:1] == "&":
                 font = self.__current_dict_name
@@ -263,42 +267,43 @@ class Hex2Utf8:
                     # msg += 'dictionary is %s\n' % self.__current_dict_name
                     msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
                     raise self.__bug_handler, msg
+
     def __found_body_func(self, line):
         self.__state = 'body'
         self.__write_obj.write(line)
+
     def __body_func(self, line):
         """
         When parsing preamble
         """
         self.__write_obj.write(line)
+
     def __preamble_func(self, line):
         action = self.__preamble_state_dict.get(self.__token_info)
-        if action != None:
+        if action is not None:
             action(line)
         else:
             self.__write_obj.write(line)
+
     def __convert_preamble(self):
         self.__state = 'preamble'
-        read_obj = open(self.__file, 'r')
         self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__preamble_state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('error no state found in hex_2_utf8',
-                self.__state
-                )
-            action(line)
-        read_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                self.__token_info = line[:16]
+                action = self.__preamble_state_dict.get(self.__state)
+                if action is None:
+                    # write() takes one argument: build the message first
+                    sys.stderr.write(_('error no state found in hex_2_utf8')
+                        + ' %s\n' % self.__state)
+                action(line)
         self.__write_obj.close()
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
+
     def __preamble_for_body_func(self, line):
         """
         Required:
@@ -311,6 +316,7 @@ class Hex2Utf8:
         if self.__token_info == 'mi<mk<body-open_':
             self.__found_body_func(line)
         self.__write_obj.write(line)
+
     def __body_for_body_func(self, line):
         """
         Required:
@@ -321,10 +327,11 @@ class Hex2Utf8:
             Used when parsing the body.
         """
         action = self.__in_body_dict.get(self.__token_info)
-        if action != None:
+        if action is not None:
             action(line)
         else:
             self.__write_obj.write(line)
+
     def __start_font_func(self, line):
         """
         Required:
@@ -348,6 +355,7 @@ class Hex2Utf8:
         else:
             self.__current_dict_name = 'default'
             self.__current_dict = self.__def_dict
+
     def __end_font_func(self, line):
         """
         Required:
@@ -376,6 +384,7 @@ class Hex2Utf8:
         else:
             self.__current_dict_name = 'default'
             self.__current_dict = self.__def_dict
+
     def __start_special_font_func_old(self, line):
         """
         Required:
@@ -398,6 +407,7 @@ class Hex2Utf8:
             self.__current_dict.append(self.__dingbats_dict)
             self.__special_fonts_found += 1
             self.__current_dict_name = 'Zapf Dingbats'
+
     def __end_special_font_func(self, line):
         """
         Required:
@@ -416,6 +426,7 @@ class Hex2Utf8:
             self.__current_dict.pop()
             self.__special_fonts_found -= 1
             self.__dict_name = 'default'
+
     def __start_caps_func_old(self, line):
         """
         Required:
@@ -427,6 +438,7 @@ class Hex2Utf8:
             self.__in_caps to 1
         """
         self.__in_caps = 1
+
     def __start_caps_func(self, line):
         """
         Required:
@@ -440,6 +452,7 @@ class Hex2Utf8:
         self.__in_caps = 1
         value = line[17:-1]
         self.__caps_list.append(value)
+
     def __end_caps_func(self, line):
         """
         Required:
@@ -455,7 +468,8 @@ class Hex2Utf8:
         else:
             sys.stderr.write('Module is hex_2_utf8\n')
             sys.stderr.write('method is __end_caps_func\n')
-            sys.stderr.write('caps list should be more than one?\n')
+            sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
+
     def __text_func(self, line):
         """
         Required:
@@ -466,9 +480,8 @@ class Hex2Utf8:
             if in caps, convert. Otherwise, print out.
         """
         text = line[17:-1]
-        if self.__current_dict_name == 'Symbol'\
-          or self.__current_dict_name == 'Wingdings'\
-          or self.__current_dict_name == 'Zapf Dingbats':
+        # print line
+        if self.__current_dict_name in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
             the_string = ''
             for letter in text:
                 hex_num = hex(ord(letter))
@@ -477,21 +490,21 @@ class Hex2Utf8:
                 hex_num = hex_num[2:]
                 hex_num = '\'%s' % hex_num
                 converted = self.__current_dict.get(hex_num)
-                if converted == None:
+                if converted is None:
                     sys.stderr.write('module is hex_2_ut8\n')
                     sys.stderr.write('method is __text_func\n')
                     sys.stderr.write('no hex value for "%s"\n' % hex_num)
                 else:
                     the_string += converted
             self.__write_obj.write('tx<nu<__________<%s\n' % the_string)
+            # print the_string
         else:
             if self.__caps_list[-1] == 'true' \
                 and self.__convert_caps\
-                and self.__current_dict_name != 'Symbol'\
-                and self.__current_dict_name != 'Wingdings'\
-                and self.__current_dict_name != 'Zapf Dingbats':
+                and self.__current_dict_name not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
                 text = text.upper()
             self.__write_obj.write('tx<nu<__________<%s\n' % text)
+
     def __utf_to_caps_func(self, line):
         """
         Required:
@@ -506,6 +519,7 @@ class Hex2Utf8:
             # utf_text = utf_text.upper()
             utf_text = self.__utf_token_to_caps_func(utf_text)
         self.__write_obj.write('tx<ut<__________<%s\n' % utf_text)
+
     def __utf_token_to_caps_func(self, char_entity):
         """
         Required:
@@ -530,28 +544,26 @@ class Hex2Utf8:
             return char_entity
         else:
             return converted
+
     def __convert_body(self):
         self.__state = 'body'
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__body_state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('error no state found in hex_2_utf8',
-                self.__state
-                )
-            action(line)
-        read_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            self.__write_obj = open(self.__write_to, 'w')
+            for line in read_obj:
+                self.__token_info = line[:16]
+                action = self.__body_state_dict.get(self.__state)
+                if action is None:
+                    # write() takes one argument: build the message first
+                    sys.stderr.write('error no state found in hex_2_utf8'
+                        + ' %s\n' % self.__state)
+                action(line)
         self.__write_obj.close()
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
+
     def convert_hex_2_utf8(self):
         self.__initiate_values()
         if self.__area_to_convert == 'preamble':
diff --git a/src/calibre/ebooks/rtf2xml/inline.py b/src/calibre/ebooks/rtf2xml/inline.py
index 5ca1cd0783..7eda0ce429 100755
--- a/src/calibre/ebooks/rtf2xml/inline.py
+++ b/src/calibre/ebooks/rtf2xml/inline.py
@@ -1,5 +1,7 @@
 import sys, os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 """
 States.
 1. default
@@ -36,6 +38,7 @@ class Inline:
         self.__copy = copy
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -51,7 +54,6 @@ class Inline:
             'tx<ut<__________'  :       self.__found_text_func,
             'mi<mk<inline-fld'  :       self.__found_text_func,
             'text'              :       self.__found_text_func,
-            'cw<nu<hard-lineb'  :       self.__found_text_func, #calibre
             'cb<nu<clos-brack'  :       self.__close_bracket_func,
             'mi<mk<par-end___'  :       self.__end_para_func,
             'mi<mk<footnt-ope'  :       self.__end_para_func,
@@ -63,7 +65,6 @@ class Inline:
             'tx<hx<__________'  :       self.__found_text_func,
             'tx<ut<__________'  :       self.__found_text_func,
             'text'              :       self.__found_text_func,
-            'cw<nu<hard-lineb'  :       self.__found_text_func, #calibre
             'mi<mk<inline-fld'  :       self.__found_text_func,
             'ob<nu<open-brack':         self.__found_open_bracket_func,
             'mi<mk<par-end___'  :       self.__end_para_func,
@@ -83,12 +84,12 @@ class Inline:
         self.__in_para = 0 #  not in paragraph
         self.__char_dict = {
             # character info => ci
-            'annotation'    :       'annotation',
+            'annotation'    :   'annotation',
             'blue______'    :   'blue',
             'bold______'    :   'bold',
-            'caps______'    :       'caps',
-            'char-style'    :       'character-style',
-            'dbl-strike'    :    'double-strike-through',
+            'caps______'    :   'caps',
+            'char-style'    :   'character-style',
+            'dbl-strike'    :   'double-strike-through',
             'emboss____'    :   'emboss',
             'engrave___'    :   'engrave',
             'font-color'    :   'font-color',
@@ -96,7 +97,7 @@ class Inline:
             'font-size_'    :   'font-size',
             'font-style'    :   'font-style',
             'font-up___'    :   'superscript',
-            'footnot-mk'    :       'footnote-marker',
+            'footnot-mk'    :   'footnote-marker',
             'green_____'    :   'green',
             'hidden____'    :   'hidden',
             'italics___'    :   'italics',
@@ -107,9 +108,10 @@ class Inline:
             'strike-thr'    :   'strike-through',
             'subscript_'    :   'subscript',
             'superscrip'    :   'superscript',
-            'underlined'    :       'underlined',
+            'underlined'    :   'underlined',
         }
         self.__caps_list = ['false']
+
     def __set_list_func(self, line):
         """
         Requires:
@@ -128,6 +130,7 @@ class Inline:
                 self.__place = 'in_list'
                 self.__inline_list = self.__list_inline_list
                 self.__groups_in_waiting = self.__groups_in_waiting_list
+
     def __default_func(self, line):
         """
         Requires:
@@ -140,8 +143,8 @@ class Inline:
         action = self.__default_dict.get(self.__token_info)
         if action:
             action(line)
-        if self.__token_info != 'cw<nu<hard-lineb': #calibre
-            self.__write_obj.write(line)
+        self.__write_obj.write(line)
+
     def __found_open_bracket_func(self, line):
         """
         Requires:
@@ -156,6 +159,7 @@ class Inline:
         self.__groups_in_waiting[0] += 1
         self.__inline_list.append({})
         self.__inline_list[-1]['contains_inline'] = 0
+
     def __after_open_bracket_func(self, line):
         """
         Requires:
@@ -176,6 +180,7 @@ class Inline:
                 self.__state = 'default' #  a non control word?
                 action(line)
         self.__write_obj.write(line)
+
     def __handle_control_word(self, line):
         """
         Required:
@@ -206,6 +211,7 @@ class Inline:
                 elif char_value == 'Zapf Dingbats':
                     self.__write_obj.write('mi<mk<font-dingb\n')
             """
+
     def __close_bracket_func(self, line):
         """
         Requires:
@@ -244,6 +250,7 @@ class Inline:
         self.__inline_list.pop()
         if self.__groups_in_waiting[0] != 0:
             self.__groups_in_waiting[0] -= 1
+
     def __found_text_func(self, line):
         """
         Required:
@@ -257,7 +264,6 @@ class Inline:
                 Text can mark the start of a paragraph.
                 If already in a paragraph, check to see if any groups are waiting
                 to be added. If so, use another method to write these groups.
-            3. If not check if hardline break, then write
         """
         if self.__place == 'in_list':
             self.__write_inline()
@@ -265,12 +271,9 @@ class Inline:
             if not self.__in_para:
                 self.__in_para = 1
                 self.__start_para_func(line)
-            else:
-                if self.__token_info == 'cw<nu<hard-lineb': #calibre
-                    self.__write_obj.write('mi<tg<empty_____<hardline-break\n')
-                if self.__groups_in_waiting[0] != 0:
+            elif self.__groups_in_waiting[0] != 0:
                     self.__write_inline()
-                
+
     def __write_inline(self):
         """
         Required:
@@ -314,6 +317,7 @@ class Inline:
                             self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
                     self.__write_obj.write('\n')
         self.__groups_in_waiting[0] = 0
+
     def __end_para_func(self, line):
         """
         Requires:
@@ -342,6 +346,7 @@ class Inline:
                     self.__write_obj.write('mi<mk<caps-end__\n')
                 self.__write_obj.write('mi<tg<close_____<inline\n')
         self.__in_para = 0
+
     def __start_para_func(self, line):
         """
         Requires:
@@ -369,12 +374,14 @@ class Inline:
                         self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
                 self.__write_obj.write('\n')
         self.__groups_in_waiting[0] = 0
+
     def __found_field_func(self, line):
         """
         Just a default function to make sure I don't prematurely exit
         default state
         """
         pass
+
     def form_tags(self):
         """
         Requires:
@@ -386,32 +393,27 @@ class Inline:
             the state.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            token = line[0:-1]
-            self.__token_info = ''
-            if token == 'tx<mc<__________<rdblquote'\
-                or token == 'tx<mc<__________<ldblquote'\
-                or token == 'tx<mc<__________<lquote'\
-                or token == 'tx<mc<__________<rquote'\
-                or token == 'tx<mc<__________<emdash'\
-                or token == 'tx<mc<__________<endash'\
-                or token == 'tx<mc<__________<bullet':
-                self.__token_info = 'text'
-            else:
-                self.__token_info = line[:16]
-            self.__set_list_func(line)
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('No matching state in module inline_for_lists.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    token = line[0:-1]
+                    self.__token_info = ''
+                    if token in ('tx<mc<__________<rdblquote',
+                                 'tx<mc<__________<ldblquote',
+                                 'tx<mc<__________<lquote',
+                                 'tx<mc<__________<rquote',
+                                 'tx<mc<__________<emdash',
+                                 'tx<mc<__________<endash',
+                                 'tx<mc<__________<bullet'):
+                        self.__token_info = 'text'
+                    else:
+                        self.__token_info = line[:16]
+                    self.__set_list_func(line)
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('No matching state in module inline.py\n')
+                        sys.stderr.write(self.__state + '\n')
+                    action(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "inline.data")
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index 543ae5dd83..dfc482d981 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -15,8 +15,11 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import os, tempfile, re
+import os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+from calibre.utils.cleantext import clean_ascii_chars
+
 class FixLineEndings:
     """Fix line endings"""
     def __init__(self,
@@ -32,36 +35,23 @@ class FixLineEndings:
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
         self.__replace_illegals = replace_illegals
+
     def fix_endings(self):
-        ##tempFileName = tempfile.mktemp()
-        illegal_regx = re.compile( '\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
-        #nums = [0, 1, 2, 3, 4, 5, 6, 7, 8,  11,  14, 15, 16, 17, 18, 19]
-        """
-read_obj = open(self.__file, 'r')
-line = read_obj.read(1000)
-regexp = re.compile(r"\r")
-macintosh = regexp.search(line)
-read_obj.close()
-        """
-        # always check since I have to get rid of illegal characters
-        macintosh = 1
-        if macintosh:
-            line = 1
-            read_obj = open(self.__file, 'r')
-            write_obj = open(self.__write_to, 'w')
-            while line:
-                line = read_obj.read(1000)
-                # line = re.sub(regexp,"\n",line)
-                line = line.replace ('\r', '\n')
-                if self.__replace_illegals:
-                    line = re.sub(illegal_regx, '', line)
-                    # for num in nums:
-                        # line = line.replace(chr(num), '')
-                write_obj.write(line )
-            read_obj.close()
-            write_obj.close()
-            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
-            if self.__copy:
-                copy_obj.copy_file(self.__write_to, "line_endings.data")
-            copy_obj.rename(self.__write_to, self.__file)
-            os.remove(self.__write_to)
+        #read the whole input file into memory
+        with open(self.__file, 'r') as read_obj:
+            input_file = read_obj.read()
+        #calibre: convert Windows (\r\n) and old Mac (\r) line endings to Unix (\n)
+        input_file = input_file.replace ('\r\n', '\n')
+        input_file = input_file.replace ('\r', '\n')
+        #strip invalid ASCII characters (the exact set is defined by clean_ascii_chars)
+        if self.__replace_illegals:
+            input_file = clean_ascii_chars(input_file)
+        #write the result to the temporary file; binary mode avoids newline translation
+        with open(self.__write_to, 'wb') as write_obj:
+            write_obj.write(input_file)
+        #copy: optional copy to line_endings.data, then put the result back at the original path
+        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+        if self.__copy:
+            copy_obj.copy_file(self.__write_to, "line_endings.data")
+        copy_obj.rename(self.__write_to, self.__file)
+        os.remove(self.__write_to)
diff --git a/src/calibre/ebooks/rtf2xml/pict.py b/src/calibre/ebooks/rtf2xml/pict.py
index 3a1044520e..c8a2e7e84a 100755
--- a/src/calibre/ebooks/rtf2xml/pict.py
+++ b/src/calibre/ebooks/rtf2xml/pict.py
@@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class Pict:
     """Process graphic information"""
     def __init__(self,
@@ -36,13 +38,11 @@ class Pict:
         self.__ob_count = 0
         self.__cb_count = 0
         self.__pict_count = 0
-        self.__in_pict = 0
-        self.__already_found_pict = 0
+        self.__in_pict = False
+        self.__already_found_pict = False
         self.__orig_file = orig_file
         self.__initiate_pict_dict()
         self.__out_file = out_file
-        # this is left over
-        self.__no_ask = 1
 
     def __initiate_pict_dict(self):
         self.__pict_dict = {
@@ -71,57 +71,43 @@ class Pict:
                 self.__out_file))
         else:
             dir_name = os.path.dirname(self.__orig_file)
-        # self.__output_to_file_func()
         self.__dir_name = base_name + "_rtf_pict_dir/"
         self.__dir_name = os.path.join(dir_name, self.__dir_name)
         if not os.path.isdir(self.__dir_name):
             try:
                 os.mkdir(self.__dir_name)
             except OSError, msg:
-                msg = str(msg)
-                msg += "Couldn't make directory '%s':\n" % (self.__dir_name)
+                msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
                 raise self.__bug_handler
         else:
-            if self.__no_ask:
-                user_response = 'r'
-            else:
-                msg = 'Do you want to remove all files in %s?\n' % self.__dir_name
-                msg += 'Type "r" to remove.\n'
-                msg +=  'Type any other key to keep files in place.\n'
-                sys.stderr.write(msg)
-                user_response = raw_input()
-            if user_response == 'r':
-                if self.__run_level > 1:
-                    sys.stderr.write('Removing files from old pict directory...\n')
-                all_files = os.listdir(self.__dir_name)
-                for the_file in all_files:
-                    the_file = os.path.join(self.__dir_name, the_file)
-                    try:
-                        os.remove(the_file)
-                    except OSError:
-                        pass
-                if self.__run_level > 1:
-                    sys.stderr.write('Files removed.\n')
+            if self.__run_level > 1:
+                sys.stderr.write('Removing files from old pict directory...\n')
+            all_files = os.listdir(self.__dir_name)
+            for the_file in all_files:
+                the_file = os.path.join(self.__dir_name, the_file)
+                try:
+                    os.remove(the_file)
+                except OSError:
+                    pass
+            if self.__run_level > 1:
+                sys.stderr.write('Files removed.\n')
 
     def __create_pict_file(self):
         """Create a file for all the pict data to be written to.
         """
         self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf')
-        write_pic_obj = open(self.__pict_file, 'w')
-        write_pic_obj.close()
         self.__write_pic_obj = open(self.__pict_file, 'a')
 
     def __in_pict_func(self, line):
         if self.__cb_count == self.__pict_br_count:
-            self.__in_pict = 0
+            self.__in_pict = False
             self.__write_pic_obj.write("}\n")
-            return 1
+            return True
         else:
             action = self.__pict_dict.get(self.__token_info)
             if action:
-                line = action(line)
-                self.__write_pic_obj.write(line)
-            return 0
+                self.__write_pic_obj.write(action(line))
+            return False
 
     def __default(self, line, write_obj):
         """Determine if each token marks the beginning of pict data.
@@ -142,53 +128,50 @@ class Pict:
             write_obj.write('mi<mk<pict-end__\n')
             if not self.__already_found_pict:
                 self.__create_pict_file()
-                self.__already_found_pict=1;
+                self.__already_found_pict=True;
                 self.__print_rtf_header()
             self.__in_pict = 1
             self.__pict_br_count = self.__ob_count
             self.__cb_count = 0
             self.__write_pic_obj.write("{\\pict\n")
-            return 0
-        return 1
+            return False
+        return True
 
     def __print_rtf_header(self):
         """Print to pict file the necessary RTF data for the file to be
         recognized as an RTF file.
         """
-        self.__write_pic_obj.write("{\\rtf1 \n")
-        self.__write_pic_obj.write("{\\fonttbl\\f0\\null;} \n")
-        self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n")
-        self.__write_pic_obj.write("\\pard \n")
+        self.__write_pic_obj.write("{\\rtf1 \n{\\fonttbl\\f0\\null;} \n")
+        self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n\\pard \n")
 
     def process_pict(self):
         self.__make_dir()
-        read_obj = open(self.__file)
-        write_obj = open(self.__write_to, 'w')
-        line_to_read = 'dummy'
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__cb_count = line[-5:-1]
-            if not self.__in_pict:
-                to_print = self.__default(line, write_obj)
-                if to_print :
-                    write_obj.write(line)
-            else:
-                to_print = self.__in_pict_func(line)
-                if to_print :
-                    write_obj.write(line)
-        if self.__already_found_pict:
-            self.__write_pic_obj.write("}\n")
-            self.__write_pic_obj.close()
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    if self.__token_info == 'ob<nu<open-brack':
+                        self.__ob_count = line[-5:-1]
+                    if self.__token_info == 'cb<nu<clos-brack':
+                        self.__cb_count = line[-5:-1]
+                    if not self.__in_pict:
+                        to_print = self.__default(line, write_obj)
+                        if to_print :
+                            write_obj.write(line)
+                    else:
+                        to_print = self.__in_pict_func(line)
+                        if to_print :
+                            write_obj.write(line)
+                if self.__already_found_pict:
+                    self.__write_pic_obj.write("}\n")
+                    self.__write_pic_obj.close()
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "pict.data")
+            try:
+                copy_obj.copy_file(self.__pict_file, "pict.rtf")
+            except Exception:  # best-effort copy; e.g. no picture found, so picts.rtf was never created
+                pass
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
         if self.__pict_count == 0:
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 19a7d38135..9460af07fc 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -15,8 +15,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import os, re,  tempfile
+import os, re, tempfile
+
 from calibre.ebooks.rtf2xml import copy, check_brackets
+
 class ProcessTokens:
     """
     Process each token on a line and add information that will be useful for
@@ -41,14 +43,16 @@ class ProcessTokens:
         self.__bracket_count=0
         self.__exception_handler = exception_handler
         self.__bug_handler = bug_handler
+
     def compile_expressions(self):
         self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)")
         self.__utf_exp = re.compile(r'(&.*?;)')
+
     def initiate_token_dict(self):
         self.__return_code = 0
         self.dict_token={
         # unicode
-        'mshex'             :   ('nu', '__________', self.__ms_hex_func),
+        'mshex'              :  ('nu', '__________', self.__ms_hex_func),
         # brackets
         '{'                  :	('nu', '{', self.ob_func),
         '}'                  :	('nu', '}', self.cb_func),
@@ -66,6 +70,7 @@ class ProcessTokens:
         ';'                  :	('mc', ';', self.ms_sub_func),
         # this must be wrong
         '-'                  :	('mc', '-', self.ms_sub_func),
+        'line'               :  ('mi', 'hardline-break', self.hardline_func), #calibre
         # misc => ml
         '*'                  :	('ml', 'asterisk__', self.default_func),
         ':'                  :	('ml', 'colon_____', self.default_func),
@@ -73,7 +78,6 @@ class ProcessTokens:
         'backslash'          :	('nu', '\\', self.text_func),
         'ob'                 :	('nu', '{', self.text_func),
         'cb'                 :	('nu', '}', self.text_func),
-        'line'               :  ('nu', 'hard-lineb', self.default_func), #calibre
         #'line'               :  ('nu', ' ', self.text_func), calibre
         # paragraph formatting => pf
         'page'               :  ('pf', 'page-break', self.default_func),
@@ -159,15 +163,17 @@ class ProcessTokens:
         'rtf'                :	('ri', 'rtf_______', self.default_func),
         'deff'               :	('ri', 'deflt-font', self.default_func),
         'mac'                :	('ri', 'macintosh_', self.default_func),
+        'pc'                 :	('ri', 'pc________', self.default_func),
+        'pca'                :	('ri', 'pca_______', self.default_func),
         'ansi'               :	('ri', 'ansi______', self.default_func),
         'ansicpg'            :	('ri', 'ansi-codpg', self.default_func),
         # notes => nt
         'footnote'           :	('nt', 'footnote__', self.default_func),
         'ftnalt'             :	('nt', 'type______<endnote', self.two_part_func),
         # anchor => an
-        'tc'                :	('an', 'toc_______', self.default_func),
+        'tc'                 :	('an', 'toc_______', self.default_func),
         'bkmkstt'            :	('an', 'book-mk-st', self.default_func),
-        'bkmkstart'         :	('an', 'book-mk-st', self.default_func),
+        'bkmkstart'          :	('an', 'book-mk-st', self.default_func),
         'bkmkend'            :	('an', 'book-mk-en', self.default_func),
         'xe'                 :	('an', 'index-mark', self.default_func),
         'rxe'                :	('an', 'place_____', self.default_func),
@@ -347,7 +353,7 @@ class ProcessTokens:
             10:     'Kanji numbering without the digit character',
             11:     'Kanji numbering with the digit character',
             1246:   'phonetic Katakana characters in aiueo order',
-            1346:    'phonetic katakana characters in iroha order',
+            1346:   'phonetic katakana characters in iroha order',
             14:     'double byte character',
             15:     'single byte character',
             16:     'Kanji numbering 3',
@@ -392,7 +398,7 @@ class ProcessTokens:
             5121 	:  'Arabic Algeria',
             15361 	:  'Arabic Bahrain',
             3073 	:  'Arabic Egypt',
-            1 	        :   'Arabic General',
+            1 	    :   'Arabic General',
             2049 	:  'Arabic Iraq',
             11265 	:  'Arabic Jordan',
             13313 	:  'Arabic Kuwait',
@@ -417,7 +423,7 @@ class ProcessTokens:
             1059 	:  'Byelorussian',
             1027 	:  'Catalan',
             2052 	:  'Chinese China',
-            4 	        :  'Chinese General',
+            4 	    :  'Chinese General',
             3076 	:  'Chinese Hong Kong',
             4100 	:  'Chinese Singapore',
             1028 	:  'Chinese Taiwan',
@@ -431,7 +437,7 @@ class ProcessTokens:
             2057 	:  'English British',
             4105 	:  'English Canada',
             9225 	:  'English Caribbean',
-            9 	        :  'English General',
+            9 	    :  'English General',
             6153 	:  'English Ireland',
             8201 	:  'English Jamaica',
             5129 	:  'English New Zealand',
@@ -595,30 +601,37 @@ class ProcessTokens:
         num = num[1:] # chop off leading 0, which I added
         num = num.upper() # the mappings store hex in caps
         return 'tx<hx<__________<\'%s\n' % num # add an ' for the mappings
+
     def ms_sub_func(self, pre, token, num):
         return 'tx<mc<__________<%s\n' % token
+
+    def hardline_func(self, pre, token, num):
+        return 'mi<tg<empty_____<%s\n' % token
+
     def default_func(self, pre, token, num):
-        if num == None:
+        if num is None:
             num = 'true'
         return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
+
     def __list_type_func(self, pre, token, num):
         type = 'arabic'
-        if num == None:
+        if num is None:
             type = 'Arabic'
         else:
             try:
                 num = int(num)
             except ValueError:
                 if self.__run_level > 3:
-                    msg = 'number "%s" cannot be converted to integer\n' % num
+                    msg = 'Number "%s" cannot be converted to integer\n' % num
                     raise self.__bug_handler, msg
             type = self.__number_type_dict.get(num)
-            if type == None:
+            if type is None:
                 if self.__run_level > 3:
                     msg = 'No type for "%s" in self.__number_type_dict\n'
                     raise self.__bug_handler
                 type = 'Arabic'
         return 'cw<%s<%s<nu<%s\n' % (pre, token, type)
+
     def __language_func(self, pre, token, num):
         lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group()))
         if not lang_name:
@@ -627,31 +640,36 @@ class ProcessTokens:
                 msg = 'No entry for number "%s"' % num
                 raise self.__bug_handler, msg
         return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
+
     def two_part_func(self, pre, token, num):
         list = token.split("<")
         token = list[0]
         num = list[1]
         return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
         ##return 'cw<nu<nu<nu<%s>num<%s\n' % (token, num)
+
     def divide_by_2(self, pre, token, num):
         num = self.divide_num(num, 2)
         return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
         ##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token)
+
     def divide_by_20(self, pre, token, num):
         num = self.divide_num(num, 20)
         return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
         ##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token)
+
     def text_func(self, pre, token, num=None):
         return 'tx<nu<__________<%s\n' % token
+
     def ob_func(self, pre, token, num=None):
         self.__bracket_count += 1
-        ##return 'ob<%04d\n' % self.__bracket_count
         return 'ob<nu<open-brack<%04d\n' % self.__bracket_count
+
     def cb_func(self, pre, token, num=None):
-        ##line = 'cb<%04d\n' % self.__bracket_count
         line = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count
         self.__bracket_count -= 1
         return line
+
     def color_func(self, pre, token, num):
         third_field = 'nu'
         if num[-1] == ';':
@@ -662,6 +680,7 @@ class ProcessTokens:
             num = "0" + num
         return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)
         ##return 'cw<cl<%s<nu<nu<%s>%s<%s\n' % (third_field, token, num, token)
+
     def bool_st_func(self, pre, token, num):
         if num is None or num == '' or num == '1':
             return 'cw<%s<%s<nu<true\n' % (pre, token)
@@ -670,24 +689,23 @@ class ProcessTokens:
             return 'cw<%s<%s<nu<false\n' % (pre, token)
                 ##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
         else:
-            msg = 'boolean should have some value module process tokens\n'
-            msg += 'token is ' + token + "\n"
-            msg += "'" + num + "'" + "\n"
+            msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num)
             raise self.__bug_handler, msg
+
     def __no_sup_sub_func(self, pre, token, num):
         the_string = 'cw<ci<subscript_<nu<false\n'
         the_string += 'cw<ci<superscrip<nu<false\n'
         return the_string
+
     def divide_num(self, numerator, denominator):
         try:
-            numerator = float(re.search('[0-9.]+', numerator).group())            
+            #calibre why ignore negative number? Wrong in case of \fi
+            numerator = float(re.search('[0-9.\-]+', numerator).group())
         except TypeError, msg:
             if self.__run_level > 3:
-                msg = 'no number to process?\n'
-                msg += 'this indicates that the token '
-                msg += ' \(\\li\) should have a number and does not\n'
-                msg += 'numerator is "%s"\n' % numerator
-                msg += 'denominator is "%s"\n' % denominator
+                msg = ('No number to process?\nthis indicates that the token \(\\li\) \
+                should have a number and does not\nnumerator is \
+                "%s"\ndenominator is "%s"\n') % (numerator, denominator)
                 raise self.__bug_handler, msg
             if 5 > self.__return_code:
                 self.__return_code = 5
@@ -698,9 +716,10 @@ class ProcessTokens:
         if string_num[-2:] == ".0":
             string_num = string_num[:-2]
         return string_num
+
     def split_let_num(self, token):
         match_obj = re.search(self.__num_exp,token)
-        if match_obj != None:
+        if match_obj is not None:
             first = match_obj.group(1)
             second = match_obj.group(2)
             if not second:
@@ -714,6 +733,7 @@ class ProcessTokens:
                 raise self.__bug_handler
             return token, 0
         return first, second
+
     def convert_to_hex(self,number):
         """Convert a string to uppercase hexidecimal"""
         num = int(number)
@@ -722,6 +742,7 @@ class ProcessTokens:
             return hex_num
         except:
             raise self.__bug_handler
+
     def process_cw(self, token):
         """Change the value of the control word by determining what dictionary
         it belongs to"""
@@ -737,89 +758,62 @@ class ProcessTokens:
         pre, token, action = self.dict_token.get(token, (None, None, None))
         if action:
             return action(pre, token, num)
-    # unused function
-    def initiate_token_actions(self):
-        self.action_for_token={
-        '{'     :   self.ob_func,
-        '}'     :   self.cb_func,
-        '\\'    :   self.process_cw,
-        }
-    # unused function
-    def evaluate_token(self,token):
-        """Evaluate tokens. Return a value if the token is not a
-        control word. Otherwise, pass token onto another method
-        for further evaluation."""
-        token, action = self.dict_token.get(token[0:1])
-        if action:
-            line = action(token)
-            return line
-        else :
-            return  'tx<nu<nu<nu<nu<%s\n' % token
+
     def __check_brackets(self, in_file):
         self.__check_brack_obj = check_brackets.CheckBrackets\
             (file = in_file)
         good_br =  self.__check_brack_obj.check_brackets()[0]
         if not good_br:
             return 1
+
     def process_tokens(self):
         """Main method for handling other methods. """
-        first_token = 0
-        second_token = 0
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__write_to, 'w')
-        line_to_read = "dummy"
         line_count = 0
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            token = line_to_read
-            token = token.replace("\n","")
-            if not token:
-                continue
-            line_count += 1
-            try:
-                token.decode('us-ascii')
-            except UnicodeError, msg:
-                msg = str(msg)
-                msg += 'Invalid RTF: File not ascii encoded.\n'
-                raise self.__exception_handler, msg
-            if not first_token:
-                if token != '\\{':
-                    msg = 'Invalid RTF: document doesn\'t start with {\n'
-                    raise self.__exception_handler, msg
-                first_token = 1
-            elif first_token and not second_token:
-                if token[0:4] != '\\rtf':
-                    msg ='Invalid RTF: document doesn\'t start with \\rtf \n'
-                    raise self.__exception_handler, msg
-                second_token = 1
-            ##token = self.evaluate_token(token)
-            the_index = token.find('\\ ')
-            if token != None and  the_index > -1:
-                msg ='Invalid RTF: token "\\ " not valid. \n'
-                raise self.__exception_handler, msg
-            elif token[0:1] == "\\":
-                line = self.process_cw(token)
-                if line != None:
-                    write_obj.write(line)
-            else:
-                fields = re.split(self.__utf_exp, token)
-                for field in fields:
-                    if not field:
-                        continue
-                    if field[0:1] == '&':
-                        write_obj.write('tx<ut<__________<%s\n' % field)
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'wb') as write_obj:
+                for line in read_obj:
+                    token = line.replace("\n","")
+                    line_count += 1
+                    if line_count == 1 and token != '\\{':
+                            msg = 'Invalid RTF: document doesn\'t start with {\n'
+                            raise self.__exception_handler, msg
+                    elif line_count == 2 and token[0:4] != '\\rtf':
+                            msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
+                            raise self.__exception_handler, msg
+
+                    the_index = token.find('\\ ')
+                    if token is not None and  the_index > -1:
+                        msg = 'Invalid RTF: token "\\ " not valid.\n'
+                        raise self.__exception_handler, msg
+                    elif token[:1] == "\\":
+                        try:
+                            token.decode('us-ascii')
+                        except UnicodeError, msg:
+                            msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
+                            raise self.__exception_handler, msg
+                        line = self.process_cw(token)
+                        if line is not None:
+                            write_obj.write(line)
                     else:
-                        write_obj.write('tx<nu<__________<%s\n' % field)
-        read_obj.close()
-        write_obj.close()
+                        fields = re.split(self.__utf_exp, token)
+                        for field in fields:
+                            if not field:
+                                continue
+                            if field[0:1] == '&':
+                                write_obj.write('tx<ut<__________<%s\n' % field)
+                            else:
+                                write_obj.write('tx<nu<__________<%s\n' % field)
+
         if not line_count:
-            msg ='Invalid RTF: file appears to be empty. \n'
+            msg = 'Invalid RTF: file appears to be empty.\n'
             raise self.__exception_handler, msg
+
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "processed_tokens.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
+
         bad_brackets = self.__check_brackets(self.__file)
         if bad_brackets:
             msg = 'Invalid RTF: document does not have matching brackets.\n'
diff --git a/src/calibre/ebooks/rtf2xml/replace_illegals.py b/src/calibre/ebooks/rtf2xml/replace_illegals.py
index 901cdd289d..4b477087d4 100755
--- a/src/calibre/ebooks/rtf2xml/replace_illegals.py
+++ b/src/calibre/ebooks/rtf2xml/replace_illegals.py
@@ -16,7 +16,10 @@
 #                                                                       #
 #########################################################################
 import os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+from calibre.utils.cleantext import clean_ascii_chars
+
 class ReplaceIllegals:
     """
     reaplace illegal lower ascii characters
@@ -30,21 +33,14 @@ class ReplaceIllegals:
         self.__copy = copy
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
+
     def replace_illegals(self):
         """
         """
-        nums = [0, 1, 2, 3, 4, 5, 6, 7, 8,  11,  13, 14, 15, 16, 17, 18, 19]
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            for num in nums:
-                line = line.replace(chr(num), '')
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as write_obj:
+                for line in read_obj:
+                    write_obj.write(clean_ascii_chars(line))
         copy_obj = copy.Copy()
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "replace_illegals.data")
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index 45887f33e7..de66415f0c 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -16,7 +16,10 @@
 #                                                                       #
 #########################################################################
 import os, re, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+from calibre.utils.mreplace import MReplace
+
 class Tokenize:
     """Tokenize RTF into one line per field. Each line will contain information useful for the rest of the script"""
     def __init__(self,
@@ -28,89 +31,175 @@ class Tokenize:
         self.__file = in_file
         self.__bug_handler = bug_handler
         self.__copy = copy
-        self.__special_tokens = [ '_', '~', "'", '{', '}' ]
         self.__write_to = tempfile.mktemp()
-    def __from_ms_to_utf8(self,match_obj):
-        uni_char = int(match_obj.group(1))
-        if uni_char < 0:
-            uni_char +=  65536
-        return   '&#x' + str('%X' % uni_char) + ';'
-    def __neg_unicode_func(self, match_obj):
-        neg_uni_char = int(match_obj.group(1)) * -1
-        # sys.stderr.write(str( neg_uni_char))
-        uni_char = neg_uni_char + 65536
-        return   '&#x' + str('%X' % uni_char) + ';'
-    def __sub_line_reg(self,line):
-        line = line.replace("\\\\", "\\backslash ")
-        line = line.replace("\\~", "\\~ ")
-        line = line.replace("\\;", "\\; ")
-        line = line.replace("&", "&amp;")
-        line = line.replace("<", "&lt;")
-        line = line.replace(">", "&gt;")
-        line = line.replace("\\~", "\\~ ")
-        line = line.replace("\\_", "\\_ ")
-        line = line.replace("\\:", "\\: ")
-        line = line.replace("\\-", "\\- ")
-        # turn into a generic token to eliminate special
-        # cases and make processing easier
-        line = line.replace("\\{", "\\ob ")
-        # turn into a generic token to eliminate special
-        # cases and make processing easier
-        line = line.replace("\\}", "\\cb ")
-        # put a backslash in front of to eliminate special cases and
-        # make processing easier
-        line = line.replace("{", "\\{")
-        # put a backslash in front of to eliminate special cases and
-        # make processing easier
-        line = line.replace("}", "\\}")
-        line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line)
-        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
-        line = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", line)
-        ##line = line.replace("\\backslash", "\\\\")
-        # this is for older RTF
-        line = re.sub(self.__par_exp, '\\par ', line)
-        return line
-    def __compile_expressions(self):
-        self.__ms_hex_exp = re.compile(r"\\\'(..)")
-        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
-        self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
-        self.__par_exp = re.compile(r'\\$')
-        self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
-        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
-    def __create_tokens(self):
         self.__compile_expressions()
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__write_to, 'w')
-        line_to_read = "dummy"
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            line = line.replace("\n", "")
-            line =  self.__sub_line_reg(line)
-            tokens = re.split(self.__splitexp, line)
-            ##print tokens
-            for token in tokens:
-                if token != "":
-                    write_obj.write(token + "\n")
-                    """
-                    match_obj = re.search(self.__mixed_exp, token)
-                    if match_obj != None:
-                        first = match_obj.group(1)
-                        second = match_obj.group(2)
-                        write_obj.write(first + "\n")
-                        write_obj.write(second + "\n")
-                    else:
-                        write_obj.write(token + "\n")
-                    """
-        read_obj.close()
-        write_obj.close()
+        #variables
+        self.__uc_char = 0
+        self.__uc_bin = False
+        self.__uc_value = [1]
+
+    def __reini_utf8_counters(self):
+        self.__uc_char = 0
+        self.__uc_bin = False
+
+    def __remove_uc_chars(self, startchar, token):
+        for i in xrange(startchar, len(token)):
+            if token[i] == " ":
+                continue
+            elif self.__uc_char:
+                self.__uc_char -= 1
+            else:
+                return token[i:]
+        #if only " " and char to skip
+        return ''
+
+    def __unicode_process(self, token):
+        #change scope in
+        if token == '\{':
+            self.__uc_value.append(self.__uc_value[-1])
+            #basic error handling
+            self.__reini_utf8_counters()
+            return token
+        #change scope out
+        elif token == '\}':
+            self.__uc_value.pop()
+            self.__reini_utf8_counters()
+            return token
+        #add a uc control
+        elif token[:3] == '\uc':
+            self.__uc_value[-1] = int(token[3:])
+            self.__reini_utf8_counters()
+            return token
+        #bin data to slip
+        elif self.__uc_bin:
+            self.__uc_bin = False
+            return ''
+        #uc char to remove
+        elif self.__uc_char:
+            #handle \bin tag in case of uc char to skip
+            if token[:4] == '\bin':
+                self.__uc_char -=1
+                self.__uc_bin = True
+                return ''
+            elif token[:1] == "\\" :
+                self.__uc_char -=1
+                return ''
+            else:
+                return self.__remove_uc_chars(0, token)
+        #go for real \u token
+        match_obj = self.__utf_exp.match(token)
+        if match_obj is not None:
+            self.__reini_utf8_counters()
+            #get value and handle negative case
+            uni_char = int(match_obj.group(1))
+            uni_len = len(match_obj.group(1)) + 2
+            if uni_char < 0:
+                uni_char += 65536
+            uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
+            self.__uc_char = self.__uc_value[-1]
+            #there is only an unicode char
+            if len(token)<= uni_len:
+                return uni_char
+            #an unicode char and something else
+            #must be after as it is splited on \
+            #necessary? maybe for \bin?
+            elif not self.__uc_char:
+                return uni_char + token[uni_len:]
+            #if not uc0 and chars
+            else:
+                return uni_char + self.__remove_uc_chars(uni_len, token)
+        #default
+        return token
+
+    def __sub_reg_split(self,input_file):
+        input_file = self.__replace_spchar.mreplace(input_file)
+        input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
+        input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
+        #remove \n in bin data
+        input_file = self.__bin_exp.sub(lambda x: \
+                                        x.group().replace('\n', '') + '\n', input_file)
+        #split
+        tokens = re.split(self.__splitexp, input_file)
+        #remove empty tokens and \n
+        return filter(lambda x: len(x) > 0 and x != '\n', tokens)
+        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
+        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
+        # this is for older RTF
+        #line = re.sub(self.__par_exp, '\\par ', line)
+        #return filter(lambda x: len(x) > 0, \
+            #(self.__remove_line.sub('', x) for x in tokens))
+
+    def __compile_expressions(self):
+        SIMPLE_RPL = {
+            "\\\\": "\\backslash ",
+            "\\~": "\\~ ",
+            "\\;": "\\; ",
+            "&": "&amp;",
+            "<": "&lt;",
+            ">": "&gt;",
+            "\\~": "\\~ ",
+            "\\_": "\\_ ",
+            "\\:": "\\: ",
+            "\\-": "\\- ",
+            # turn into a generic token to eliminate special
+            # cases and make processing easier
+            "\\{": "\\ob ",
+            # turn into a generic token to eliminate special
+            # cases and make processing easier
+            "\\}": "\\cb ",
+            # put a backslash in front of to eliminate special cases and
+            # make processing easier
+            "{": "\\{",
+            # put a backslash in front of to eliminate special cases and
+            # make processing easier
+            "}": "\\}",
+            # this is for older RTF
+            r'\\$': '\\par ',
+            }
+        self.__replace_spchar = MReplace(SIMPLE_RPL)
+        #add ;? in case of char following \u
+        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
+        self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
+        #manage upr/ud situations
+        self.__utf_ud = re.compile(r"\\{[\n ]?\\upr[\n ]?(?:\\{.*?\\})[\n ]?" + \
+                       r"\\{[\n ]?\\*[\n ]?\\ud[\n ]?(\\{.*?\\})[\n ]?\\}[\n ]?\\}")
+        #add \n in split for whole file reading
+        #why keep backslash whereas \is replaced before?
+        #remove \n from endline char
+        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+        #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
+        #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
+        #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
+        #self.__par_exp = re.compile(r'\\$')
+        #self.__remove_line = re.compile(r'\n+')
+        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
+        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
+
     def tokenize(self):
-        """Main class for handling other methods. Reads in one line \
-        at a time, usues method self.sub_line to make basic substitutions,\
-        uses ? to process tokens"""
-        self.__create_tokens()
+        """Main class for handling other methods. Reads the file \
+        , uses method self.sub_reg to make basic substitutions,\
+        and process tokens by itself"""
+        #read
+        with open(self.__file, 'r') as read_obj:
+            input_file = read_obj.read()
+        
+        #process simple replacements and split giving us a correct list
+        #remove '' and \n in the process
+        tokens = self.__sub_reg_split(input_file)
+        #correct unicode
+        tokens = map(self.__unicode_process, tokens)
+        #remove empty items created by removing \uc
+        tokens = filter(lambda x: len(x) > 0, tokens)
+        
+        #write
+        with open(self.__write_to, 'wb') as write_obj:
+            write_obj.write('\n'.join(tokens))
+        #Move and copy
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "tokenize.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
+        
+        #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
\ No newline at end of file

From ee246b73ec23809bdb18acdfee0bb2e0da7584c8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 17:46:35 -0700
Subject: [PATCH 50/55] Zip file reading: Be more tolerant when a zip file has
 a damaged directory

---
 src/calibre/utils/zipfile.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py
index ff290abd25..c230b9dfa7 100644
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@@ -982,9 +982,12 @@ class ZipFile:
             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 
         if fname != zinfo.orig_filename:
-            raise BadZipfile, \
-                      'File name in directory "%s" and header "%s" differ.' % (
-                          zinfo.orig_filename, fname)
+            print ('WARNING: Header (%r) and directory (%r) filenames do not'
+                    ' match inside ZipFile')%(fname, zinfo.orig_filename)
+            print 'Using directory filename %r'%zinfo.orig_filename
+            #raise BadZipfile, \
+            #          'File name in directory "%r" and header "%r" differ.' % (
+            #              zinfo.orig_filename, fname)
 
         # check for encrypted flag & handle password
         is_encrypted = zinfo.flag_bits & 0x1

From d51bd60c9cad5a13df45f0edb7473aeb50d89beb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 18:24:26 -0700
Subject: [PATCH 51/55] Nicer DRM error message

---
 resources/images/document-encrypt.png | Bin 0 -> 8988 bytes
 src/calibre/gui2/dialogs/drm_error.py |  21 ++++++
 src/calibre/gui2/dialogs/drm_error.ui | 102 ++++++++++++++++++++++++++
 src/calibre/gui2/ui.py                |   8 +-
 src/calibre/gui2/viewer/main.py       |   5 +-
 5 files changed, 127 insertions(+), 9 deletions(-)
 create mode 100644 resources/images/document-encrypt.png
 create mode 100644 src/calibre/gui2/dialogs/drm_error.py
 create mode 100644 src/calibre/gui2/dialogs/drm_error.ui

diff --git a/resources/images/document-encrypt.png b/resources/images/document-encrypt.png
new file mode 100644
index 0000000000000000000000000000000000000000..07743420242a17d6c1c63d7a846d3b33d52c9ea6
GIT binary patch
literal 8988
zcmXY1cQ{<n_ukce?<53K5;Z!}dk}<8LX=e^ST%ZIy{!;YSMMciSXSRfl;|aT7bMXM
ztNZQe`~05!+&llwJ!j6D_kGXYI}>a0Qj?s7g#-WqkZWtHy~4K8|861z?6XI~Y6#m9
zSnFu20q+01@;l1D000~SZ8c?M-|YPy2+XWMbu|R!<?5q9cEm56QkZVZ$vvdZKw?dr
zw$&R`SI03Q<wO36$BFx;g0vDD-jV9(FX}2zID4F=gr9KmfRY1c9D~suZ5mY)xA}yH
z0==f0O+|<;s${{Z&n%xBaz|&sS$((jOI7}0KR0(hiLNX_;6eWON@q}8n@f(-&dpen
z<C>s<{7PZ-@BcMYT$$QFYyMLcMVhM~cQ==muR&*{Xz-B`DCmv(Y#Y>s`cL^2&SwTc
z)j=JA8yw#W8O*ENSvmisHT1pQx+b#bA1gmVU*YoCp@6XP{lSkPKdvh){hKilsg*&o
zWMex;wNouIfVJ#^g2nwizp|-Ic0$UBH&eul3#>1@L~%qE6}wn3!m$ofCnqOuyRD<c
zduRT`BO?=sm_L~7;o;%&Kc2t6+n8*$Ff~<Gb&ZX^B|7Q8w`a35UVrJr(Ew_+jAqwr
zNWto1h>xG&WnR#EL!P|ipAW%Y_KTz_Y*el%EB9@cmB9gietzkghoF+x!-fEii}%5j
zcJtzQ(|81fjjjSa#{XZ3$Ij^SSIgVD&HhA>^=^E?`N48Pu#3yVyKwYc9@}%@qv43?
zHMspE3F_u15P0K5?dRs;P?2YRzPBe3fH`<?XJ=<bwyFitIF$1{;T}`C77*_EN7KjT
z(z2Dd72I|ILC$;cqK*CTD0+AB)4<P(COYWl<z+yqqKUbAM}6fJ<J9?i6Ys+ny|O8j
zbadqpY@lz=%!+Fue_J&`?xTijk3Bp*_LkeybL4zBvfp6>MzdvlZf>3ncilXbT5`i}
zUh&_p&`B2x+W3-p^Goc@!>p$z&b$E4Pi&WP#5udLpddY`iXq+JN=LvRjFH+R=nNEm
zyK8wmsUz%uuq2Zl%Y+}e4;hs?KU^JaTR9UtKyFeTbY8n$-3H&Ss43oGj`^Jo@>cA^
zaS17=z0B{gqaXVJCZ%P|eOl`L_<wF59<8UQ(i^8{W@bt>(661%TJA(LTC6d4U8vhh
zD>W~cEbdb{5JM{~k5)baknQc+i1145>aBye!|j!T!>!xH5Jh#vUkZYPg6<bbRxh)q
z(dItOC4Cu(3P&j1u8ys6kJLs%ytVNH)$g~M)q6I#*}s3ERKg8|;KzMTqq*`!^)~&|
zA$QNQG3?Fv)R?zDJLzI$rj1esnV6aVT59pge(sn2g;zh>Y|y;T`xu2wdGt9kFe11s
z@9-W?8H`E{{>MeZCaqL9<!1r9zw$=m8wmp{9lJ0chqr9fB1~jZz(1OAQhjybzI9WE
zT&Vu%ax_QIZ*g&@@6!XhXR4bRr@xqmJnbDX^V1(sYmYYKg+CrFwFnSlLH|pY{mP*8
z@Ppztv%cc3xX+S1;d_(2n{f7<wSWZC69V$@UIi75P%h0+H+0Ch$|r!UTZQ)~cH3S5
zCUu_Q-<<9(AmX!JJ7vD^NcK!!mXGM?hR1+mgQdlMal_LSo0(!Cq`mi;NJ&Y{pJ0|e
zDD&=bb~3ZFaM!*wTK*d3&HGi;c7Sv7CkV5QnXSPvnUm@v0D(JltCvxT^8(w%4|aBZ
zLo_yIl|MYj<oo9vKD3<8yu&ikCbQG9(X|5u$oBZ>FW<T>BqBh>+5KOi<IR52KDQ}7
zspClDz|eQD9rVzh+<;G5=<&8=6ZYQ0;g9}7*Ztk<{15jw8n`L&Vy3zCEMT-H<S}QA
z7J+LYQ#<DQ>981`I$BR^S=_d5YJ%wtzB&j(edz%TdZ89vTsbDU^Rjrg)omRo=jDza
zwO?Z89XY7OQf-BobJTQbZ_lZZ%2`=D=t_L-`6(G^H+Cv6Ptv=F<yMX<0G;=@haOk|
z91hw~UotQ-bo#CaVQv=%AE5LmjY&|=UAOzI&-&PmKv_5cENas;L-yuGu?Y`2-y0NT
zWa;A-<KW~qK2wYv2)UasQ}8-@admXDnYg-<eO(oE!$p1hRo$=-GAs1s8{Pm;6B0Qv
zrLQ=G1tXTB&#!(LEd^iiPkW8f>OV&>EwNo=4%6cqh3{qv6^=r?e8Z#v!X2LAueGh-
z9yoxO^OzQQ&pvYO^ef(piiwHs&DGnGpT-yF3+Nc1;T;96T;E>}a3zw}ln6&tJot;{
zKW1v>I)r?w5vx~wLEO%D?d`)x`tp%6b*`%lohPZabsxqaVpGvC)pe=2a+-_DujD;!
z*~!hSbpY4k>g52~6ap_O6GZQ)WujvY(jJe>z(jAbX|r{3h+S^;p`1U8po}Y20^Nrr
z&g&CHZZ;WL2ZH7AE;i*lF!y}{_btI)V_0f^qbkluH;gIh7}B-h?z`?xXtIBo>Hm*_
zSYallU|Z{sB<;HI#3I46^Yd-%YxonObl}C&3L5NIx_Y>yrGpd1C~WcT;&^j!wJU`D
zbZ2{;FV5OT?QWe{;W+l*TmJguXKnD=6z55Qhed5OR$%uMxPd{JTj~5m$ynuyA!H9s
zs;u<gh~-6YrM>%&x&HY*o)NPgaM(dQ+mQH1<@}`C=>Z6ImZw&%<t;h?@lam4^MdpG
ze7`3^aoX;~2mYaxD#ZDrnd>%g;Bma=jd0}^YI1UN{&Z)I@@(sT(RJ)#?K79=fAD&5
z@^L4}=i~M;vuw^w6T0v0mWK)!#Ci#U;jytXL1Ez#fk(G=r6ACMl8Wu&J++Xl+`K67
zT7A37e*0VV9`pL_44a=IQ;=di66qU^&9paqFcg;%%F!52$xg7gd>AYR0@Y)2w6wHz
zIEuCE46+o~G~xna`5a<Ai_lsB=}mVqoGFH_h#Z`LO+YM6RE^KXC>3vx^qX6|66Me5
zECC_+lO;OvkHjkhGztLUyUQKFt<*PsabZ54r#bEC3lKW>^&j7E!mAGs{lz74>aar@
zm(grqe4uiUoeHduJV!a4A1=2sf^K^fgZ6AzGmP`t1NQ1`aYGs#8&j^gI(C{CU4}5v
z&!&w4OWuplW=OZt)|x+O{&~3u>9ga;L?|rck3Vm1ZRKEV9e{~sB3C{j?VCWfT?khE
z`Qq}bs;X|{wa;1Cu;aX63b+p;2v=qp|0aMnp`^tuEFkdfZ*gQKzHH9uL6K(ST{W@K
zgA5wrTT|1kXQ=-K_Vm%*1lIcSjrb5&;c<^5H6Qx*>X#2?Qaq3~jvq+>TKVJF806A^
ztN~?{BtRbbv6{RkqJWBSKc(IEj4xFIFfcOSmm<=zj3*SKcQA#Bopxa8)I&X7?$)E(
zbMrjAXoLRfSYCaqe5m)mgqW0DHuy#wfW?fBySpgp(Xw2{U6aEG&e2(INYMSp#>S`l
z`T1;n?Z&#gq}s=i=9*pEE3Xj5-hT2|@Jb3lyoQDbK(@4d7`7<7AlC^11mVD$1@qI>
z(`%l2s|*}q`R>(W378^@y0bDF1$uXj@$WFgz7rxjsE}A>QHvp86J7+1$bX4B!WI)O
zRd0jyQd3h?%CClP1LW~>>RMZ`9ufK7iF+iIB3oN!BWi4N??PVMl#J{sU|L8nj5tW0
z7Rj<@y-0|@3sf{ItMO3a@d^R3%NYuyGk=G3ULcd#ZQ%>~XBC|mpJ1Y^+B{F`(@Q_d
zxO3X(E#iP-G9vY(8gL|_s-|XzNEwiPs~<j-d&}94m9)yMy<h%qq7{D|iZZ^-5TgD}
zm4zN<3j&c2);S5{?!fX&$37X&A9p~R!_|jchw>d3hm+xy3cef9EM-Lf_Yt!e^)o_<
z>+BO2ep@!Nv_D)8w7X8lv7}1ovqB>@OcX#~azL?Zhb60x<eOy~pseTbr1HPDp#p&Y
z5ARG(9T~p@knXDusm5k^X}dZr0gDe)41xB%QBW!I{t)>ArUpO)pz$lr$gIfgu$OmD
zoCr1d%88oU6i)wh`-iQ+pL%2Vu*tjpM?_wQBWV?uuGk=*)MggUDwip!^yCtR9_ggC
zv-#=(B-M*_I-5uzqaQpzyq%UGnkH6i+mj_P_HeO3*Of!Op=)+9wF}BBeF<1d$yl|3
zwLIV>J`jHvm{RI*OMAK~8n&+#d+ADyk}xp+njQg`%Y@qAH;0eEtOh8Rle}aBvz5bN
zFT4!OF$FUH1UR$%FE}hD-D!v;Z~){)x(93C1u6J}q_}2W2nm!oE%f!jkChIN_#Bzg
z{TLw4>HixN`_hy}tQ8bAM2aME3`gyk12ss^%2G>T1Hji|sPnjWhq4L=sJ9SUj#%jg
zr%5%htSP!^Fn?bpBX%Ju&l~PYK28w2XeC1Xn7r<RF3MQ{YZGDJvN+0n!qjNFXJWav
znbf&V!4mWEuC`yXNUCeuV^c8KJRs78cXe=5MR8`>ua~sbE4Svlkv7J)<I~QF(4&Wk
zSr1fuueaMyZbu;OhYqz;P^+i}Iny1t8SCnvR_+vvSy_FI38bfkOBCitmgg`Ut!L)r
z6WeBmnm4NTxY~LnbKI?chdxyFGxuxUVgcXF1%F@3x}$kx{O)$jNT;cBuiZIneo?1M
z*1vs=W1PwAQ18PW0)x*mMlWEIu{Bw;EFiqU^tVPA1~att0qyx_Ssm2w6*s#wcAg7Z
znp%CW?(1y_giq!L<z$ZD+Z-OK2Z9<}r>(rkKX|=Q+xnSP^}3;|X0*=KEAu1rTNLcf
zvrXrpKdyhrd%F*e8JS;y@Bf}$Zed+qq}&STYx6xUhm_Pb-!Yf<kvEo>F1BbjOOr1H
zD<2wD0S~PVyuwAu{EfY|L2{L?`8Dyrp3@I2TTuZ=*5#0RjDcVH1+pjOG=?u`?Xfk$
zMs#9Zj&Vr}BA~wRP9a<JO1i|&ASJh3Wj*4h$ppW=dcRScM9xdS+BOweyUcG=D0(pI
z$wZo5e!x>tyL=<nKLy6qPiFW!i>h3Xk@`vif&61+Q|#NMj2GmADtKh{C12}0bUXv8
zNRHR#g-2Rq)Su6%RZ5bhGRCX0laS>UnAnE_Lbz<+=?ck#&HYE+#}v%Pfk3J-z`&ec
zHI$@sok)<!)Iiw&CQ5D7?Mu_A{cAm^Ca+El)_}zKZvh$$TX{jP_Jur3oQb;&`3Pl4
zTx%3-yHq)oTK+k~VNZSlyIl)od69Sd_xwNYpIA6%k}B)V!)6;-z{7hpo+AWQ454qk
zZTbMOPjCN_L(pER*bTfYC@o*0v@m^@%DQ|L!ERgr0|5%Rd?BJX`oAwS)P63ekwP7r
zbxL(${M3RrQvMNse*vt<_4F_?1L$i4OTx=?QC}48b_%fl3Am0yO|UR5fu-lI8@y9$
z1pJ+)PQBHR%Z5JO*{=1cfWYWmGG!_OmV-Y7NyK~t$XE3x1`NmYAL?J$mqdR0?oH-N
zmO}6%-ZWl<Aw=&QZu!8u1>kGB-oXiqG=6yYP}%TF1c|lLCYq1BmfY~2dN?`@<(q1!
z(dPTbYw&Kini7&bSP9hvKa(w{uL5!z&;b+0yT90~*C{Nevp{F^d5HLqgUL7;ik|vf
zhSrMI;26@W7dyOTCY}2TbgMy`su#zIAyDz^2?-73oGf;xCpa`}L=Qs=^f=P+Xp1G)
z${uk9QWip6!FI;uug8r<RZ*YL$@D+@KBMIu<ps}OWsALaW;ztXdtU+^U4WYOhO?9y
zD1AVWK@UR@RhH58+9`|9yEK-xM0kEMLm2UkC8k6V;6G+VkvOhVU4((=+Iu57eLL$p
z4lEv5kVCpRJXItM7us0t{MfAV)LRW;jK6EEB~nFvG_kP0%Z$fDZUIdd6=JukNgZca
zDkG_`!lA2+b4G|rzEh<IgpvzU%7yX6JD|4nkmN{8@AhLnGt!x?xk<nM#~JwgcD|bg
zeX<<`N`^Bc5=2H+RL;cONGa&xBVZlR@O(G^5n;I&Adttip$-khV+Yy9KcJR#?{~w&
zNvfBaVVO>pb)?z=#uYF<lp>E=B^CWc`#A(^8<Vg}=8GR6>CJ{>f{9x|8PP3AQ-i^*
z`ZmAJ7x8tGkJcl(IpCYPTCG}r?6yFNu34Jd)Xh}9_Jo=QY9Y-U?;%9qo?aXp#azKN
zS}q-2DCrCQCRw<)aSw6&!<v7*SsiL^T%Fi>7OtKEuq9h7d}oilSq)i%D7*=;h{nsW
zSN&2T$Ow)2-9ec8Yd6~F0h*0Jneju1Bka7|Avv_5hSf&<#mui&sn6kld)4X=Bi=S5
zlStU{Mp87CX0p@JrJ8b~r*GTT9l+QT`N|(JPqGit8%pFTzmeJ+C5y9ET}W&X#W8^K
zkJC}1fsbcq(4#mYdJ(8K*E11g(Y<&5MTw}|G^R&%R4O-$6@kvg)M^-m)NZEiFzP`&
zWm*oH=8pBatj1+ntE&n+@{D|3ESTwtUYV8d82%KUM(;^4tE8Eq!bIpco%)J#Js8p~
zWR2Xy0XrukUMRhgaD+s#FadC36;Po11a~;BS%z$vi|QW>50#K3;o>BjQwphlJK$#@
zH*6bsdfw_xZz{Rz^MxippDw|t{ndZeM&+U12ox-1jyr5^a6F<R6n%)0RO?X@k(VCV
zduHD*>y#CiL`H*`RV#r5789#-&WNd!=+L`E5%j^=tZV)t@EPS(rp;v8TU?45hz_iS
ze^EVcq8Uy^#N@%KoC*NH3dU-q??YT+Q1!;Lze;+I6X>#6i|`Sb3kW&GQyyN{FMt?0
z3@^H_DYd}C8J1k=tt_GvCu!kQm}qywkK;h9*!WJ4f%*Wl@Y`26s>uM*9ZpD94n!uD
z?}+RgNsjk(F)w7jZg32H%DCK6Y)G3<+n{&1MJN49RGE2Zmjw;(WTOH0xZ^zD#K(QB
zn>zq{VB<@DVM4An?*IKVh2x1R(yB}o&xp^f#5na8sB4`=gwF7Zbi8kPf4)QC&H<m5
zbfjs7sdxa;f()VbfGe~U#Hw^c_G&H0jZQcCL3fM0w`N~!3~u%n89_q9pdfeiI1f44
zLx}TiVV;V4*aa-gMl2u;)=3=XpyG?=7zc|?mlOzu^eBVjA4C5%(Q}BpGekDliK6k|
z?(z|b=U8i060l8kGBJMpNY9~RxR>-=l%+Z{iqD*aKk;~9U3HuC=kuY(orz~uQ_tt8
zweU(=KljeNfJ&8)rLZOA2v15RG7thgo>utn2Vy_Pw|bE^GtB60+(JXu-Zy9144{sd
zS^(n{lPKo9qf>#Q%(zVg&o(>YQEdz`@(b?hgfCZ#`r~WBa1i>5G0O?%D5TG~#>D}L
zv}UTzGbkTVQ29j?34-1WTewZ@TU#Zs{p1~V@Rh8`3G-_n*fahlp-x1^Ey$L;r-BJV
zRAdU3TjGmqdc)jNp7of?lt{lXde9gsF@QF#f~Xt6F90SX$i6D6{4lm)z1JP0iucTc
zlu-{#AE%;Pv#M+uMO2Ic4<U#-XDdY2$s@sXES&1^2@&cjv7E4%AKufZs=Kl|R{$Et
zEY#PjP1N_FAnA<>GpqrxLQUS${gto<3MxlS!?Rylah^Jp)sX+h<ME)iaAn)37kWoK
z_l$79NbC{HHull_*|$)2!X-xo^3pgxo0NKociKiq^ut;rY7HH{ug@i+u5=eA^Ont{
zi@A~h53V}BGItAk+`Rc%fA9G|s$+@suOc=b(l~sn=Vr2Ks4fAHd&`~A@a6l5Z{k)!
zCb*`m;H@PQsBZL?(&m?bWFT4bx{Y}Ty@)l{hiJ}Fes9WM2PlABqbTfk|1_`>9~CAj
zKKhY$s0(hJ6km##LATOD?0tSe8*nGJw|>9QDG3fMC^n#d0s@H>M0YQAzW!B9avW-m
zZyS-+cSimF+6rhuu3j=OA~F-xchmJ?_q>3CmP?Vp<SX%0r5YkmIN*ad8|tZg2LmqM
zB(8q~fkC{)BQ}fK3THtno@9!bksJlx{RWU8zG<+YR&l)K<24fWYg5{%P$x1LIFk|a
zXL;dAMcY9y2(#FF4S(iG=?M@ymqZ~(E|>&fR4PU!h3{CGixKeId#1(rcWXq|0z=~j
z2@Ptj+enD<81NlQ#~a1|&V01N`2)r!+Mv(-fp)i#sZovFq3w3yZ=sEQuA(&X)Ockl
zS%e5!YERq^ZR4OL1}xCA^nI{H^t3aGa3bw3KI-K=IFg24I(*0LD|d^vh-iwV9X6;}
z5ePf3PFgoG<hxvK4&$LBo+fM5El`$^FB~-2sm5Pe2(PH$UTy4d{EFx+v0S$peC)-;
z8wK~Q`wkgcYWcQa8k;{-Hha7zd*rT%Ix@u>iOgdm4XQ(v*%mmKC6YfvzF**Xi}oNu
zDAO+dPIGyz*NZ3a=S5lCy+B3AKlB6{+VtaTxEd<U;A)>3chC9eFuQRHl}$lWVg;E(
zudBQm7aw*?4NEi|`f1+Ez;1iN;sJE9(i*)%0IYDy2-gVE<hsr|q6<&pBvK+{OBoaO
zGCvuMf?JzFsbtmUy1PUjzhxn~MX~mNKu89B8C`0}LHt54HT#3FPCTzzkr!3O#{u$`
z-z;hM#q(B1pz*9)9=D9o#Ty0?mgYGr*;O66D@Nz4db*OKh81o^-n7&&?O!E>vQg%W
z$*!xzfe_8e{1V`d2fnt;npWeOI%>xjMo=DJ>9#*@r;$rr5Ic>xt)l;U&V@ZpVnZU@
z+Gwpb(;farY-4jz-$c<wPHrmuTc4yw#MRpg+8LVCEE`XM`|};8Bm|_BHJPAp?cHak
zDPP=*-;FV=uAsV<M{09-oOJIf7?Dozq4u1Az#m(ivFl+iO$lo9GBfD+EGTX-JYOO_
zk?J)C(X`=hk@s5>rk|1{PJ|ybGvVtdHKI0U5!!I?@}U(K*>u_0z)&M{34nNMkuJTm
z%=5j?TtF{_O04WFWpNhzj$HB2Tk8cp2G)XLd&vTRM<+wdEu6`#Ht)esU<?S&69xX>
zYX<m*8nYj+&KXMF_VYz+etn-`o?v$*NSid?Wop0}IVHYYR-a#DvXa)GJv}eA+)X9s
z=~At1jl(URG;U8b*}{~<DJI%7UZE{}mBw<W_ENlIwOIu%+2Twr#!V)|!p1c9N<k4H
z`i&nWTd1DjktQ;H!0>HY6goJ`KnmTqsoY{$$2ictaM~yZ<}ij4!187Ora5_iBz~Nm
z=noVUB8S-d5K)m^!OH!NTBU4ay`u}|s01mf3Gn3DYZuEA#U#&##9L$3#O-Z~aLa>e
zQzJsve}bJudr?|)N*f(epU*N@xfR3~Dt!NlDrc^Z%c;q=<)XM_TSWg##sRAaO<GaW
zL?WBA-;3E6`qfIkMl{-7MSGY~6h-&nba!!R2<qyoMNwIm+`5HXpf71JN{SE?O|V^j
zQzo!kOKL%E4GRv5YvzmwuA0~L6f*0aQc>P~E~u-xc1mYYlRzI6ndm>}e3KjaLe?lA
zs*Woat|}q!-~fP_eYsDJSAnoE%;^;p1(1@plhH$M(-WzoJMeEJ>F6Ao;600Ttk-*S
z+~T4Os+vy#w3>4&j<Uup{<jU{PRaLb!G-av>MB2G0}=^-F+7iW9>j_no~4nDI+FZ_
zGi_H&jZT{hqb`7$+H<vp+tJ}duaw>R^oV{uR+<~7k!+C0|192r{e^UHltA`rwLjEK
zaBV?ilbL8C_HSscAO$(F0LeKpPgR-9LsZU8Pe5euxzQ=_PF<SFK^oSUio?h0MUc)}
z8aWV7ZcsR`);yZ|OC&-j&n^VMwZ61nSC1&r5Z#0_&7CS-Tvztq0>w4uCJkW)d?PV(
zOleUP*X*wNj4W^zkD+91pk$h7q>7=XT(P%IV;M_T`x~a|IE=v?{v4-@cIWOMJRX1C
zzdwF|WUZ^9;Ao8_;$L-r=PmEFL4|}za1Ng!%guHlUFYqq_5x;DesFGMbV(?)m>#B;
zKFrrDf6U#xFw~O;M^$h2<DC=HI0^g}p)gJ{RWZ}=4WG_H>gPX-h$7v9c`7Fau*Zuf
z`y-TXY>g_>YQI)t3;2`1S;&}%4oqUyt^bCP;{s7PRMdq$e~1T#|2waHkZJsFFSyVZ
z{;5|_M9_|JT3#ED60%apjYz6i4{Gp;LP)lT6HEL_qO?<k6KC`Uh}PsCjA<yA-%#<L
zkn?fRd6ZS75~4C95=|e0aNfU}Xxzou`$9IE|Myf@|4|h=U+mAZk&~l*`biei)xfop
z3LxDk-ln6mBW$|7?*?g{5Aw!!URvWH_FSym;%#vWZSYzUH()xcQxUSv5(HPFV|blO
zGt7_(X0)MH4Y75}^bs;gMN=w;WmKKJ;sN6Em!gVtLaoNWKALGy)sZ#XSrOsOEg8D!
zO&rCS2?(Mt!~LEK^y@dKLCM}6uEl#_&F>J~rZ7uZLsOX_v6B&B!vx`p_?+rh>G2$O
z9U`{HKM@rDz(#6lyDHHp9!LIt!a1R-4i}@*a#{j)r?)&5pLGbeJTM`<=)`@rZO%w#
z;P&IrK_Q9={DXprc3gzpmwf*h35(gFF;y~;X@lHvX`p?2#5@C)ld-o5A!`+}mZ^%e
zDQgl=MwkQ(_|}=dd2WYQmDUY+k&GGCROGxr?ttf6F{}fMDh%@)k$r5{<86^Jn5)(*
z+>tJs%)(j#sUVul9pL8^(Flj}w82T0H$2PDzvo-v>VeC~Q#d79O;I9m^%OeXH+3ln
zie*t|HrBdARLexT+=}h;U-yw427SZ9@a<iV*s_4BZ*~8&93qYK>(1SlB;8$EggL6F
z^`3fXts6_U@X^JR%DzWdwT%oUt$n6aBYBOV;C1##?Af*lf7RE_V?#OE9>KSV22R$L
zk||B<rHf%yi%iEn-L4P;3U~WaU}IBkfS5YYVt}$X1fprPp%@?2^6saIYBx;Fzg6^s
z)pnbwOf5bUt(ixZ2xR%<S~-BMmW;%?nqbbiXkcx$v}AprB%`l4836#`eE;t(z}b>&
z_jWqObV*2|d3P*aQZJR2Ny}uYNh4uf%M2&%&vH}gG8LX=u2PoT%(G0djMW>)#bPv=
z1+e^c+?ZI&lr<)*BP={YN^o{;ts?<)KT{fh`Z5cm1or*h5a{je9qnup9VsBr{FRvo
ze503NK?CJcp1e8aupdD(ylPHs7ee)|CC2}h+H8len<=rbRpVP|vsRS>b)N~o&^@J2
z)}B8zqB;rtT&;2bMg_YK(c|*U_U#a^J)s|*oD+w^o-kB{S-}wpCzCn~E?~TC1EtnO
zX*8|$N+bz-Ji|Baa>wp3j5oD$Un@BFJ1^d;taO{}Vt=@-z$i7Wk7F};Bg0mmuw6tx
zX)Jf0f&L&yYR)=+E>28|O%@e9!W8rgvAdJ7h7M;IskzM!{7t5b;Q6~1ujNir9`J2I
zbD~|#YoJKAjpJrzMJ=iMNWQD8-AJ#En);>mihid`>k?NP=2V3#Dzkm!(toyZxnUkN
z-+MN(!{XyCChp?4IIA(%FZ$k$_5Iq`3cF92)5&vu-WmF&31wz;T2a}e*|yFV#1Qch
zJA4PRFvRD4n8L1qcO7*@ZB#w0sK?T!-RkP_2VtMXaiyw$<Oo~sRzntbKIHPoe^w*X
zn$4ruwocfmePPUeALDv8H`@G(;)h*gRtM(A0lU2Gl)ob~NU+0&a@2m}t!(M*#CDfe
zsjJI#lqG+-FlI$V1<B0jrrz%!9w_nT#_<T9x)yUD0FzlAz=J95_=lz1bBT>Bu=6?<
z?WBv*@8-Ckca9LvBQa9)Ohp(A!P6C@(weePVV`}+TzU<HxCDt&BxF|tDQh+V`$_|D
zrP?~F+>(5_M0P3#Z`l>~ZP<Rlv^%13C@omG5|B<CcQ!n=h4-GVq5o_L{@b4j#j5@$
wp1Dtc3j598x8>U}y7BSc`(L2r?|a;v`fa+-DkW{~aTkEL`b)JM6>!-90nS=cZ2$lO

literal 0
HcmV?d00001

diff --git a/src/calibre/gui2/dialogs/drm_error.py b/src/calibre/gui2/dialogs/drm_error.py
new file mode 100644
index 0000000000..5fbba47165
--- /dev/null
+++ b/src/calibre/gui2/dialogs/drm_error.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from PyQt4.Qt import QDialog
+from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
+
+class DRMErrorMessage(QDialog, Ui_Dialog):
+    'DRM error dialog; title, if given, is shown as a heading above the message'
+    def __init__(self, parent=None, title=None):
+        from xml.sax.saxutils import escape
+        QDialog.__init__(self, parent)
+        self.setupUi(self)
+        if title is not None: # plain text, so escape &, < and > before use as markup
+            self.msg.setText('<h2>%s</h2>%s'%(escape(title), unicode(self.msg.text())))
+        self.resize(self.sizeHint())
+
diff --git a/src/calibre/gui2/dialogs/drm_error.ui b/src/calibre/gui2/dialogs/drm_error.ui
new file mode 100644
index 0000000000..842807c9bc
--- /dev/null
+++ b/src/calibre/gui2/dialogs/drm_error.ui
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Dialog</class>
+ <widget class="QDialog" name="Dialog">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>417</width>
+    <height>235</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>This book is DRMed</string>
+  </property>
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="0" column="0">
+    <widget class="QLabel" name="label">
+     <property name="sizePolicy">
+      <sizepolicy hsizetype="Preferred" vsizetype="Preferred">
+       <horstretch>0</horstretch>
+       <verstretch>0</verstretch>
+      </sizepolicy>
+     </property>
+     <property name="maximumSize">
+      <size>
+       <width>132</width>
+       <height>16777215</height>
+      </size>
+     </property>
+     <property name="text">
+      <string/>
+     </property>
+     <property name="pixmap">
+      <pixmap resource="../../../../resources/images.qrc">:/images/document-encrypt.png</pixmap>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="1">
+    <widget class="QLabel" name="msg">
+     <property name="text">
+      <string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre, 
+&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
+     </property>
+     <property name="wordWrap">
+      <bool>true</bool>
+     </property>
+     <property name="openExternalLinks">
+      <bool>true</bool>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0" colspan="2">
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Close</set>
+     </property>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <resources>
+  <include location="../../../../resources/images.qrc"/>
+ </resources>
+ <connections>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>accepted()</signal>
+   <receiver>Dialog</receiver>
+   <slot>accept()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>248</x>
+     <y>254</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>157</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>rejected()</signal>
+   <receiver>Dialog</receiver>
+   <slot>reject()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>316</x>
+     <y>260</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>286</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+ </connections>
+</ui>
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index a6eeabd57f..01d3180778 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -468,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         try:
             if 'calibre.ebooks.DRMError' in job.details:
                 if not minz:
-                    d = error_dialog(self, _('Conversion Error'),
-                        _('<p>Could not convert: %s<p>It is a '
-                        '<a href="%s">DRM</a>ed book. You must first remove the '
-                        'DRM using third party tools.')%\
-                            (job.description.split(':')[-1],
-                                'http://bugs.calibre-ebook.com/wiki/DRM'))
+                    from calibre.gui2.dialogs.drm_error import DRMErrorMessage
+                    d = DRMErrorMessage(self, job.description.split(':')[-1])
                     d.setModal(False)
                     d.show()
                     self._modeless_dialogs.append(d)
diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index 6468cd88c6..c5001659a0 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -627,9 +627,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
             QApplication.processEvents()
         if worker.exception is not None:
             if isinstance(worker.exception, DRMError):
-                error_dialog(self, _('DRM Error'),
-                        _('<p>This book is protected by <a href="%s">DRM</a>')
-                        %'http://wiki.mobileread.com/wiki/DRM').exec_()
+                from calibre.gui2.dialogs.drm_error import DRMErrorMessage
+                DRMErrorMessage(self).exec_()
             else:
                 r = getattr(worker.exception, 'reason', worker.exception)
                 error_dialog(self, _('Could not open ebook'),

From f0881c3d26f5666dc2cd914ee5f55b737e166c8c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 18:59:25 -0700
Subject: [PATCH 52/55] News download: Convert various HTML 5 tags into <div>

---
 src/calibre/web/feeds/news.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 7bd5301dfb..ee5b11c5f6 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe):
         for attr in self.remove_attributes:
             for x in soup.findAll(attrs={attr:True}):
                 del x[attr]
-        for base in list(soup.findAll(['base', 'iframe'])):
+        for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed',
+            'command', 'datalist', 'video', 'audio'])):
             base.extract()
 
         ans = self.postprocess_html(soup, first_fetch)
+
+        # Nuke HTML5 tags
+        for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav',
+            'figcaption', 'figure', 'section']):
+            x.name = 'div'
+
         if job_info:
             url, f, a, feed_len = job_info
             try:

From d0f92778f8309aa3f8f4d765fe61031a23246b35 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 18:59:41 -0700
Subject: [PATCH 53/55] Fix Globe and Mail

---
 resources/recipes/globe_and_mail.recipe | 30 +++++++++++--------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/resources/recipes/globe_and_mail.recipe b/resources/recipes/globe_and_mail.recipe
index 4cc76688c1..22cb6fa5bb 100644
--- a/resources/recipes/globe_and_mail.recipe
+++ b/resources/recipes/globe_and_mail.recipe
@@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en'
 globeandmail.com
 '''
 
+import re
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1287083651(BasicNewsRecipe):
     title          = u'Globe & Mail'
-    __license__   = 'GPL v3'
-    __author__ = 'Szing'
+    __author__ = 'Kovid Goyal'
     oldest_article = 2
     no_stylesheets = True
     max_articles_per_feed = 100
@@ -38,24 +39,19 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
       (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
     ]
 
-    keep_only_tags = [
-      dict(name='h1'),
-      dict(name='h2', attrs={'id':'articletitle'}),
-      dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
-      dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
-      dict(name='id', attrs={'class':'article'}),
-      dict(name='table', attrs={'class':'todays-market'}),
-      dict(name='header', attrs={'id':'leadheader'})
-    ]
+    preprocess_regexps = [
+        (re.compile(r'<head.*?</head>', re.DOTALL), lambda m: '<head></head>'),
+        (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
+        ]
 
+    remove_tags_before = dict(name='h1')
     remove_tags = [
-      dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
-    ]
-
-    #this has to be here or the text in the article appears twice.
-    remove_tags_after = [dict(id='article')]
+            dict(name='div', attrs={'id':['ShareArticles', 'topStories']}),
+            dict(href=lambda x: x and 'tracking=' in x),
+            {'class':['articleTools', 'pagination', 'Ads', 'topad',
+                'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
 
     #Use the mobile version rather than the web version
     def print_version(self, url):
-        return url + '&service=mobile'
+        return url.partition('?')[0] + '?service=mobile' # no '?' => keep url
 

From bce5a1b4bc9ccb92112af849f64b020e6b4c5efb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 23:29:02 -0700
Subject: [PATCH 54/55] Pure python implementation of WMF parser to extract
 bitmapped images stored in WMF files

---
 src/calibre/utils/wmf/parse.py | 269 +++++++++++++++++++++++++++++++++
 1 file changed, 269 insertions(+)
 create mode 100644 src/calibre/utils/wmf/parse.py

diff --git a/src/calibre/utils/wmf/parse.py b/src/calibre/utils/wmf/parse.py
new file mode 100644
index 0000000000..c618884e33
--- /dev/null
+++ b/src/calibre/utils/wmf/parse.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, struct
+
+
+
+class WMFHeader(object):
+
+    '''
+    Parses and validates the WMF header (ValueError if invalid). See
+    http://www.skynet.ie/~caolan/publink/libwmf/libwmf/doc/ora-wmf.html
+    '''
+
+    def __init__(self, data, log, verbose):
+        self.log, self.verbose = log, verbose
+        offset = 0
+        file_type, header_size, windows_version = struct.unpack_from('<HHH', data)
+        offset += 6
+        # header_size is counted in 2-byte units (see records_start_at below)
+        if header_size != 9:
+            raise ValueError('Not a WMF file')
+        # file_size and num_of_objects follow the first three 16-bit fields
+        file_size, num_of_objects = struct.unpack_from('<IH', data, offset)
+
+        if file_size * 2 != len(data):
+            # file size is in 2-byte units
+            raise ValueError('WMF file header specifies incorrect file size')
+        offset += 6
+        # Records begin right after the header: header_size words -> bytes
+        self.records_start_at = header_size * 2
+
+class DIBHeader(object):
+    '''
+    Parses a DIB header of size 40 or 12 from the start of raw and stores
+    its fields as attributes. See http://en.wikipedia.org/wiki/BMP_file_format
+    '''
+
+    def __init__(self, raw):
+        hsize = struct.unpack('<I', raw[:4])[0]
+        if hsize == 40:
+            parts = struct.unpack('<IiiHHIIIIII', raw[:hsize])
+            for i, attr in enumerate((
+                'header_size', 'width', 'height', 'color_planes',
+                'bits_per_pixel', 'compression', 'image_size',
+                'hres', 'vres', 'ncols', 'nimpcols'
+                )):
+                setattr(self, attr, parts[i])
+        elif hsize == 12:
+            parts = struct.unpack('<IHHHH', raw[:hsize])
+            for i, attr in enumerate((
+                'header_size', 'width', 'height', 'color_planes',
+                'bits_per_pixel')):
+                setattr(self, attr, parts[i])
+        else:
+            raise ValueError('Unsupported DIB header type of size: %d'%hsize)
+        # Sizes of what may sit between this header and the pixel data
+        self.bitmasks_size = 12 if getattr(self, 'compression', 0) == 3 else 0
+        self.color_table_size = 0
+        if self.bits_per_pixel != 24:
+            # See http://support.microsoft.com/kb/q81498/
+            # for all the gory Micro and soft details
+            self.color_table_size = getattr(self, 'ncols', 0) * 4
+
+
+class WMF(object):
+    'Extracts the bitmaps embedded in a WMF; call the instance with the data.'
+    def __init__(self, log=None, verbose=0):
+        if log is None:
+            from calibre.utils.logging import default_log as log
+        self.log = log
+        self.verbose = verbose
+
+        self.map_mode = None
+        self.window_origin = None
+        self.window_extent = None
+        self.bitmaps = []
+
+        # Maps record function numbers to the names of the handler methods
+        self.function_map = { # {{{
+                30: 'SaveDC',
+                53: 'RealizePalette',
+                55: 'SetPalEntries',
+                79: 'StartPage',
+                80: 'EndPage',
+                82: 'AbortDoc',
+                94: 'EndDoc',
+                258: 'SetBkMode',
+                259: 'SetMapMode',
+                260: 'SetROP2',
+                261: 'SetRelabs',
+                262: 'SetPolyFillMode',
+                263: 'SetStretchBltMode',
+                264: 'SetTextCharExtra',
+                295: 'RestoreDC',
+                298: 'InvertRegion',
+                299: 'PaintRegion',
+                300: 'SelectClipRegion',
+                301: 'SelectObject',
+                302: 'SetTextAlign',
+                313: 'ResizePalette',
+                332: 'ResetDc',
+                333: 'StartDoc',
+                496: 'DeleteObject',
+                513: 'SetBkColor',
+                521: 'SetTextColor',
+                522: 'SetTextJustification',
+                523: 'SetWindowOrg',
+                524: 'SetWindowExt',
+                525: 'SetViewportOrg',
+                526: 'SetViewportExt',
+                527: 'OffsetWindowOrg',
+                529: 'OffsetViewportOrg',
+                531: 'LineTo',
+                532: 'MoveTo',
+                544: 'OffsetClipRgn',
+                552: 'FillRegion',
+                561: 'SetMapperFlags',
+                564: 'SelectPalette',
+                1040: 'ScaleWindowExt',
+                1042: 'ScaleViewportExt',
+                1045: 'ExcludeClipRect',
+                1046: 'IntersectClipRect',
+                1048: 'Ellipse',
+                1049: 'FloodFill',
+                1051: 'Rectangle',
+                1055: 'SetPixel',
+                1065: 'FrameRegion',
+                1352: 'ExtFloodFill',
+                1564: 'RoundRect',
+                1565: 'PatBlt',
+                2071: 'Arc',
+                2074: 'Pie',
+                2096: 'Chord',
+                3379: 'SetDibToDev',
+                247: 'CreatePalette',
+                248: 'CreateBrush',
+                322: 'DibCreatePatternBrush',
+                505: 'CreatePatternBrush',
+                762: 'CreatePenIndirect',
+                763: 'CreateFontIndirect',
+                764: 'CreateBrushIndirect',
+                765: 'CreateBitmapIndirect',
+                804: 'Polygon',
+                805: 'Polyline',
+                1078: 'AnimatePalette',
+                1313: 'TextOut',
+                1336: 'PolyPolygon',
+                1574: 'Escape',
+                1583: 'DrawText',
+                1790: 'CreateBitmap',
+                1791: 'CreateRegion',
+                2338: 'BitBlt',
+                2368: 'DibBitblt',
+                2610: 'ExtTextOut',
+                2851: 'StretchBlt',
+                2881: 'DibStretchBlt',
+                3907: 'StretchDIBits'
+        } # }}}
+
+    def __call__(self, stream_or_data):
+        'Parse the WMF in stream_or_data (raw data or an object with read()).'
+        data = stream_or_data
+        if hasattr(data, 'read'):
+            data = data.read()
+        self.log.filter_level = self.log.DEBUG
+        self.header = WMFHeader(data, self.log, self.verbose)
+
+        offset = self.header.records_start_at
+        hsize = struct.calcsize('<IH')
+        self.records = []
+        while offset < len(data)-6:
+            size, func = struct.unpack_from('<IH', data, offset)
+            size *= 2 # Convert to bytes
+            offset += hsize
+            params = ''
+            delta = size - hsize
+            if delta > 0:
+                params = data[offset:offset+delta]
+                offset += delta
+            func = self.function_map.get(func, func)
+            if self.verbose > 3:
+                self.log.debug('WMF Record:', size, func)
+            self.records.append((func, params))
+
+        for rec in self.records:
+            # Unknown record types keep their integer function number, which
+            # getattr() rejects with a TypeError, so treat them as unhandled
+            f = getattr(self, rec[0], None) if isinstance(rec[0], str) else None
+            if callable(f):
+                f(rec[1])
+            elif self.verbose > 2:
+                self.log.debug('Ignoring record:', rec[0])
+        self.has_raster_image = len(self.bitmaps) > 0
+
+    # Record handlers: named after function_map values, called with raw params
+    def SetMapMode(self, params):
+        if len(params) == 2:
+            self.map_mode = struct.unpack('<H', params)[0]
+        else:
+            self.log.warn('Invalid SetMapMode param')
+
+    def SetWindowOrg(self, params):
+        'Store the window origin: two unsigned ints of 2, 4 or 8 bytes each.'
+        # '<' selects standard sizes, where L is 4 bytes just like I, so a
+        # 16 byte payload needs Q; '<LL' would only ever accept 8 bytes
+        fmt = {4: '<HH', 8: '<II', 16: '<QQ'}.get(len(params))
+        if fmt is None:
+            self.log.warn('Invalid SetWindowOrg param', repr(params))
+        else:
+            self.window_origin = struct.unpack(fmt, params)
+
+    def SetWindowExt(self, params):
+        'Store the window extent: two unsigned ints of 2, 4 or 8 bytes each.'
+        # '<' selects standard sizes, where L is 4 bytes just like I, so a
+        # 16 byte payload needs Q; '<LL' would only ever accept 8 bytes
+        fmt = {4: '<HH', 8: '<II', 16: '<QQ'}.get(len(params))
+        if fmt is None:
+            self.log.warn('Invalid SetWindowExt param', repr(params))
+        else:
+            self.window_extent = struct.unpack(fmt, params)
+
+    def DibStretchBlt(self, raw):
+        'Save the DIB carried by a DibStretchBlt record as a BMP in self.bitmaps.'
+        fmt = '<IHHHHHHHH'
+        # The blit parameters are unpacked only to document the layout and to
+        # fail early on truncated records; none of them is used afterwards
+        raster_op, src_height, src_width, y_src, x_src, dest_height, \
+            dest_width, y_dest, x_dest = struct.unpack_from(fmt, raw, 0)
+        bmp = self.create_bmp_from_dib(raw[struct.calcsize(fmt):])
+        self.bitmaps.append(bmp)
+
+    def create_bmp_from_dib(self, raw):
+        'Return raw (a DIB) prefixed with the 14 byte BMP file header.'
+        dh = DIBHeader(raw)
+        # File offsets count from the start of the file, hence the extra 14
+        pixel_array_offset = 14 + dh.header_size + dh.bitmasks_size + \
+                             dh.color_table_size
+        return ''.join(('BM', struct.pack('<I', len(raw) + 14), '\0'*4,
+            struct.pack('<I', pixel_array_offset))) + raw
+
+    def to_png(self):
+        'Return the largest (longest) extracted bitmap converted to PNG data.'
+        bmp = sorted(self.bitmaps, key=len)[-1]
+        from calibre.utils.magick import Image
+        img = Image()
+        img.load(bmp)
+        return img.export('png')
+
+def wmf_unwrap(wmf_data):
+    '''
+    Parse wmf_data and return its largest embedded raster image as PNG
+    data. Raises ValueError when the WMF contains no raster image.
+    '''
+    parser = WMF()
+    parser(wmf_data)
+    if parser.has_raster_image:
+        return parser.to_png()
+    raise ValueError('No raster image found in the WMF')
+
+if __name__ == '__main__':
+    wmf = WMF(verbose=4) # verbose > 3 logs every record
+    wmf(open(sys.argv[-1], 'rb')) # last command line argument is the WMF file
+    open('/t/test.bmp', 'wb').write(wmf.bitmaps[0]) # dump the first bitmap found
+

From f48c31c4935b599171b66885d2e3e6f52ad7bf54 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jan 2011 23:29:29 -0700
Subject: [PATCH 55/55] RTF Input: Improved support for conversion of embedded
 WMF images

---
 src/calibre/ebooks/rtf/input.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index ba13668eb7..92ac8a2519 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -190,12 +190,11 @@ class RTFInput(InputFormatPlugin):
         return name
 
     def rasterize_wmf(self, name):
-        raise ValueError('Conversion of WMF images not supported')
-        from calibre.utils.wmf import extract_raster_image
+        from calibre.utils.wmf.parse import wmf_unwrap
         with open(name, 'rb') as f:
             data = f.read()
-        data = extract_raster_image(data)
-        name = name.replace('.wmf', '.jpg')
+        data = wmf_unwrap(data) # PNG data of the largest embedded raster image
+        name = name.replace('.wmf', '.png') # NOTE(review): only lowercase .wmf is renamed
         with open(name, 'wb') as f:
             f.write(data)
         return name