Sync to trunk.

This commit is contained in:
John Schember 2011-01-13 07:12:06 -05:00
commit 87d5f40d96
69 changed files with 2603 additions and 1784 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -1,59 +1,79 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
'''
www.expansion.com
expansion.es
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
title =u'Expansion.com'
__author__ ='Gerardo Diez'
publisher =u'Unidad Editorial Información Económica, S.L.'
category ='finances, catalunya'
oldest_article =1
max_articles_per_feed =100
simultaneous_downloads =10
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
timefmt ='[%A, %d %B, %Y]'
encoding ='latin'
language ='es'
remove_javascript =True
no_stylesheets =True
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
remove_tags =[
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
dict(name='span', attrs={'class':['comentarios']}),
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
dict(name='div', attrs={'id':['comentarios_lectores_listado']})
]
feeds =[
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
(u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
(u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
(u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
(u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
class Expansion(BasicNewsRecipe):
title = 'Diario Expansion'
__author__ = 'Darko Miletic'
description = 'Lider de informacion de mercados, economica y politica'
publisher = 'expansion.com'
category = 'news, politics, Spain'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
delay = 1
encoding = 'iso-8859-15'
language = 'es'
(u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
(u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
(u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
(u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
(u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
(u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
(u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
(u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
(u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
(u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
(u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
(u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
direction = 'ltr'
(u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
(u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
(u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
(u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
(u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
feeds = [
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
]
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
remove_tags = [
dict(name=['object','link','script'])
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
]
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
def preprocess_html(self, soup):
soup.html['dir' ] = self.direction
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup
(u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
(u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
(u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
]

View File

@ -1,10 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
msnbc.msn.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class MsNBC(BasicNewsRecipe):
@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe):
publisher = 'msnbc.com'
category = 'news, USA, world'
language = 'en'
extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} '
extra_css = """
body{ font-family: Georgia,Times,serif }
.hide{display: none}
.caption{font-family: Arial,sans-serif; font-size: x-small}
.entry-summary{font-family: Arial,sans-serif}
.copyright{font-size: 0.95em; font-style: italic}
.source-org{font-size: small; font-family: Arial,sans-serif}
img{display: block; margin-bottom: 0.5em}
span.byline{display: none}
"""
conversion_options = {
'comments' : description
@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe):
,'publisher': publisher
}
preprocess_regexps = [
(re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>')
,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'),
]
remove_tags_before = dict(name='h1', attrs={'id':'headline'})
remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
keep_only_tags=[
dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
]
remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
remove_tags = [
dict(name=['iframe','object','link','embed','meta','table'])
,dict(name='span', attrs={'class':['copyright','Linear copyright']})
,dict(name='div', attrs={'class':'social'})
]
remove_tags_before = dict(name='div', attrs={'class':'head'})
remove_tags_after = dict(name='div', attrs={'class':'copyright'})
remove_tags = [dict(name=['iframe','object','link','script','form'])]
feeds = [
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' )
@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe):
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' )
]
def print_version(self, url):
    """Return the printer-friendly version of an msnbc article URL.

    The print view is reached by appending a fixed path suffix to the
    article URL (which already ends with a slash).
    """
    suffix = 'print/1/displaymode/1098/'
    return '%s%s' % (url, suffix)
def preprocess_html(self, soup):
    """Clean up msnbc article markup before conversion.

    NOTE: written against the old BeautifulSoup 3 API (``has_key``,
    string-valued ``class`` attributes) bundled with calibre at the time.
    """
    # stray <div>s inside <head> break downstream rendering
    for item in soup.head.findAll('div'):
        item.extract()
    # demote nested <html> tags (msnbc pages embed them) to plain divs
    for item in soup.body.findAll('html'):
        item.name='div'
    for item in soup.body.findAll('div'):
        # drop "vine" widget containers and ad containers
        if item.has_key('id') and item['id'].startswith('vine-'):
            item.extract()
        if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
            item.extract()
    # ensure every image carries alt text
    for item in soup.body.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'
    # remove grid navigation lists
    for item in soup.body.findAll('ol'):
        if item.has_key('class') and item['class'].startswith('grid'):
            item.extract()
    # remove empty byline spans and inline widget spans
    for item in soup.body.findAll('span'):
        if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
            item.extract()
    # flatten hyperlinks to their plain text content
    for alink in soup.findAll('a'):
        if alink.string is not None:
            tstr = alink.string
            alink.replaceWith(tstr)
    return soup

View File

@ -685,3 +685,28 @@ class NYTimes(BasicNewsRecipe):
divTag.replaceWith(tag)
return soup
def populate_article_metadata(self, article, soup, first):
    """Synthesize article.summary from the page body when the feed gave none.

    Walks the paragraphs of the 'articleBody' div(s): paragraphs longer
    than ~70 chars become the description (prefixed with any accumulated
    short fragments); shorter fragments (bylines, datelines) are glued
    together until a long enough paragraph is found.
    """
    shortparagraph = ""
    try:
        # only bother when the feed supplied no usable summary
        if len(article.text_summary.strip()) == 0:
            articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
            if articlebodies:
                for articlebody in articlebodies:
                    if articlebody:
                        paras = articlebody.findAll('p')
                        for p in paras:
                            refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
                            #account for blank paragraphs and short paragraphs by appending them to longer ones
                            if len(refparagraph) > 0:
                                if len(refparagraph) > 70: #approximately one line of text
                                    article.summary = article.text_summary = shortparagraph + refparagraph
                                    return
                                else:
                                    shortparagraph = refparagraph + " "
                                    # lone word that is not a label: append a dash separator
                                    if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
                                        shortparagraph = shortparagraph + "- "
    except:
        # description building is best-effort; never abort the fetch
        self.log("Error creating article descriptions")
        return

View File

@ -685,4 +685,27 @@ class NYTimes(BasicNewsRecipe):
divTag.replaceWith(tag)
return soup
def populate_article_metadata(self, article, soup, first):
    """Synthesize article.summary from the page body when the feed gave none.

    Walks the paragraphs of the 'articleBody' div(s): paragraphs longer
    than ~70 chars become the description (prefixed with any accumulated
    short fragments); shorter fragments (bylines, datelines) are glued
    together until a long enough paragraph is found.
    """
    shortparagraph = ""
    try:
        # only bother when the feed supplied no usable summary
        if len(article.text_summary.strip()) == 0:
            articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
            if articlebodies:
                for articlebody in articlebodies:
                    if articlebody:
                        paras = articlebody.findAll('p')
                        for p in paras:
                            refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
                            #account for blank paragraphs and short paragraphs by appending them to longer ones
                            if len(refparagraph) > 0:
                                if len(refparagraph) > 70: #approximately one line of text
                                    article.summary = article.text_summary = shortparagraph + refparagraph
                                    return
                                else:
                                    shortparagraph = refparagraph + " "
                                    # lone word that is not a label: append a dash separator
                                    if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
                                        shortparagraph = shortparagraph + "- "
    except:
        # description building is best-effort; never abort the fetch
        self.log("Error creating article descriptions")
        return

View File

@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
def get_article_url(self, article):
return article.get('guid', article.get('id', None))
def print_version(self, url):
baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
split1 = string.split(url,"/")
@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
split2= string.split(xxx,"/")
s = baseurl + split2[0]
return s
def postprocess_html(self, soup, first_fetch):
    """Strip page chrome from the printer-friendly Technology Review page.

    Removes the header picture, the close button and the banner ad, then
    flattens all hyperlinks to plain text.

    Fixes: the third positional parameter was mistakenly named ``True``
    (shadows the builtin; a syntax error on Python 3) — renamed to
    calibre's conventional ``first_fetch``.  Each ``soup.find`` result is
    now guarded so a missing element no longer raises AttributeError.
    """
    # remove picture header, close button and banner advertisement;
    # any of them may be absent from a given page
    for cls in ('header', 'close', 'bannerad'):
        unwanted = soup.find(True, {'class': cls})
        if unwanted is not None:
            unwanted.replaceWith("")
    #thanks kiklop74! This code removes all links from the text
    for alink in soup.findAll('a'):
        if alink.string is not None:
            tstr = alink.string
            alink.replaceWith(tstr)
    return soup

View File

@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
if (weeknum > 1):
weeknum -= 1
title = u'.tyzden ' + str(weeknum) + '/' + str(year)
title = u'tyzden'
base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
base_url = base_url_path + '.html'

View File

@ -2,8 +2,10 @@
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode
class Wired_Daily(BasicNewsRecipe):
@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
no_stylesheets = True
preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
'<head></head>')]
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
'outerWrapper', 'inf_widget']),
{'class':['entryActions', 'advertisement', 'entryTags']},
dict(name=['noscript', 'script']),
dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
{'class':lambda x: x and x.startswith('contentjump')},
dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
('Product Reviews',
'http://www.wired.com/reviews/feeds/latestProductsRss'),
('Autopia', 'http://www.wired.com/autopia/feed/'),
('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
('Epicenter', 'http://www.wired.com/epicenter/feed/'),
('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
('Playbook', 'http://www.wired.com/playbook/feed/'),
('Rawfile', 'http://www.wired.com/rawfile/feed/'),
('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
('Underwire', 'http://www.wired.com/underwire/feed/'),
('Web Monkey', 'http://www.webmonkey.com/feed/'),
('Science', 'http://www.wired.com/wiredscience/feed/'),
]
def populate_article_metadata(self, article, soup, first):
    """Decode HTML entities in the feed-supplied summary, in place.

    Wired feeds ship entity-encoded summaries; xml_to_unicode with
    resolve_entities=True returns (text, encoding) — keep only the text.
    """
    if article.text_summary:
        article.text_summary = xml_to_unicode(article.text_summary,
            resolve_entities=True)[0]
def print_version(self, url):
    """Return the single-page view of a Wired blog post.

    Fixes: the body contained two return statements (a stale
    ``url.replace(...)`` line left above the new one), making the
    updated ``'/all/1'`` form unreachable.  Keep only the intended
    single-page-suffix behavior.
    """
    return url + '/all/1'

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.zerohedge.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class ZeroHedge(BasicNewsRecipe):
    '''Calibre news recipe for the Zero Hedge finance blog (zerohedge.com).

    The feedburner RSS feed embeds full article text, so no scraping of
    article pages is needed (use_embedded_content = True).
    '''
    title = 'Zero Hedge'
    __author__ = 'Darko Miletic'
    description = 'On a long enough timeline the survival rate for everyone drops to zero'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    # article bodies come straight from the feed
    use_embedded_content = True
    encoding = 'utf8'
    publisher = 'zero hedge'
    category = 'news, USA, world, economy, politics'
    language = 'en'
    masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png'
    publication_type = 'blog'
    extra_css = 'body{ font-family: sans-serif }'

    # metadata passed through to the ebook conversion pipeline
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher': publisher
    }

    feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')]

View File

@ -287,7 +287,7 @@
<xsl:value-of select="count(preceding::rtf:footnote) + 1"/>
<xsl:text>]</xsl:text>
</xsl:when>
<xsl:when test="(@superscript = 'true')">
<xsl:when test="(@superscript)">
<xsl:element name="sup">
<xsl:element name="span">
<xsl:attribute name="class">
@ -297,7 +297,7 @@
</xsl:element>
</xsl:element>
</xsl:when>
<xsl:when test="(@underscript = 'true')">
<xsl:when test="(@underscript or @subscript)">
<xsl:element name="sub">
<xsl:element name="span">
<xsl:attribute name="class">

View File

@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
obj = obj.decode('utf-8')
return obj
def as_unicode(obj, enc=preferred_encoding):
    """Return *obj* as a unicode string, never raising.

    Byte strings are passed to force_unicode() for decoding with *enc*;
    any other object is converted with unicode(), falling back to str()
    and finally repr() — useful when stringifying arbitrary exception
    objects.  (Python 2 code: relies on the ``unicode`` builtin.)
    """
    if not isbytestring(obj):
        try:
            obj = unicode(obj)
        except:
            try:
                obj = str(obj)
            except:
                obj = repr(obj)
    return force_unicode(obj, enc=enc)
def human_readable(size):
""" Convert a size in bytes into a human readable form """

View File

@ -91,3 +91,19 @@ class NOOK_COLOR(NOOK):
EBOOK_DIR_MAIN = 'My Files/Books'
'''
def create_upload_path(self, path, mdata, fname, create_dirs=True):
filepath = NOOK.create_upload_path(self, path, mdata, fname,
create_dirs=create_dirs)
edm = self.EBOOK_DIR_MAIN.replace('/', os.sep)
npath = os.path.join(edm, _('News')) + os.sep
if npath in filepath:
filepath = filepath.replace(npath, os.sep.join('My Files',
'Magazines')+os.sep)
filedir = os.path.dirname(filepath)
if create_dirs and not os.path.exists(filedir):
os.makedirs(filedir)
return filepath
'''

View File

@ -88,6 +88,7 @@ class Plumber(object):
self.ui_reporter = report_progress
self.abort_after_input_dump = abort_after_input_dump
# Pipeline options {{{
# Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these
# options via recommendations.
@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp',
help=_('Set the book timestamp (used by the date column in calibre).')),
]
# }}}
input_fmt = os.path.splitext(self.input)[1]
if not input_fmt:

View File

@ -16,7 +16,6 @@ import uuid
from lxml import etree
from calibre import guess_type
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@ -41,7 +40,7 @@ class FB2MLizer(object):
# in different directories. FB2 images are all in a flat layout so we rename all images
# into a sequential numbering system to ensure there are no collisions between image names.
self.image_hrefs = {}
# Mapping of toc items and their
# Mapping of toc items and their
self.toc = {}
# Used to see whether a new <section> needs to be opened
self.section_level = 0
@ -51,7 +50,7 @@ class FB2MLizer(object):
self.oeb_book = oeb_book
self.opts = opts
self.reset_state()
# Used for adding <section>s and <title>s to allow readers
# to generate toc from the document.
if self.opts.sectionize == 'toc':
@ -75,20 +74,20 @@ class FB2MLizer(object):
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
text = re.sub(r'(?miu)\s*</p>', '</p>', text)
text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text)
text = re.sub(r'(?miu)<title>\s*</title>', '', text)
text = re.sub(r'(?miu)\s+</title>', '</title>', text)
text = re.sub(r'(?miu)<section>\s*</section>', '', text)
text = re.sub(r'(?miu)\s*</section>', '\n</section>', text)
text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text)
text = re.sub(r'(?miu)\s*<section>', '\n<section>', text)
text = re.sub(r'(?miu)<section>\s*', '<section>\n', text)
text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
if self.opts.insert_blank_line:
text = re.sub(r'(?miu)</p>', '</p><empty-line />', text)
return text
def fb2_header(self):
@ -122,7 +121,7 @@ class FB2MLizer(object):
break
if metadata['id'] is None:
self.log.warn('No UUID identifier found')
metadata['id'] = str(uuid.uuid4())
metadata['id'] = str(uuid.uuid4())
for key, value in metadata.items():
if not key == 'cover':
@ -159,7 +158,7 @@ class FB2MLizer(object):
def get_cover(self):
cover_href = None
# Get the raster cover if it's available.
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
@ -180,41 +179,41 @@ class FB2MLizer(object):
for img in cover_item.xpath('//img'):
cover_href = cover_item.abshref(img.get('src'))
break
if cover_href:
# Only write the image tag if it is in the manifest.
if cover_href in self.oeb_book.manifest.hrefs.keys():
if cover_href not in self.image_hrefs.keys():
self.image_hrefs[cover_href] = '_%s.jpg' % len(self.image_hrefs.keys())
return u'<coverpage><image xlink:href="#%s" /></coverpage>' % self.image_hrefs[cover_href]
return u''
return u''
def get_text(self):
text = ['<body>']
# Create main section if there are no others to create
if self.opts.sectionize == 'nothing':
text.append('<section>')
self.section_level += 1
for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
# Start a <section> if we must sectionize each file or if the TOC references this page
page_section_open = False
if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page':
text.append('<section>')
page_section_open = True
self.section_level += 1
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
if page_section_open:
text.append('</section>')
self.section_level -= 1
# Close any open sections
while self.section_level > 0:
text.append('</section>')
@ -353,7 +352,7 @@ class FB2MLizer(object):
self.toc[page.href] = None
elif toc_entry and elem_tree.attrib.get('id', None):
newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None)
# Start a new section if necessary
if newlevel:
if not (newlevel > self.section_level):

View File

@ -10,7 +10,8 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
tags_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
publisher_pat = re.compile(r'\{\\info.*?\{\\manager(.*?)(?<!\\)\}', re.DOTALL)
def get_document_info(stream):
"""
@ -82,61 +83,73 @@ def decode(raw, codec):
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
title, author, comment, category = None, None, None, None
stream.seek(0)
if stream.read(5) != r'{\rtf':
return MetaInformation(None, None)
return MetaInformation(_('Unknown'))
block = get_document_info(stream)[0]
if not block:
return MetaInformation(None, None)
return MetaInformation(_('Unknown'))
stream.seek(0)
cpg = detect_codepage(stream)
stream.seek(0)
title_match = title_pat.search(block)
if title_match:
if title_match is not None:
title = decode(title_match.group(1).strip(), cpg)
else:
title = _('Unknown')
author_match = author_pat.search(block)
if author_match:
if author_match is not None:
author = decode(author_match.group(1).strip(), cpg)
comment_match = comment_pat.search(block)
if comment_match:
comment = decode(comment_match.group(1).strip(), cpg)
category_match = category_pat.search(block)
if category_match:
category = decode(category_match.group(1).strip(), cpg)
mi = MetaInformation(title, author)
else:
author = None
mi = MetaInformation(title)
if author:
mi.authors = string_to_authors(author)
mi.comments = comment
mi.category = category
comment_match = comment_pat.search(block)
if comment_match is not None:
comment = decode(comment_match.group(1).strip(), cpg)
mi.comments = comment
tags_match = tags_pat.search(block)
if tags_match is not None:
tags = decode(tags_match.group(1).strip(), cpg)
mi.tags = tags
publisher_match = publisher_pat.search(block)
if publisher_match is not None:
publisher = decode(publisher_match.group(1).strip(), cpg)
mi.publisher = publisher
return mi
def create_metadata(stream, options):
md = r'{\info'
md = [r'{\info']
if options.title:
title = options.title.encode('ascii', 'ignore')
md += r'{\title %s}'%(title,)
md.append(r'{\title %s}'%(title,))
if options.authors:
au = options.authors
if not isinstance(au, basestring):
au = u', '.join(au)
author = au.encode('ascii', 'ignore')
md += r'{\author %s}'%(author,)
if options.get('category', None):
category = options.category.encode('ascii', 'ignore')
md += r'{\category %s}'%(category,)
md.append(r'{\author %s}'%(author,))
comp = options.comment if hasattr(options, 'comment') else options.comments
if comp:
comment = comp.encode('ascii', 'ignore')
md += r'{\subject %s}'%(comment,)
if len(md) > 6:
md += '}'
md.append(r'{\subject %s}'%(comment,))
if options.publisher:
publisher = options.publisher.encode('ascii', 'ignore')
md.append(r'{\manager %s}'%(publisher,))
if options.tags:
tags = u', '.join(options.tags)
tags = tags.encode('ascii', 'ignore')
md.append(r'{\category %s}'%(tags,))
if len(md) > 1:
md.append('}')
stream.seek(0)
src = stream.read()
ans = src[:6] + md + src[6:]
ans = src[:6] + u''.join(md) + src[6:]
stream.seek(0)
stream.write(ans)
@ -156,7 +169,7 @@ def set_metadata(stream, options):
base_pat = r'\{\\name(.*?)(?<!\\)\}'
title = options.title
if title != None:
if title is not None:
title = title.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
if pat.search(src):
@ -164,7 +177,7 @@ def set_metadata(stream, options):
else:
src = add_metadata_item(src, 'title', title)
comment = options.comments
if comment != None:
if comment is not None:
comment = comment.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
if pat.search(src):
@ -172,7 +185,7 @@ def set_metadata(stream, options):
else:
src = add_metadata_item(src, 'subject', comment)
author = options.authors
if author != None:
if author is not None:
author = ', '.join(author)
author = author.encode('ascii', 'ignore')
pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
@ -180,14 +193,23 @@ def set_metadata(stream, options):
src = pat.sub(r'{\\author ' + author + r'}', src)
else:
src = add_metadata_item(src, 'author', author)
category = options.get('category', None)
if category != None:
category = category.encode('ascii', 'replace')
tags = options.tags
if tags is not None:
tags = ', '.join(tags)
tags = tags.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\category ' + category + r'}', src)
src = pat.sub(r'{\\category ' + tags + r'}', src)
else:
src = add_metadata_item(src, 'category', category)
src = add_metadata_item(src, 'category', tags)
publisher = options.publisher
if publisher is not None:
publisher = publisher.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'manager'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\manager ' + publisher + r'}', src)
else:
src = add_metadata_item(src, 'manager', publisher)
stream.seek(pos + olen)
after = stream.read()
stream.seek(pos)

View File

@ -77,7 +77,15 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'out.xml'
ofile = 'dataxml.xml'
run_lev, debug_dir = 1, None
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
os.mkdir(debug_dir)
debug_dir = 'rtfdebug'
run_lev = 4
except:
pass
parser = ParseRtf(
in_file = stream,
out_file = ofile,
@ -115,43 +123,45 @@ class RTFInput(InputFormatPlugin):
# Write or do not write paragraphs. Default is 0.
empty_paragraphs = 1,
#debug
deb_dir = debug_dir,
run_level = run_lev,
)
parser.parse_rtf()
ans = open('out.xml').read()
os.remove('out.xml')
return ans
with open(ofile, 'rb') as f:
return f.read()
def extract_images(self, picts):
import imghdr
self.log('Extracting images...')
with open(picts, 'rb') as f:
raw = f.read()
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
hex = re.compile(r'[^a-fA-F0-9]')
encs = [hex.sub('', pict) for pict in picts]
count = 0
raw = open(picts, 'rb').read()
starts = []
for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
starts.append(match.start(1))
imap = {}
for start in starts:
pos, bc = start, 1
while bc > 0:
if raw[pos] == '}': bc -= 1
elif raw[pos] == '{': bc += 1
pos += 1
pict = raw[start:pos+1]
enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
for enc in encs:
if len(enc) % 2 == 1:
enc = enc[:-1]
data = enc.decode('hex')
fmt = imghdr.what(None, data)
if fmt is None:
fmt = 'wmf'
count += 1
name = (('%4d'%count).replace(' ', '0'))+'.wmf'
open(name, 'wb').write(data)
name = '%04d.%s' % (count, fmt)
with open(name, 'wb') as f:
f.write(data)
imap[count] = name
#open(name+'.hex', 'wb').write(enc)
return self.convert_images(imap)
def convert_images(self, imap):
for count, val in imap.items():
self.default_img = None
for count, val in imap.iteritems():
try:
imap[count] = self.convert_image(val)
except:
@ -159,11 +169,35 @@ class RTFInput(InputFormatPlugin):
return imap
def convert_image(self, name):
from calibre.utils.magick import Image
img = Image()
img.open(name)
if not name.endswith('.wmf'):
return name
try:
return self.rasterize_wmf(name)
except:
self.log.exception('Failed to convert WMF image %r'%name)
return self.replace_wmf(name)
def replace_wmf(self, name):
from calibre.ebooks import calibre_cover
if self.default_img is None:
self.default_img = calibre_cover('Conversion of WMF images is not supported',
'Use Microsoft Word or OpenOffice to save this RTF file'
' as HTML and convert that in calibre.', title_size=36,
author_size=20)
name = name.replace('.wmf', '.jpg')
img.save(name)
with open(name, 'wb') as f:
f.write(self.default_img)
return name
def rasterize_wmf(self, name):
raise ValueError('Conversion of WMF images not supported')
from calibre.utils.wmf import extract_raster_image
with open(name, 'rb') as f:
data = f.read()
data = extract_raster_image(data)
name = name.replace('.wmf', '.jpg')
with open(name, 'wb') as f:
f.write(data)
return name
@ -192,27 +226,27 @@ class RTFInput(InputFormatPlugin):
css += '\n'+'\n'.join(font_size_classes)
css += '\n' +'\n'.join(color_classes)
for cls, val in border_styles.items():
for cls, val in border_styles.iteritems():
css += '\n\n.%s {\n%s\n}'%(cls, val)
with open('styles.css', 'ab') as f:
f.write(css)
def preprocess(self, fname):
self.log('\tPreprocessing to convert unicode characters')
try:
data = open(fname, 'rb').read()
from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
tokenizer = RtfTokenizer(data)
tokens = RtfTokenParser(tokenizer.tokens)
data = tokens.toRTF()
fname = 'preprocessed.rtf'
with open(fname, 'wb') as f:
f.write(data)
except:
self.log.exception(
'Failed to preprocess RTF to convert unicode sequences, ignoring...')
return fname
# def preprocess(self, fname):
# self.log('\tPreprocessing to convert unicode characters')
# try:
# data = open(fname, 'rb').read()
# from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
# tokenizer = RtfTokenizer(data)
# tokens = RtfTokenParser(tokenizer.tokens)
# data = tokens.toRTF()
# fname = 'preprocessed.rtf'
# with open(fname, 'wb') as f:
# f.write(data)
# except:
# self.log.exception(
# 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
# return fname
def convert_borders(self, doc):
border_styles = []
@ -249,17 +283,14 @@ class RTFInput(InputFormatPlugin):
self.log = log
self.log('Converting RTF to XML...')
#Name of the preprocesssed RTF file
fname = self.preprocess(stream.name)
# fname = self.preprocess(stream.name)
try:
xml = self.generate_xml(fname)
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
raise
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
'''dataxml = open('dataxml.xml', 'w')
dataxml.write(xml)
dataxml.close'''
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
imap = {}

View File

@ -17,7 +17,8 @@
#########################################################################
# $Revision: 1.41 $
# $Date: 2006/03/24 23:50:07 $
import sys,os
import sys, os
from calibre.ebooks.rtf2xml import headings_to_sections, \
line_endings, footnote, fields_small, default_encoding, \
make_lists, preamble_div, header, colors, group_borders, \
@ -90,7 +91,6 @@ class ParseRtf:
out_file = '',
out_dir = None,
dtd = '',
#debug = 0, #why? calibre
deb_dir = None,
convert_symbol = None,
convert_wingdings = None,
@ -107,6 +107,7 @@ class ParseRtf:
no_dtd = 0,
char_data = '',
):
"""
Requires:
'file' --file to parse
@ -119,12 +120,11 @@ class ParseRtf:
script tries to output to directory where is script is exectued.)
'deb_dir' --debug directory. If a debug_dir is provided, the script
will copy each run through as a file to examine in the debug_dir
'perl_script'--use perl to make tokens. This runs just a bit faster.
(I will probably phase this out.)
'check_brackets' -- make sure the brackets match up after each run
through a file. Only for debugging.
Returns: Nothing
"""
self.__file = in_file
self.__out_file = out_file
self.__out_dir = out_dir
@ -132,7 +132,7 @@ class ParseRtf:
self.__dtd_path = dtd
self.__check_file(in_file,"file_to_parse")
self.__char_data = char_data
self.__debug_dir = deb_dir #self.__debug_dir = debug calibre
self.__debug_dir = deb_dir
self.__check_dir(self.__temp_dir)
self.__copy = self.__check_dir(self.__debug_dir)
self.__convert_caps = convert_caps
@ -155,25 +155,24 @@ class ParseRtf:
if hasattr(the_file, 'read'): return
if the_file == None:
if type == "file_to_parse":
message = "You must provide a file for the script to work"
msg = message
msg = "\nYou must provide a file for the script to work"
raise RtfInvalidCodeException, msg
elif os.path.exists(the_file):
pass # do nothing
else:
message = "The file '%s' cannot be found" % the_file
msg = message
msg = "\nThe file '%s' cannot be found" % the_file
raise RtfInvalidCodeException, msg
def __check_dir(self, the_dir):
"""Check to see if directory exists"""
if not the_dir :
return
dir_exists = os.path.isdir(the_dir)
if not dir_exists:
message = "%s is not a directory" % the_dir
msg = message
msg = "\n%s is not a directory" % the_dir
raise RtfInvalidCodeException, msg
return 1
def parse_rtf(self):
"""
Parse the file by calling on other classes.
@ -194,13 +193,14 @@ class ParseRtf:
copy_obj.set_dir(self.__debug_dir)
copy_obj.remove_files()
copy_obj.copy_file(self.__temp_file, "original_file")
# new as of 2005-08-02. Do I want this?
# Function to check if bracket are well handled
if self.__debug_dir or self.__run_level > 2:
self.__check_brack_obj = check_brackets.CheckBrackets\
(file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
)
# convert Macintosh line endings to Unix line endings
#convert Macintosh and Windows line endings to Unix line endings
#why do this if you don't wb after?
line_obj = line_endings.FixLineEndings(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
@ -208,13 +208,13 @@ class ParseRtf:
run_level = self.__run_level,
replace_illegals = self.__replace_illegals,
)
return_value = line_obj.fix_endings()
return_value = line_obj.fix_endings() #calibre return what?
self.__return_code(return_value)
tokenize_obj = tokenize.Tokenize(
bug_handler = RtfInvalidCodeException,
in_file = self.__temp_file,
copy = self.__copy,
run_level = self.__run_level,)
run_level = self.__run_level)
tokenize_obj.tokenize()
process_tokens_obj = process_tokens.ProcessTokens(
in_file = self.__temp_file,
@ -230,12 +230,25 @@ class ParseRtf:
os.remove(self.__temp_file)
except OSError:
pass
#Check to see if the file is correctly encoded
encode_obj = default_encoding.DefaultEncoding(
in_file = self.__temp_file,
run_level = self.__run_level,
bug_handler = RtfInvalidCodeException,
check_raw = True,
)
platform, code_page, default_font_num = encode_obj.find_default_encoding()
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException,
)
check_encoding_obj.check_encoding(self.__file)
sys.stderr.write('File "%s" does not appear to be RTF.\n' % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
raise InvalidRtfException, msg
bug_handler = RtfInvalidCodeException,
)
enc = encode_obj.get_codepage()
if enc != 'mac_roman':
enc = 'cp' + enc
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
msg = 'File %s does not appear to be correctly encoded.\n' % file_name
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
copy = self.__copy,
@ -508,6 +521,7 @@ class ParseRtf:
indent = self.__indent,
run_level = self.__run_level,
no_dtd = self.__no_dtd,
encoding = encode_obj.get_codepage(),
bug_handler = RtfInvalidCodeException,
)
tags_obj.convert_to_tags()
@ -520,35 +534,28 @@ class ParseRtf:
output_obj.output()
os.remove(self.__temp_file)
return self.__exit_level
def __bracket_match(self, file_name):
if self.__run_level > 2:
good_br, msg = self.__check_brack_obj.check_brackets()
if good_br:
pass
# sys.stderr.write( msg + ' in ' + file_name + "\n")
#sys.stderr.write( msg + ' in ' + file_name + "\n")
else:
msg += msg + " in file '" + file_name + "'\n"
msg = '%s in file %s\n' % (msg, file_name)
raise RtfInvalidCodeException, msg
def __return_code(self, num):
if num == None:
return
if int(num) > self.__exit_level:
self.__exit_level = num
if num == None:
return
if int(num) > self.__exit_level:
self.__exit_level = num
def __make_temp_file(self,file):
"""Make a temporary file to parse"""
write_file="rtf_write_file"
read_obj = file if hasattr(file, 'read') else open(file,'r')
write_obj = open(write_file, 'w')
line = "dummy"
while line:
line = read_obj.read(1000)
write_obj.write(line )
write_obj.close()
with open(write_file, 'wb') as write_obj:
for line in read_obj:
write_obj.write(line)
return write_file
"""
mi<tg<open______<style-sheet\n
mi<tg<close_____<style-sheet\n
mi<tg<open-att__<footnote<num>1\n
mi<tg<empty-att_<page-definition<margin>33\n
mi<tg<empty_____<para\n
"""

View File

@ -24,38 +24,38 @@ class CheckBrackets:
self.__ob_count = 0
self.__cb_count = 0
self.__open_bracket_num = []
def open_brack(self, line):
num = line[-5:-1]
self.__open_bracket_num.append(num)
self.__bracket_count += 1
def close_brack(self, line):
num = line[-5:-1]
##self.__open_bracket_num.append(num)
try:
last_num = self.__open_bracket_num.pop()
except:
return 0
return False
if num != last_num:
return 0
return False
self.__bracket_count -= 1
return 1
return True
def check_brackets(self):
read_obj = open(self.__file, 'r')
line = 'dummy'
line_count = 0
while line:
line_count += 1
line = read_obj.readline()
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.open_brack(line)
if self.__token_info == 'cb<nu<clos-brack':
right_count = self.close_brack(line)
if not right_count:
return (0, "closed bracket doesn't match, line %s" % line_count)
read_obj.close()
with open(self.__file, 'r') as read_obj:
for line in read_obj:
line_count += 1
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.open_brack(line)
if self.__token_info == 'cb<nu<clos-brack':
if not self.close_brack(line):
return (False, "closed bracket doesn't match, line %s" % line_count)
if self.__bracket_count != 0:
msg = 'At end of file open and closed brackets don\'t match\n'
msg = msg + 'total number of brackets is %s' % self.__bracket_count
return (0, msg)
return (1, "brackets match!")
msg = ('At end of file open and closed brackets don\'t match\n' \
'total number of brackets is %s') % self.__bracket_count
return (False, msg)
return (True, "Brackets match!")

View File

@ -1,8 +1,11 @@
#!/usr/bin/env python
import sys
class CheckEncoding:
def __init__(self, bug_handler):
self.__bug_handler = bug_handler
def __get_position_error(self, line, encoding, line_num):
char_position = 0
for char in line:
@ -12,21 +15,23 @@ class CheckEncoding:
except UnicodeError, msg:
sys.stderr.write('line: %s char: %s\n' % (line_num, char_position))
sys.stderr.write(str(msg) + '\n')
def check_encoding(self, path, encoding='us-ascii'):
read_obj = open(path, 'r')
line_to_read = 1
def check_encoding(self, path, encoding='us-ascii', verbose=True):
line_num = 0
while line_to_read:
line_num += 1
line_to_read = read_obj.readline()
line = line_to_read
try:
line.decode(encoding)
except UnicodeError:
if len(line) < 1000:
self.__get_position_error(line, encoding, line_num)
else:
sys.stderr.write('line: %d has bad encoding\n'%line_num)
with open(path, 'r') as read_obj:
for line in read_obj:
line_num += 1
try:
line.decode(encoding)
except UnicodeError:
if verbose:
if len(line) < 1000:
self.__get_position_error(line, encoding, line_num)
else:
sys.stderr.write('line: %d has bad encoding\n' % line_num)
return True
return False
if __name__ == '__main__':
check_encoding_obj = CheckEncoding()
check_encoding_obj.check_encoding(sys.argv[1])

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import os, tempfile
from calibre.ebooks.rtf2xml import copy
class CombineBorders:
"""Combine borders in RTF tokens to make later processing easier"""
def __init__(self,
@ -32,28 +34,31 @@ class CombineBorders:
self.__state = 'default'
self.__bord_pos = 'default'
self.__bord_att = []
def found_bd(self, line):
#cw<bd<bor-t-r-vi
self.__state = 'border'
self.__bord_pos = line[6:16]
def __default_func(self, line):
#cw<bd<bor-t-r-vi
if self.__first_five == 'cw<bd':
self.found_bd(line)
return ''
return line
def end_border(self, line, write_obj):
joiner = "|"
border_string = joiner.join(self.__bord_att)
border_string = "|".join(self.__bord_att)
self.__bord_att = []
write_obj.write('cw<bd<%s<nu<%s\n' % (self.__bord_pos,
border_string))
border_string))
self.__state = 'default'
self.__bord_string = ''
if self.__first_five == 'cw<bd':
self. found_bd(line)
else:
write_obj.write(line)
def add_to_border_desc(self, line):
#cw<bt<bdr-hair__<nu<true
#cw<bt<bdr-linew<nu<0.50
@ -65,26 +70,22 @@ class CombineBorders:
else:
num = ':' + num
self.__bord_att.append(border_desc + num)
def __border_func(self, line, write_obj):
if self.__first_five != 'cw<bt':
self.end_border(line, write_obj)
else:
self.add_to_border_desc(line)
def combine_borders(self):
read_obj = open(self.__file, 'r')
write_obj = open(self.__write_to, 'w')
line_to_read = 'dummy'
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__first_five = line[0:5]
if self.__state == 'border':
self.__border_func(line, write_obj)
else:
to_print = self.__default_func(line)
write_obj.write(to_print)
read_obj.close()
write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__first_five = line[0:5]
if self.__state == 'border':
self.__border_func(line, write_obj)
else:
write_obj.write(self.__default_func(line))
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "combine_borders.data")

View File

@ -1,6 +1,9 @@
import os, tempfile
from calibre.ebooks.rtf2xml import copy
import os, tempfile, sys
from calibre.ebooks.rtf2xml import copy, check_encoding
public_dtd = 'rtf2xml1.0.dtd'
class ConvertToTags:
"""
Convert file to XML
@ -10,6 +13,7 @@ class ConvertToTags:
bug_handler,
dtd_path,
no_dtd,
encoding,
indent = None,
copy = None,
run_level = 1,
@ -29,9 +33,14 @@ class ConvertToTags:
self.__copy = copy
self.__dtd_path = dtd_path
self.__no_dtd = no_dtd
if encoding != 'mac_roman':
self.__encoding = 'cp' + encoding
else:
self.__encoding = 'mac_roman'
self.__indent = indent
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
def __initiate_values(self):
"""
Set values, including those for the dictionary.
@ -61,6 +70,7 @@ class ConvertToTags:
'tx<ut<__________' : self.__text_func,
'mi<tg<empty_____' : self.__empty_func,
}
def __open_func(self, line):
"""
Print the opening tag and newlines when needed.
@ -73,6 +83,7 @@ class ConvertToTags:
if info in self.__two_new_line:
self.__write_extra_new_line()
self.__write_obj.write('<%s>' % info)
def __empty_func(self, line):
"""
Print out empty tag and newlines when needed.
@ -85,10 +96,11 @@ class ConvertToTags:
self.__write_new_line()
if info in self.__two_new_line:
self.__write_extra_new_line()
def __open_att_func(self, line):
"""
Process lines for open tags that have attributes.
The important infor is between [17:-1]. Take this info and split it
The important info is between [17:-1]. Take this info and split it
with the delimeter '<'. The first token in this group is the element
name. The rest are attributes, separated fromt their values by '>'. So
read each token one at a time, and split them by '>'.
@ -119,6 +131,7 @@ class ConvertToTags:
self.__write_new_line()
if element_name in self.__two_new_line:
self.__write_extra_new_line()
def __empty_att_func(self, line):
"""
Same as the __open_att_func, except a '/' is placed at the end of the tag.
@ -143,6 +156,7 @@ class ConvertToTags:
self.__write_new_line()
if element_name in self.__two_new_line:
self.__write_extra_new_line()
def __close_func(self, line):
"""
Print out the closed tag and new lines, if appropriate.
@ -156,6 +170,7 @@ class ConvertToTags:
self.__write_new_line()
if info in self.__two_new_line:
self.__write_extra_new_line()
def __text_func(self, line):
"""
Simply print out the information between [17:-1]
@ -163,6 +178,7 @@ class ConvertToTags:
#tx<nu<__________<Normal;
# change this!
self.__write_obj.write(line[17:-1])
def __write_extra_new_line(self):
"""
Print out extra new lines if the new lines have not exceeded two. If
@ -172,8 +188,10 @@ class ConvertToTags:
return
if self.__new_line < 2:
self.__write_obj.write('\n')
def __default_func(self, line):
pass
def __write_new_line(self):
"""
Print out a new line if a new line has not already been printed out.
@ -183,11 +201,23 @@ class ConvertToTags:
if not self.__new_line:
self.__write_obj.write('\n')
self.__new_line += 1
def __write_dec(self):
"""
Write the XML declaration at the top of the document.
"""
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
#keep maximum compatibility with previous version
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler=self.__bug_handler)
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best')
self.__new_line = 0
self.__write_new_line()
if self.__no_dtd:
@ -207,6 +237,7 @@ class ConvertToTags:
)
self.__new_line = 0
self.__write_new_line()
def convert_to_tags(self):
"""
Read in the file one line at a time. Get the important info, between
@ -222,18 +253,14 @@ class ConvertToTags:
an empty tag function.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
self.__write_dec()
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__state_dict.get(self.__token_info)
if action != None:
action(line)
read_obj.close()
with open(self.__file, 'r') as read_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__token_info)
if action is not None:
action(line)
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:

View File

@ -23,6 +23,7 @@ class Copy:
def __init__(self, bug_handler, file = None, deb_dir = None, ):
self.__file = file
self.__bug_handler = bug_handler
def set_dir(self, deb_dir):
"""Set the temporary directory to write files to"""
if deb_dir is None:
@ -33,19 +34,11 @@ class Copy:
message = "%(deb_dir)s is not a directory" % vars()
raise self.__bug_handler , message
Copy.__dir = deb_dir
def remove_files(self ):
"""Remove files from directory"""
self.__remove_the_files(Copy.__dir)
"""
list_of_files = os.listdir(Copy.__dir)
list_of_files = os.listdir(the_dir)
for file in list_of_files:
rem_file = os.path.join(Copy.__dir,file)
if os.path.isdir(rem_file):
self.remove_files(rem_file)
else:
os.remove(rem_file)
"""
def __remove_the_files(self, the_dir):
"""Remove files from directory"""
list_of_files = os.listdir(the_dir)
@ -58,6 +51,7 @@ class Copy:
os.remove(rem_file)
except OSError:
pass
def copy_file(self, file, new_file):
"""
Copy the file to a new name

View File

@ -1,61 +1,142 @@
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
# 02111-1307 USA #
# #
# #
#########################################################################
'''
Codepages as to RTF 1.9.1:
437 United States IBM
708 Arabic (ASMO 708)
709 Arabic (ASMO 449+, BCON V4)
710 Arabic (transparent Arabic)
711 Arabic (Nafitha Enhanced)
720 Arabic (transparent ASMO)
819 Windows 3.1 (United States and Western Europe)
850 IBM multilingual
852 Eastern European
860 Portuguese
862 Hebrew
863 French Canadian
864 Arabic
865 Norwegian
866 Soviet Union
874 Thai
932 Japanese
936 Simplified Chinese
949 Korean
950 Traditional Chinese
1250 Eastern European
1251 Cyrillic
1252 Western European
1253 Greek
1254 Turkish
1255 Hebrew
1256 Arabic
1257 Baltic
1258 Vietnamese
1361 Johab
10000 MAC Roman
10001 MAC Japan
10004 MAC Arabic
10005 MAC Hebrew
10006 MAC Greek
10007 MAC Cyrillic
10029 MAC Latin2
10081 MAC Turkish
57002 Devanagari
57003 Bengali
57004 Tamil
57005 Telugu
57006 Assamese
57007 Oriya
57008 Kannada
57009 Malayalam
57010 Gujarati
57011 Punjabi
'''
import re
class DefaultEncoding:
"""
Find the default encoding for the doc
"""
def __init__(self, in_file, bug_handler, run_level = 1,):
"""
Required:
'file'
Returns:
nothing
"""
def __init__(self, in_file, bug_handler, run_level = 1, check_raw = False):
self.__file = in_file
self.__bug_handler = bug_handler
self.__platform = 'Windows'
self.__default_num = 'not-defined'
self.__code_page = '1252'
self.__datafetched = False
self.__fetchraw = check_raw
def find_default_encoding(self):
platform = 'Windows'
default_num = 'not-defined'
code_page = 'ansicpg1252'
read_obj = open(self.__file, 'r')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
if self.__token_info == 'mi<mk<rtfhed-end':
break
if self.__token_info == 'cw<ri<ansi-codpg':
#cw<ri<ansi-codpg<nu<10000
num = line[20:-1]
if not num:
num = '1252'
code_page = 'ansicpg' + num
if self.__token_info == 'cw<ri<macintosh_':
platform = 'Macintosh'
if self.__token_info == 'cw<ri<deflt-font':
default_num = line[20:-1]
#cw<ri<deflt-font<nu<0
#action = self.__state_dict.get(self.__state)
#if action == None:
#print self.__state
#action(line)
read_obj.close()
if platform == 'Macintosh':
code_page = 'mac_roman'
return platform, code_page, default_num
if not self.__datafetched:
self._encoding()
self.__datafetched = True
if self.__platform == 'Macintosh':
code_page = self.__code_page
else:
code_page = 'ansicpg' + self.__code_page
return self.__platform, code_page, self.__default_num
def get_codepage(self):
if not self.__datafetched:
self._encoding()
self.__datafetched = True
return self.__code_page
def get_platform(self):
if not self.__datafetched:
self._encoding()
self.__datafetched = True
return self.__platform
def _encoding(self):
with open(self.__file, 'r') as read_obj:
if not self.__fetchraw:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'mi<mk<rtfhed-end':
break
if self.__token_info == 'cw<ri<ansi-codpg':
#cw<ri<ansi-codpg<nu<10000
self.__code_page = line[20:-1] if int(line[20:-1]) \
else '1252'
if self.__token_info == 'cw<ri<macintosh_':
self.__platform = 'Macintosh'
self.__code_page = 'mac_roman'
elif self.__token_info == 'cw<ri<pc________':
self.__platform = 'IBMPC'
self.__code_page = '437'
elif self.__token_info == 'cw<ri<pca_______':
self.__platform = 'OS/2'
self.__code_page = '850'
if self.__token_info == 'cw<ri<deflt-font':
self.__default_num = line[20:-1]
#cw<ri<deflt-font<nu<0
else:
fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
for line in read_obj:
if fenccp.search(line):
cp = fenccp.search(line).group(1)
if not int(cp):
self.__code_page = cp
break
if fenc.search(line):
enc = fenc.search(line).group(1)
if enc == 'mac':
self.__code_page = 'mac_roman'
elif enc == 'pc':
self.__code_page = '437'
elif enc == 'pca':
self.__code_page = '850'
# if __name__ == '__main__':
# encode_obj = DefaultEncoding(
# in_file = sys.argv[1],
# bug_handler = Exception,
# check_raw = True,
# )
# print encode_obj.get_codepage()

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class DeleteInfo:
"""Delelet unecessary destination groups"""
def __init__(self,
@ -29,17 +31,18 @@ class DeleteInfo:
self.__bug_handler = bug_handler
self.__copy = copy
self.__write_to = tempfile.mktemp()
self.__bracket_count=0
self.__bracket_count= 0
self.__ob_count = 0
self.__cb_count = 0
self.__after_asterisk = 0
self.__delete = 0
# self.__after_asterisk = False
# self.__delete = 0
self.__initiate_allow()
self.__ob = 0
self.__write_cb = 0
self.__write_cb = False
self.__run_level = run_level
self.__found_delete = 0
self.__list = 0
self.__found_delete = False
# self.__list = False
def __initiate_allow(self):
"""
Initiate a list of destination groups which should be printed out.
@ -66,9 +69,10 @@ class DeleteInfo:
self.__state_dict = {
'default' : self.__default_func,
'after_asterisk' : self.__asterisk_func,
'delete' : self.__delete_func,
'delete' : self.__delete_func,
'list' : self.__list_func,
}
def __default_func(self,line):
"""Handle lines when in no special state. Look for an asterisk to
begin a special state. Otherwise, print out line."""
@ -81,27 +85,29 @@ class DeleteInfo:
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = line
return 0
return False
else:
# write previous bracket, since didn't fine asterisk
if self.__ob:
self.__write_obj.write(self.__ob)
self.__ob = 0
return 1
return True
def __delete_func(self,line):
"""Handle lines when in delete state. Don't print out lines
unless the state has ended."""
if self.__delete_count == self.__cb_count:
self.__state = 'default'
if self.__write_cb:
self.__write_cb = 0
return 1
return 0
self.__write_cb = True
return True
return False
def __asterisk_func(self,line):
"""
Determine whether to delete info in group
Note on self.__cb flag.
If you find that you are in a delete group, and the preivous
If you find that you are in a delete group, and the previous
token in not an open bracket (self.__ob = 0), that means
that the delete group is nested inside another acceptable
detination group. In this case, you have alrady written
@ -110,21 +116,21 @@ class DeleteInfo:
"""
# Test for {\*}, in which case don't enter
# delete state
self.__after_asterisk = 0 # only enter this function once
self.__found_delete = 1
# self.__after_asterisk = False # only enter this function once
self.__found_delete = True
if self.__token_info == 'cb<nu<clos-brack':
if self.__delete_count == self.__cb_count:
self.__state = 'default'
self.__ob = 0
# changed this because haven't printed out start
return 0
return False
else:
# not sure what happens here!
# believe I have a '{\*}
if self.__run_level > 3:
msg = 'flag problem\n'
raise self.__bug_handler, msg
return 1
return True
elif self.__token_info in self.__allowable :
if self.__ob:
self.__write_obj.write(self.__ob)
@ -132,85 +138,81 @@ class DeleteInfo:
self.__state = 'default'
else:
pass
return 1
return True
elif self.__token_info == 'cw<ls<list______':
self.__ob = 0
self.__found_list_func(line)
elif self.__token_info in self.__not_allowable:
if not self.__ob:
self.__write_cb = 1
self.__write_cb = True
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
return 0
return False
else:
if self.__run_level > 5:
msg = 'After an asterisk, and found neither an allowable or non-allowble token\n'
msg += 'token is "%s"\n' % self.__token_info
raise self.__bug_handler
msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\
token is "%s"\n') % self.__token_info
raise self.__bug_handler, msg
if not self.__ob:
self.__write_cb = 1
self.__write_cb = True
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
return 0
return False
def __found_list_func(self, line):
"""
print out control words in this group
"""
self.__state = 'list'
def __list_func(self, line):
"""
Check to see if the group has ended.
Return 1 for all control words.
Return 0 otherwise.
Return True for all control words.
Return False otherwise.
"""
if self.__delete_count == self.__cb_count and self.__token_info ==\
'cb<nu<clos-brack':
self.__state = 'default'
if self.__write_cb:
self.__write_cb = 0
return 1
return 0
self.__write_cb = False
return True
return False
elif line[0:2] == 'cw':
return 1
return True
else:
return 0
return False
def delete_info(self):
"""Main method for handling other methods. Read one line in at
a time, and determine wheter to print the line based on the state."""
line_to_read = 'dummy'
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
while line_to_read:
#ob<nu<open-brack<0001
to_print =1
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if not action:
sys.stderr.write('No action in dictionary state is "%s" \n'
% self.__state)
to_print = action(line)
"""
if self.__after_asterisk:
to_print = self.__asterisk_func(line)
elif self.__list:
self.__in_list_func(line)
elif self.__delete:
to_print = self.__delete_func(line)
else:
to_print = self.__default_func(line)
"""
if to_print:
self.__write_obj.write(line)
self.__write_obj.close()
read_obj.close()
a time, and determine whether to print the line based on the state."""
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
#ob<nu<open-brack<0001
to_print = True
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if not action:
sys.stderr.write(_('No action in dictionary state is "%s" \n')
% self.__state)
to_print = action(line)
# if self.__after_asterisk:
# to_print = self.__asterisk_func(line)
# elif self.__list:
# self.__in_list_func(line)
# elif self.__delete:
# to_print = self.__delete_func(line)
# else:
# to_print = self.__default_func(line)
if to_print:
self.__write_obj.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "delete_info.data")

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import os, tempfile
from calibre.ebooks.rtf2xml import copy
class Footnote:
"""
Two public methods are available. The first separates all of the
@ -35,6 +37,7 @@ class Footnote:
self.__copy = copy
self.__write_to = tempfile.mktemp()
self.__found_a_footnote = 0
def __first_line_func(self, line):
"""
Print the tag info for footnotes. Check whether footnote is an
@ -47,6 +50,7 @@ class Footnote:
self.__write_to_foot_obj.write(
'mi<tg<open-att__<footnote<num>%s\n' % self.__footnote_count)
self.__first_line = 0
def __in_footnote_func(self, line):
"""Handle all tokens that are part of footnote"""
if self.__first_line:
@ -68,6 +72,7 @@ class Footnote:
'mi<mk<footnt-clo\n')
else:
self.__write_to_foot_obj.write(line)
def __found_footnote(self, line):
""" Found a footnote"""
self.__found_a_footnote = 1
@ -81,6 +86,7 @@ class Footnote:
'mi<mk<footnt-ind<%04d\n' % self.__footnote_count)
self.__write_to_foot_obj.write(
'mi<mk<footnt-ope<%04d\n' % self.__footnote_count)
def __default_sep(self, line):
"""Handle all tokens that are not footnote tokens"""
if self.__token_info == 'cw<nt<footnote__':
@ -91,6 +97,7 @@ class Footnote:
self.__write_obj.write(
'tx<nu<__________<%s\n' % num
)
def __initiate_sep_values(self):
"""
initiate counters for separate_footnotes method.
@ -102,6 +109,7 @@ class Footnote:
self.__in_footnote = 0
self.__first_line = 0 #have not processed the first line of footnote
self.__footnote_count = 0
def separate_footnotes(self):
"""
Separate all the footnotes in an RTF file and put them at the bottom,
@ -111,58 +119,50 @@ class Footnote:
bottom of the main file.
"""
self.__initiate_sep_values()
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
self.__footnote_holder = tempfile.mktemp()
self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
# In the middle of footnote text
if self.__in_footnote:
self.__in_footnote_func(line)
# not in the middle of footnote text
else:
self.__default_sep(line)
self.__write_obj.close()
read_obj.close()
self.__write_to_foot_obj.close()
read_obj = open(self.__footnote_holder, 'r')
write_obj = open(self.__write_to, 'a')
write_obj.write(
'mi<mk<sect-close\n'
'mi<mk<body-close\n'
'mi<tg<close_____<section\n'
'mi<tg<close_____<body\n'
'mi<tg<close_____<doc\n'
'mi<mk<footnt-beg\n')
line = 1
while line:
line = read_obj.readline()
write_obj.write(line)
write_obj.write(
'mi<mk<footnt-end\n')
read_obj.close()
write_obj.close()
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj:
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
# In the middle of footnote text
if self.__in_footnote:
self.__in_footnote_func(line)
# not in the middle of footnote text
else:
self.__default_sep(line)
with open(self.__footnote_holder, 'r') as read_obj:
with open(self.__write_to, 'a') as write_obj:
write_obj.write(
'mi<mk<sect-close\n'
'mi<mk<body-close\n'
'mi<tg<close_____<section\n'
'mi<tg<close_____<body\n'
'mi<tg<close_____<doc\n'
'mi<mk<footnt-beg\n')
for line in read_obj:
write_obj.write(line)
write_obj.write(
'mi<mk<footnt-end\n')
os.remove(self.__footnote_holder)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "footnote_separate.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
def update_info(self, file, copy):
    """
    Unused method: refresh the stored input path and debug-copy flag.

    (Parameter names ``file`` and ``copy`` shadow the builtin and the
    imported module, but are kept for interface compatibility.)
    """
    self.__copy = copy
    self.__file = file
def __get_foot_body_func(self, line):
"""
Process lines in main body and look for beginning of footnotes.
@ -172,6 +172,7 @@ class Footnote:
self.__state = 'foot'
else:
self.__write_obj.write(line)
def __get_foot_foot_func(self, line):
"""
Copy footnotes from bottom of file to a separate, temporary file.
@ -180,6 +181,7 @@ class Footnote:
self.__state = 'body'
else:
self.__write_to_foot_obj.write(line)
def __get_footnotes(self):
"""
Private method to remove footnotes from main file. Read one line from
@ -188,21 +190,16 @@ class Footnote:
These two functions do the work of separating the footnotes form the
body.
"""
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
# self.__write_to = "footnote_info.data"
self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
line = 1
while line:
line = read_obj.readline()
self.__token_info = line[:16]
if self.__state == 'body':
self.__get_foot_body_func(line)
elif self.__state == 'foot':
self.__get_foot_foot_func(line)
read_obj.close()
self.__write_obj.close()
self.__write_to_foot_obj.close()
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__state == 'body':
self.__get_foot_body_func(line)
elif self.__state == 'foot':
self.__get_foot_foot_func(line)
def __get_foot_from_temp(self, num):
"""
Private method for joining footnotes to body. This method reads from
@ -213,9 +210,7 @@ class Footnote:
look_for = 'mi<mk<footnt-ope<' + num + '\n'
found_foot = 0
string_to_return = ''
line = 1
while line:
line = self.__read_from_foot_obj.readline()
for line in self.__read_from_foot_obj:
if found_foot:
if line == 'mi<mk<footnt-clo\n':
return string_to_return
@ -223,6 +218,7 @@ class Footnote:
else:
if line == look_for:
found_foot = 1
def __join_from_temp(self):
"""
Private method for rejoining footnotes to body. Read from the
@ -232,16 +228,14 @@ class Footnote:
print out to the third file.
If no footnote marker is found, simply print out the token (line).
"""
self.__read_from_foot_obj = open(self.__footnote_holder, 'r')
read_obj = open(self.__write_to, 'r')
self.__write_obj = open(self.__write_to2, 'w')
line = 1
while line:
line = read_obj.readline()
if line[:16] == 'mi<mk<footnt-ind':
line = self.__get_foot_from_temp(line[17:-1])
self.__write_obj.write(line)
read_obj.close()
with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj:
with open(self.__write_to, 'r') as read_obj:
with open(self.__write_to2, 'w') as self.__write_obj:
for line in read_obj:
if line[:16] == 'mi<mk<footnt-ind':
line = self.__get_foot_from_temp(line[17:-1])
self.__write_obj.write(line)
def join_footnotes(self):
"""
Join the footnotes from the bottom of the file and put them in their
@ -258,8 +252,8 @@ class Footnote:
self.__state = 'body'
self.__get_footnotes()
self.__join_from_temp()
self.__write_obj.close()
self.__read_from_foot_obj.close()
# self.__write_obj.close()
# self.__read_from_foot_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to2, "footnote_joined.data")

View File

@ -43,27 +43,28 @@ class GetCharMap:
def get_char_map(self, map):
if map == 'ansicpg0':
map = 'ansicpg1250'
found_map = 0
if map in ('ansicpg10000', '10000'):
map = 'mac_roman'
found_map = False
map_dict = {}
self.__char_file.seek(0)
for line in self.__char_file.readlines():
for line in self.__char_file:
if not line.strip(): continue
begin_element = '<%s>' % map;
end_element = '</%s>' % map
if not found_map:
if begin_element in line:
found_map = 1
found_map = True
else:
if end_element in line:
break
fields = line.split(':')
fields[1].replace('\\colon', ':')
map_dict[fields[1]] = fields[3]
if not found_map:
msg = 'no map found\n'
msg += 'map is "%s"\n'%(map,)
msg = 'no map found\nmap is "%s"\n'%(map,)
raise self.__bug_handler, msg
return map_dict

View File

@ -54,10 +54,10 @@ class Hex2Utf8:
'convert_to_caps'--wether to convert caps to utf-8
Returns:
nothing
"""
"""
self.__file = in_file
self.__copy = copy
if area_to_convert != 'preamble' and area_to_convert != 'body':
if area_to_convert not in ('preamble', 'body'):
msg = (
'Developer error! Wrong flag.\n'
'in module "hex_2_utf8.py\n'
@ -79,7 +79,8 @@ class Hex2Utf8:
self.__write_to = tempfile.mktemp()
self.__bug_handler = bug_handler
self.__invalid_rtf_handler = invalid_rtf_handler
def update_values( self,
def update_values(self,
file,
area_to_convert,
char_file,
@ -132,6 +133,7 @@ class Hex2Utf8:
# self.__convert_symbol = 0
# self.__convert_wingdings = 0
# self.__convert_zapf = 0
def __initiate_values(self):
"""
Required:
@ -191,6 +193,7 @@ class Hex2Utf8:
'body' : self.__body_func,
'mi<mk<body-open_' : self.__found_body_func,
'tx<hx<__________' : self.__hex_text_func,
# 'tx<nu<__________' : self.__text_func,
}
self.__body_state_dict = {
'preamble' : self.__preamble_for_body_func,
@ -209,6 +212,7 @@ class Hex2Utf8:
}
self.__caps_list = ['false']
self.__font_list = ['not-defined']
def __hex_text_func(self, line):
"""
Required:
@ -218,12 +222,12 @@ class Hex2Utf8:
token is in the dictionary, then check if the value starts with a
"&". If it does, then tag the result as utf text. Otherwise, tag it
as normal text.
If the nex_num is not in the dictionary, then a mistake has been
If the hex_num is not in the dictionary, then a mistake has been
made.
"""
hex_num = line[17:-1]
converted = self.__current_dict.get(hex_num)
if converted != None:
if converted is not None:
# tag as utf-8
if converted[0:1] == "&":
font = self.__current_dict_name
@ -263,42 +267,43 @@ class Hex2Utf8:
# msg += 'dictionary is %s\n' % self.__current_dict_name
msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
raise self.__bug_handler, msg
def __found_body_func(self, line):
self.__state = 'body'
self.__write_obj.write(line)
def __body_func(self, line):
"""
When parsing preamble
"""
self.__write_obj.write(line)
def __preamble_func(self, line):
action = self.__preamble_state_dict.get(self.__token_info)
if action != None:
if action is not None:
action(line)
else:
self.__write_obj.write(line)
def __convert_preamble(self):
self.__state = 'preamble'
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__preamble_state_dict.get(self.__state)
if action == None:
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
read_obj.close()
with open(self.__file, 'r') as read_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__preamble_state_dict.get(self.__state)
if action is None:
sys.stderr.write(_('error no state found in hex_2_utf8'),
self.__state
)
action(line)
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
def __preamble_for_body_func(self, line):
"""
Required:
@ -311,6 +316,7 @@ class Hex2Utf8:
if self.__token_info == 'mi<mk<body-open_':
self.__found_body_func(line)
self.__write_obj.write(line)
def __body_for_body_func(self, line):
"""
Required:
@ -321,10 +327,11 @@ class Hex2Utf8:
Used when parsing the body.
"""
action = self.__in_body_dict.get(self.__token_info)
if action != None:
if action is not None:
action(line)
else:
self.__write_obj.write(line)
def __start_font_func(self, line):
"""
Required:
@ -348,6 +355,7 @@ class Hex2Utf8:
else:
self.__current_dict_name = 'default'
self.__current_dict = self.__def_dict
def __end_font_func(self, line):
"""
Required:
@ -376,6 +384,7 @@ class Hex2Utf8:
else:
self.__current_dict_name = 'default'
self.__current_dict = self.__def_dict
def __start_special_font_func_old(self, line):
"""
Required:
@ -398,6 +407,7 @@ class Hex2Utf8:
self.__current_dict.append(self.__dingbats_dict)
self.__special_fonts_found += 1
self.__current_dict_name = 'Zapf Dingbats'
def __end_special_font_func(self, line):
"""
Required:
@ -416,6 +426,7 @@ class Hex2Utf8:
self.__current_dict.pop()
self.__special_fonts_found -= 1
self.__dict_name = 'default'
def __start_caps_func_old(self, line):
"""
Required:
@ -427,6 +438,7 @@ class Hex2Utf8:
self.__in_caps to 1
"""
self.__in_caps = 1
def __start_caps_func(self, line):
"""
Required:
@ -440,6 +452,7 @@ class Hex2Utf8:
self.__in_caps = 1
value = line[17:-1]
self.__caps_list.append(value)
def __end_caps_func(self, line):
"""
Required:
@ -455,7 +468,8 @@ class Hex2Utf8:
else:
sys.stderr.write('Module is hex_2_utf8\n')
sys.stderr.write('method is __end_caps_func\n')
sys.stderr.write('caps list should be more than one?\n')
sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
def __text_func(self, line):
"""
Required:
@ -466,9 +480,8 @@ class Hex2Utf8:
if in caps, convert. Otherwise, print out.
"""
text = line[17:-1]
if self.__current_dict_name == 'Symbol'\
or self.__current_dict_name == 'Wingdings'\
or self.__current_dict_name == 'Zapf Dingbats':
# print line
if self.__current_dict_name in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
the_string = ''
for letter in text:
hex_num = hex(ord(letter))
@ -477,21 +490,21 @@ class Hex2Utf8:
hex_num = hex_num[2:]
hex_num = '\'%s' % hex_num
converted = self.__current_dict.get(hex_num)
if converted == None:
if converted is None:
sys.stderr.write('module is hex_2_ut8\n')
sys.stderr.write('method is __text_func\n')
sys.stderr.write('no hex value for "%s"\n' % hex_num)
else:
the_string += converted
self.__write_obj.write('tx<nu<__________<%s\n' % the_string)
# print the_string
else:
if self.__caps_list[-1] == 'true' \
and self.__convert_caps\
and self.__current_dict_name != 'Symbol'\
and self.__current_dict_name != 'Wingdings'\
and self.__current_dict_name != 'Zapf Dingbats':
and self.__current_dict_name not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
text = text.upper()
self.__write_obj.write('tx<nu<__________<%s\n' % text)
def __utf_to_caps_func(self, line):
"""
Required:
@ -506,6 +519,7 @@ class Hex2Utf8:
# utf_text = utf_text.upper()
utf_text = self.__utf_token_to_caps_func(utf_text)
self.__write_obj.write('tx<ut<__________<%s\n' % utf_text)
def __utf_token_to_caps_func(self, char_entity):
"""
Required:
@ -530,28 +544,26 @@ class Hex2Utf8:
return char_entity
else:
return converted
def __convert_body(self):
self.__state = 'body'
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__body_state_dict.get(self.__state)
if action == None:
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
read_obj.close()
with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w')
for line in read_obj:
self.__token_info = line[:16]
action = self.__body_state_dict.get(self.__state)
if action is None:
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
def convert_hex_2_utf8(self):
self.__initiate_values()
if self.__area_to_convert == 'preamble':

View File

@ -1,5 +1,7 @@
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
"""
States.
1. default
@ -36,6 +38,7 @@ class Inline:
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
def __initiate_values(self):
"""
Initiate all values.
@ -51,7 +54,6 @@ class Inline:
'tx<ut<__________' : self.__found_text_func,
'mi<mk<inline-fld' : self.__found_text_func,
'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'cb<nu<clos-brack' : self.__close_bracket_func,
'mi<mk<par-end___' : self.__end_para_func,
'mi<mk<footnt-ope' : self.__end_para_func,
@ -63,7 +65,6 @@ class Inline:
'tx<hx<__________' : self.__found_text_func,
'tx<ut<__________' : self.__found_text_func,
'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'mi<mk<inline-fld' : self.__found_text_func,
'ob<nu<open-brack': self.__found_open_bracket_func,
'mi<mk<par-end___' : self.__end_para_func,
@ -83,12 +84,12 @@ class Inline:
self.__in_para = 0 # not in paragraph
self.__char_dict = {
# character info => ci
'annotation' : 'annotation',
'annotation' : 'annotation',
'blue______' : 'blue',
'bold______' : 'bold',
'caps______' : 'caps',
'char-style' : 'character-style',
'dbl-strike' : 'double-strike-through',
'caps______' : 'caps',
'char-style' : 'character-style',
'dbl-strike' : 'double-strike-through',
'emboss____' : 'emboss',
'engrave___' : 'engrave',
'font-color' : 'font-color',
@ -96,7 +97,7 @@ class Inline:
'font-size_' : 'font-size',
'font-style' : 'font-style',
'font-up___' : 'superscript',
'footnot-mk' : 'footnote-marker',
'footnot-mk' : 'footnote-marker',
'green_____' : 'green',
'hidden____' : 'hidden',
'italics___' : 'italics',
@ -107,9 +108,10 @@ class Inline:
'strike-thr' : 'strike-through',
'subscript_' : 'subscript',
'superscrip' : 'superscript',
'underlined' : 'underlined',
'underlined' : 'underlined',
}
self.__caps_list = ['false']
def __set_list_func(self, line):
"""
Requires:
@ -128,6 +130,7 @@ class Inline:
self.__place = 'in_list'
self.__inline_list = self.__list_inline_list
self.__groups_in_waiting = self.__groups_in_waiting_list
def __default_func(self, line):
"""
Requires:
@ -140,8 +143,8 @@ class Inline:
action = self.__default_dict.get(self.__token_info)
if action:
action(line)
if self.__token_info != 'cw<nu<hard-lineb': #calibre
self.__write_obj.write(line)
self.__write_obj.write(line)
def __found_open_bracket_func(self, line):
"""
Requires:
@ -156,6 +159,7 @@ class Inline:
self.__groups_in_waiting[0] += 1
self.__inline_list.append({})
self.__inline_list[-1]['contains_inline'] = 0
def __after_open_bracket_func(self, line):
"""
Requires:
@ -176,6 +180,7 @@ class Inline:
self.__state = 'default' # a non control word?
action(line)
self.__write_obj.write(line)
def __handle_control_word(self, line):
"""
Required:
@ -206,6 +211,7 @@ class Inline:
elif char_value == 'Zapf Dingbats':
self.__write_obj.write('mi<mk<font-dingb\n')
"""
def __close_bracket_func(self, line):
"""
Requires:
@ -244,6 +250,7 @@ class Inline:
self.__inline_list.pop()
if self.__groups_in_waiting[0] != 0:
self.__groups_in_waiting[0] -= 1
def __found_text_func(self, line):
"""
Required:
@ -257,7 +264,6 @@ class Inline:
Text can mark the start of a paragraph.
If already in a paragraph, check to see if any groups are waiting
to be added. If so, use another method to write these groups.
3. If not check if hardline break, then write
"""
if self.__place == 'in_list':
self.__write_inline()
@ -265,12 +271,9 @@ class Inline:
if not self.__in_para:
self.__in_para = 1
self.__start_para_func(line)
else:
if self.__token_info == 'cw<nu<hard-lineb': #calibre
self.__write_obj.write('mi<tg<empty_____<hardline-break\n')
if self.__groups_in_waiting[0] != 0:
elif self.__groups_in_waiting[0] != 0:
self.__write_inline()
def __write_inline(self):
"""
Required:
@ -314,6 +317,7 @@ class Inline:
self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
self.__write_obj.write('\n')
self.__groups_in_waiting[0] = 0
def __end_para_func(self, line):
"""
Requires:
@ -342,6 +346,7 @@ class Inline:
self.__write_obj.write('mi<mk<caps-end__\n')
self.__write_obj.write('mi<tg<close_____<inline\n')
self.__in_para = 0
def __start_para_func(self, line):
"""
Requires:
@ -369,12 +374,14 @@ class Inline:
self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
self.__write_obj.write('\n')
self.__groups_in_waiting[0] = 0
def __found_field_func(self, line):
"""
Just a default function to make sure I don't prematurely exit
default state
"""
pass
def form_tags(self):
"""
Requires:
@ -386,32 +393,27 @@ class Inline:
the state.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
token = line[0:-1]
self.__token_info = ''
if token == 'tx<mc<__________<rdblquote'\
or token == 'tx<mc<__________<ldblquote'\
or token == 'tx<mc<__________<lquote'\
or token == 'tx<mc<__________<rquote'\
or token == 'tx<mc<__________<emdash'\
or token == 'tx<mc<__________<endash'\
or token == 'tx<mc<__________<bullet':
self.__token_info = 'text'
else:
self.__token_info = line[:16]
self.__set_list_func(line)
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('No matching state in module inline_for_lists.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
token = line[0:-1]
self.__token_info = ''
if token == 'tx<mc<__________<rdblquote'\
or token == 'tx<mc<__________<ldblquote'\
or token == 'tx<mc<__________<lquote'\
or token == 'tx<mc<__________<rquote'\
or token == 'tx<mc<__________<emdash'\
or token == 'tx<mc<__________<endash'\
or token == 'tx<mc<__________<bullet':
self.__token_info = 'text'
else:
self.__token_info = line[:16]
self.__set_list_func(line)
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('No matching state in module inline_for_lists.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "inline.data")

View File

@ -15,8 +15,11 @@
# #
# #
#########################################################################
import os, tempfile, re
import os, tempfile
from calibre.ebooks.rtf2xml import copy
from calibre.utils.cleantext import clean_ascii_chars
class FixLineEndings:
"""Fix line endings"""
def __init__(self,
@ -32,36 +35,23 @@ class FixLineEndings:
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
self.__replace_illegals = replace_illegals
def fix_endings(self):
##tempFileName = tempfile.mktemp()
illegal_regx = re.compile( '\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
#nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 14, 15, 16, 17, 18, 19]
"""
read_obj = open(self.__file, 'r')
line = read_obj.read(1000)
regexp = re.compile(r"\r")
macintosh = regexp.search(line)
read_obj.close()
"""
# always check since I have to get rid of illegal characters
macintosh = 1
if macintosh:
line = 1
read_obj = open(self.__file, 'r')
write_obj = open(self.__write_to, 'w')
while line:
line = read_obj.read(1000)
# line = re.sub(regexp,"\n",line)
line = line.replace ('\r', '\n')
if self.__replace_illegals:
line = re.sub(illegal_regx, '', line)
# for num in nums:
# line = line.replace(chr(num), '')
write_obj.write(line )
read_obj.close()
write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "line_endings.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
#read
with open(self.__file, 'r') as read_obj:
input_file = read_obj.read()
#calibre go from win and mac to unix
input_file = input_file.replace ('\r\n', '\n')
input_file = input_file.replace ('\r', '\n')
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27
if self.__replace_illegals:
input_file = clean_ascii_chars(input_file)
#write
with open(self.__write_to, 'wb') as write_obj:
write_obj.write(input_file)
#copy
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "line_endings.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class Pict:
"""Process graphic information"""
def __init__(self,
@ -36,13 +38,11 @@ class Pict:
self.__ob_count = 0
self.__cb_count = 0
self.__pict_count = 0
self.__in_pict = 0
self.__already_found_pict = 0
self.__in_pict = False
self.__already_found_pict = False
self.__orig_file = orig_file
self.__initiate_pict_dict()
self.__out_file = out_file
# this is left over
self.__no_ask = 1
def __initiate_pict_dict(self):
self.__pict_dict = {
@ -71,57 +71,43 @@ class Pict:
self.__out_file))
else:
dir_name = os.path.dirname(self.__orig_file)
# self.__output_to_file_func()
self.__dir_name = base_name + "_rtf_pict_dir/"
self.__dir_name = os.path.join(dir_name, self.__dir_name)
if not os.path.isdir(self.__dir_name):
try:
os.mkdir(self.__dir_name)
except OSError, msg:
msg = str(msg)
msg += "Couldn't make directory '%s':\n" % (self.__dir_name)
msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
raise self.__bug_handler
else:
if self.__no_ask:
user_response = 'r'
else:
msg = 'Do you want to remove all files in %s?\n' % self.__dir_name
msg += 'Type "r" to remove.\n'
msg += 'Type any other key to keep files in place.\n'
sys.stderr.write(msg)
user_response = raw_input()
if user_response == 'r':
if self.__run_level > 1:
sys.stderr.write('Removing files from old pict directory...\n')
all_files = os.listdir(self.__dir_name)
for the_file in all_files:
the_file = os.path.join(self.__dir_name, the_file)
try:
os.remove(the_file)
except OSError:
pass
if self.__run_level > 1:
sys.stderr.write('Files removed.\n')
if self.__run_level > 1:
sys.stderr.write('Removing files from old pict directory...\n')
all_files = os.listdir(self.__dir_name)
for the_file in all_files:
the_file = os.path.join(self.__dir_name, the_file)
try:
os.remove(the_file)
except OSError:
pass
if self.__run_level > 1:
sys.stderr.write('Files removed.\n')
def __create_pict_file(self):
"""Create a file for all the pict data to be written to.
"""
self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf')
write_pic_obj = open(self.__pict_file, 'w')
write_pic_obj.close()
self.__write_pic_obj = open(self.__pict_file, 'a')
def __in_pict_func(self, line):
if self.__cb_count == self.__pict_br_count:
self.__in_pict = 0
self.__in_pict = False
self.__write_pic_obj.write("}\n")
return 1
return True
else:
action = self.__pict_dict.get(self.__token_info)
if action:
line = action(line)
self.__write_pic_obj.write(line)
return 0
self.__write_pic_obj.write(action(line))
return False
def __default(self, line, write_obj):
"""Determine if each token marks the beginning of pict data.
@ -142,53 +128,50 @@ class Pict:
write_obj.write('mi<mk<pict-end__\n')
if not self.__already_found_pict:
self.__create_pict_file()
self.__already_found_pict=1;
self.__already_found_pict=True;
self.__print_rtf_header()
self.__in_pict = 1
self.__pict_br_count = self.__ob_count
self.__cb_count = 0
self.__write_pic_obj.write("{\\pict\n")
return 0
return 1
return False
return True
def __print_rtf_header(self):
"""Print to pict file the necessary RTF data for the file to be
recognized as an RTF file.
"""
self.__write_pic_obj.write("{\\rtf1 \n")
self.__write_pic_obj.write("{\\fonttbl\\f0\\null;} \n")
self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n")
self.__write_pic_obj.write("\\pard \n")
self.__write_pic_obj.write("{\\rtf1 \n{\\fonttbl\\f0\\null;} \n")
self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n\\pard \n")
def process_pict(self):
self.__make_dir()
read_obj = open(self.__file)
write_obj = open(self.__write_to, 'w')
line_to_read = 'dummy'
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
if not self.__in_pict:
to_print = self.__default(line, write_obj)
if to_print :
write_obj.write(line)
else:
to_print = self.__in_pict_func(line)
if to_print :
write_obj.write(line)
if self.__already_found_pict:
self.__write_pic_obj.write("}\n")
self.__write_pic_obj.close()
read_obj.close()
write_obj.close()
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
if not self.__in_pict:
to_print = self.__default(line, write_obj)
if to_print :
write_obj.write(line)
else:
to_print = self.__in_pict_func(line)
if to_print :
write_obj.write(line)
if self.__already_found_pict:
self.__write_pic_obj.write("}\n")
self.__write_pic_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "pict.data")
try:
copy_obj.copy_file(self.__pict_file, "pict.rtf")
except:
pass
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
if self.__pict_count == 0:

View File

@ -15,8 +15,10 @@
# #
# #
#########################################################################
import os, re, tempfile
import os, re, tempfile
from calibre.ebooks.rtf2xml import copy, check_brackets
class ProcessTokens:
"""
Process each token on a line and add information that will be useful for
@ -41,14 +43,16 @@ class ProcessTokens:
self.__bracket_count=0
self.__exception_handler = exception_handler
self.__bug_handler = bug_handler
def compile_expressions(self):
    """Precompile the regexes used repeatedly while processing tokens:
    one for a utf entity, one for a control word's letters + argument.
    """
    self.__utf_exp = re.compile(r'(&.*?;)')
    self.__num_exp = re.compile(r'([a-zA-Z]+)(.*)')
def initiate_token_dict(self):
self.__return_code = 0
self.dict_token={
# unicode
'mshex' : ('nu', '__________', self.__ms_hex_func),
'mshex' : ('nu', '__________', self.__ms_hex_func),
# brackets
'{' : ('nu', '{', self.ob_func),
'}' : ('nu', '}', self.cb_func),
@ -66,6 +70,7 @@ class ProcessTokens:
';' : ('mc', ';', self.ms_sub_func),
# this must be wrong
'-' : ('mc', '-', self.ms_sub_func),
'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
# misc => ml
'*' : ('ml', 'asterisk__', self.default_func),
':' : ('ml', 'colon_____', self.default_func),
@ -73,7 +78,6 @@ class ProcessTokens:
'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func),
'line' : ('nu', 'hard-lineb', self.default_func), #calibre
#'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func),
@ -159,15 +163,17 @@ class ProcessTokens:
'rtf' : ('ri', 'rtf_______', self.default_func),
'deff' : ('ri', 'deflt-font', self.default_func),
'mac' : ('ri', 'macintosh_', self.default_func),
'pc' : ('ri', 'pc________', self.default_func),
'pca' : ('ri', 'pca_______', self.default_func),
'ansi' : ('ri', 'ansi______', self.default_func),
'ansicpg' : ('ri', 'ansi-codpg', self.default_func),
# notes => nt
'footnote' : ('nt', 'footnote__', self.default_func),
'ftnalt' : ('nt', 'type______<endnote', self.two_part_func),
# anchor => an
'tc' : ('an', 'toc_______', self.default_func),
'tc' : ('an', 'toc_______', self.default_func),
'bkmkstt' : ('an', 'book-mk-st', self.default_func),
'bkmkstart' : ('an', 'book-mk-st', self.default_func),
'bkmkstart' : ('an', 'book-mk-st', self.default_func),
'bkmkend' : ('an', 'book-mk-en', self.default_func),
'xe' : ('an', 'index-mark', self.default_func),
'rxe' : ('an', 'place_____', self.default_func),
@ -347,7 +353,7 @@ class ProcessTokens:
10: 'Kanji numbering without the digit character',
11: 'Kanji numbering with the digit character',
1246: 'phonetic Katakana characters in aiueo order',
1346: 'phonetic katakana characters in iroha order',
1346: 'phonetic katakana characters in iroha order',
14: 'double byte character',
15: 'single byte character',
16: 'Kanji numbering 3',
@ -392,7 +398,7 @@ class ProcessTokens:
5121 : 'Arabic Algeria',
15361 : 'Arabic Bahrain',
3073 : 'Arabic Egypt',
1 : 'Arabic General',
1 : 'Arabic General',
2049 : 'Arabic Iraq',
11265 : 'Arabic Jordan',
13313 : 'Arabic Kuwait',
@ -417,7 +423,7 @@ class ProcessTokens:
1059 : 'Byelorussian',
1027 : 'Catalan',
2052 : 'Chinese China',
4 : 'Chinese General',
4 : 'Chinese General',
3076 : 'Chinese Hong Kong',
4100 : 'Chinese Singapore',
1028 : 'Chinese Taiwan',
@ -431,7 +437,7 @@ class ProcessTokens:
2057 : 'English British',
4105 : 'English Canada',
9225 : 'English Caribbean',
9 : 'English General',
9 : 'English General',
6153 : 'English Ireland',
8201 : 'English Jamaica',
5129 : 'English New Zealand',
@ -595,30 +601,37 @@ class ProcessTokens:
num = num[1:] # chop off leading 0, which I added
num = num.upper() # the mappings store hex in caps
return 'tx<hx<__________<\'%s\n' % num # add an ' for the mappings
def ms_sub_func(self, pre, token, num):
    """Wrap a Microsoft character substitute as a text-token line."""
    return 'tx<mc<__________<' + token + '\n'
def hardline_func(self, pre, token, num):
    """Emit an empty-element tag line for a hard line break token."""
    return 'mi<tg<empty_____<' + token + '\n'
def default_func(self, pre, token, num):
if num == None:
if num is None:
num = 'true'
return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
def __list_type_func(self, pre, token, num):
type = 'arabic'
if num == None:
if num is None:
type = 'Arabic'
else:
try:
num = int(num)
except ValueError:
if self.__run_level > 3:
msg = 'number "%s" cannot be converted to integer\n' % num
msg = 'Number "%s" cannot be converted to integer\n' % num
raise self.__bug_handler, msg
type = self.__number_type_dict.get(num)
if type == None:
if type is None:
if self.__run_level > 3:
msg = 'No type for "%s" in self.__number_type_dict\n'
raise self.__bug_handler
type = 'Arabic'
return 'cw<%s<%s<nu<%s\n' % (pre, token, type)
def __language_func(self, pre, token, num):
lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group()))
if not lang_name:
@ -627,31 +640,36 @@ class ProcessTokens:
msg = 'No entry for number "%s"' % num
raise self.__bug_handler, msg
return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
def two_part_func(self, pre, token, num):
    """
    Split a composite token of the form 'name<value' and emit a control
    word line built from the two parts.

    The incoming ``num`` argument is ignored; the value comes from the
    token itself (e.g. 'type______<endnote').
    """
    # Do not shadow the builtin ``list`` (the original did); unpack the
    # first two '<'-separated fields explicitly.
    parts = token.split("<")
    token, num = parts[0], parts[1]
    return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
def divide_by_2(self, pre, token, num):
    """Emit a control word line with the numeric argument halved."""
    halved = self.divide_num(num, 2)
    return 'cw<%s<%s<nu<%s\n' % (pre, token, halved)
def divide_by_20(self, pre, token, num):
    """Emit a control word line with the numeric argument divided by 20."""
    scaled = self.divide_num(num, 20)
    return 'cw<%s<%s<nu<%s\n' % (pre, token, scaled)
def text_func(self, pre, token, num=None):
    """Wrap the token as a plain text line."""
    return 'tx<nu<__________<' + token + '\n'
def ob_func(self, pre, token, num=None):
    """Record an opening brace and emit its numbered open-bracket line."""
    depth = self.__bracket_count + 1
    self.__bracket_count = depth
    return 'ob<nu<open-brack<%04d\n' % depth
def cb_func(self, pre, token, num=None):
    """Emit the numbered close-bracket line, then pop one bracket level."""
    result = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count
    self.__bracket_count -= 1
    return result
def color_func(self, pre, token, num):
third_field = 'nu'
if num[-1] == ';':
@ -662,6 +680,7 @@ class ProcessTokens:
num = "0" + num
return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)
##return 'cw<cl<%s<nu<nu<%s>%s<%s\n' % (third_field, token, num, token)
def bool_st_func(self, pre, token, num):
if num is None or num == '' or num == '1':
return 'cw<%s<%s<nu<true\n' % (pre, token)
@ -670,24 +689,23 @@ class ProcessTokens:
return 'cw<%s<%s<nu<false\n' % (pre, token)
##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
else:
msg = 'boolean should have some value module process tokens\n'
msg += 'token is ' + token + "\n"
msg += "'" + num + "'" + "\n"
msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num)
raise self.__bug_handler, msg
def __no_sup_sub_func(self, pre, token, num):
the_string = 'cw<ci<subscript_<nu<false\n'
the_string += 'cw<ci<superscrip<nu<false\n'
return the_string
def divide_num(self, numerator, denominator):
try:
numerator = float(re.search('[0-9.]+', numerator).group())
#calibre why ignore negative number? Wrong in case of \fi
numerator = float(re.search('[0-9.\-]+', numerator).group())
except TypeError, msg:
if self.__run_level > 3:
msg = 'no number to process?\n'
msg += 'this indicates that the token '
msg += ' \(\\li\) should have a number and does not\n'
msg += 'numerator is "%s"\n' % numerator
msg += 'denominator is "%s"\n' % denominator
msg = ('No number to process?\nthis indicates that the token \(\\li\) \
should have a number and does not\nnumerator is \
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
raise self.__bug_handler, msg
if 5 > self.__return_code:
self.__return_code = 5
@ -698,9 +716,10 @@ class ProcessTokens:
if string_num[-2:] == ".0":
string_num = string_num[:-2]
return string_num
def split_let_num(self, token):
match_obj = re.search(self.__num_exp,token)
if match_obj != None:
if match_obj is not None:
first = match_obj.group(1)
second = match_obj.group(2)
if not second:
@ -714,6 +733,7 @@ class ProcessTokens:
raise self.__bug_handler
return token, 0
return first, second
def convert_to_hex(self,number):
"""Convert a string to uppercase hexadecimal"""
num = int(number)
@ -722,6 +742,7 @@ class ProcessTokens:
return hex_num
except:
raise self.__bug_handler
def process_cw(self, token):
"""Change the value of the control word by determining what dictionary
it belongs to"""
@ -737,89 +758,62 @@ class ProcessTokens:
pre, token, action = self.dict_token.get(token, (None, None, None))
if action:
return action(pre, token, num)
# unused function
def initiate_token_actions(self):
    """Build the dispatch table mapping token-introducing characters to
    their handler methods.

    NOTE(review): flagged "unused function" in the source -- confirm
    before relying on it.
    """
    self.action_for_token = {
        '{': self.ob_func,
        '}': self.cb_func,
        '\\': self.process_cw,
    }
# unused function
def evaluate_token(self,token):
    """Evaluate tokens. Return a value if the token is not a
    control word. Otherwise, pass token onto another method
    for further evaluation.

    NOTE(review): appears dead (flagged "unused function" in the source).
    ``dict_token.get`` is called without a default, so a miss returns
    None and the tuple unpack below would raise TypeError; ``action`` is
    also invoked with a single argument although the handler methods in
    this class take (pre, token, num). Confirm before reuse.
    """
    token, action = self.dict_token.get(token[0:1])
    if action:
        line = action(token)
        return line
    else :
        return 'tx<nu<nu<nu<nu<%s\n' % token
def __check_brackets(self, in_file):
    """Return 1 when in_file has mismatched brackets; otherwise None."""
    checker = check_brackets.CheckBrackets(file=in_file)
    self.__check_brack_obj = checker
    well_formed = checker.check_brackets()[0]
    if not well_formed:
        return 1
def process_tokens(self):
"""Main method for handling other methods. """
first_token = 0
second_token = 0
read_obj = open(self.__file, 'r')
write_obj = open(self.__write_to, 'w')
line_to_read = "dummy"
line_count = 0
while line_to_read:
line_to_read = read_obj.readline()
token = line_to_read
token = token.replace("\n","")
if not token:
continue
line_count += 1
try:
token.decode('us-ascii')
except UnicodeError, msg:
msg = str(msg)
msg += 'Invalid RTF: File not ascii encoded.\n'
raise self.__exception_handler, msg
if not first_token:
if token != '\\{':
msg = 'Invalid RTF: document doesn\'t start with {\n'
raise self.__exception_handler, msg
first_token = 1
elif first_token and not second_token:
if token[0:4] != '\\rtf':
msg ='Invalid RTF: document doesn\'t start with \\rtf \n'
raise self.__exception_handler, msg
second_token = 1
##token = self.evaluate_token(token)
the_index = token.find('\\ ')
if token != None and the_index > -1:
msg ='Invalid RTF: token "\\ " not valid. \n'
raise self.__exception_handler, msg
elif token[0:1] == "\\":
line = self.process_cw(token)
if line != None:
write_obj.write(line)
else:
fields = re.split(self.__utf_exp, token)
for field in fields:
if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'wb') as write_obj:
for line in read_obj:
token = line.replace("\n","")
line_count += 1
if line_count == 1 and token != '\\{':
msg = 'Invalid RTF: document doesn\'t start with {\n'
raise self.__exception_handler, msg
elif line_count == 2 and token[0:4] != '\\rtf':
msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
raise self.__exception_handler, msg
the_index = token.find('\\ ')
if token is not None and the_index > -1:
msg = 'Invalid RTF: token "\\ " not valid.\n'
raise self.__exception_handler, msg
elif token[:1] == "\\":
try:
token.decode('us-ascii')
except UnicodeError, msg:
msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
raise self.__exception_handler, msg
line = self.process_cw(token)
if line is not None:
write_obj.write(line)
else:
write_obj.write('tx<nu<__________<%s\n' % field)
read_obj.close()
write_obj.close()
fields = re.split(self.__utf_exp, token)
for field in fields:
if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
else:
write_obj.write('tx<nu<__________<%s\n' % field)
if not line_count:
msg ='Invalid RTF: file appears to be empty. \n'
msg = 'Invalid RTF: file appears to be empty.\n'
raise self.__exception_handler, msg
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "processed_tokens.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
bad_brackets = self.__check_brackets(self.__file)
if bad_brackets:
msg = 'Invalid RTF: document does not have matching brackets.\n'

View File

@ -16,7 +16,10 @@
# #
#########################################################################
import os, tempfile
from calibre.ebooks.rtf2xml import copy
from calibre.utils.cleantext import clean_ascii_chars
class ReplaceIllegals:
"""
Replace illegal lower ASCII characters.
@ -30,21 +33,14 @@ class ReplaceIllegals:
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
def replace_illegals(self):
"""
"""
nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19]
read_obj = open(self.__file, 'r')
write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
for num in nums:
line = line.replace(chr(num), '')
write_obj.write(line)
read_obj.close()
write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
for line in read_obj:
write_obj.write(clean_ascii_chars(line))
copy_obj = copy.Copy()
if self.__copy:
copy_obj.copy_file(self.__write_to, "replace_illegals.data")

View File

@ -16,7 +16,10 @@
# #
#########################################################################
import os, re, tempfile
from calibre.ebooks.rtf2xml import copy
from calibre.utils.mreplace import MReplace
class Tokenize:
"""Tokenize RTF into one line per field. Each line will contain information useful for the rest of the script"""
def __init__(self,
@ -28,89 +31,175 @@ class Tokenize:
self.__file = in_file
self.__bug_handler = bug_handler
self.__copy = copy
self.__special_tokens = [ '_', '~', "'", '{', '}' ]
self.__write_to = tempfile.mktemp()
def __from_ms_to_utf8(self,match_obj):
    """Convert a signed RTF \\u code-point match to an XML hex charref.

    Negative values (16-bit signed encoding used by MS Word) are wrapped
    into the 0-65535 range before formatting.
    """
    code_point = int(match_obj.group(1))
    if code_point < 0:
        code_point = code_point + 65536
    return '&#x%X;' % code_point
def __neg_unicode_func(self, match_obj):
    """Map a negative RTF unicode value into 0-65535 and return a charref."""
    code_point = 65536 - int(match_obj.group(1))
    return '&#x%X;' % code_point
def __sub_line_reg(self,line):
    """Escape XML metacharacters and normalise RTF control symbols in a
    single line, then rewrite \\u and \\' escapes with the compiled
    expressions.

    Order matters: "&" must be replaced before "<" / ">" so the entity
    text itself is not re-escaped.
    """
    line = line.replace("\\\\", "\\backslash ")
    line = line.replace("\\~", "\\~ ")
    line = line.replace("\\;", "\\; ")
    line = line.replace("&", "&amp;")
    line = line.replace("<", "&lt;")
    line = line.replace(">", "&gt;")
    # NOTE(review): duplicate of the "\~" replacement above -- harmless.
    line = line.replace("\\~", "\\~ ")
    line = line.replace("\\_", "\\_ ")
    line = line.replace("\\:", "\\: ")
    line = line.replace("\\-", "\\- ")
    # turn into a generic token to eliminate special
    # cases and make processing easier
    line = line.replace("\\{", "\\ob ")
    # turn into a generic token to eliminate special
    # cases and make processing easier
    line = line.replace("\\}", "\\cb ")
    # put a backslash in front of to eliminate special cases and
    # make processing easier
    line = line.replace("{", "\\{")
    # put a backslash in front of to eliminate special cases and
    # make processing easier
    line = line.replace("}", "\\}")
    line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line)
    # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
    line = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", line)
    ##line = line.replace("\\backslash", "\\\\")
    # this is for older RTF
    line = re.sub(self.__par_exp, '\\par ', line)
    return line
def __compile_expressions(self):
    """Compile the regular expressions used by line-based tokenisation."""
    # \'xx -- hex-escaped character; the two hex digits are captured
    self.__ms_hex_exp = re.compile(r"\\\'(..)")
    # \uN -- signed decimal unicode escape with an optional trailing space
    self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
    # split points: escaped braces/backslash, bare braces, control words
    self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
    # a lone backslash at end of line (older-RTF paragraph mark)
    self.__par_exp = re.compile(r'\\$')
    # control word immediately followed by non-digit text
    self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
    ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
def __create_tokens(self):
    """Read the RTF file line by line, normalise each line with
    ``__sub_line_reg``, split it into tokens, and write one token per
    line to the temporary output file.
    """
    self.__compile_expressions()
    read_obj = open(self.__file, 'r')
    write_obj = open(self.__write_to, 'w')
    line_to_read = "dummy"
    # readline() returns '' at EOF, which ends the loop
    while line_to_read:
        line_to_read = read_obj.readline()
        line = line_to_read
        line = line.replace("\n", "")
        line = self.__sub_line_reg(line)
        tokens = re.split(self.__splitexp, line)
        ##print tokens
        for token in tokens:
            if token != "":
                write_obj.write(token + "\n")
            # disabled alternative that split mixed word+text tokens
            """
            match_obj = re.search(self.__mixed_exp, token)
            if match_obj != None:
                first = match_obj.group(1)
                second = match_obj.group(2)
                write_obj.write(first + "\n")
                write_obj.write(second + "\n")
            else:
                write_obj.write(token + "\n")
            """
    read_obj.close()
    write_obj.close()
#variables
self.__uc_char = 0
self.__uc_bin = False
self.__uc_value = [1]
def __reini_utf8_counters(self):
    """Reset the pending \\u skip count and the \\bin skip flag."""
    self.__uc_bin = False
    self.__uc_char = 0
def __remove_uc_chars(self, startchar, token):
    """Drop the alternative ANSI characters that follow a \\u escape.

    Consumes up to ``self.__uc_char`` characters of ``token`` starting at
    ``startchar`` (spaces do not count against the skip budget) and
    returns whatever remains.
    """
    for i in xrange(startchar, len(token)):
        if token[i] == " ":
            # spaces act as separators, not as replacement characters
            continue
        elif self.__uc_char:
            self.__uc_char -= 1
        else:
            return token[i:]
    #if only " " and char to skip
    return ''
def __unicode_process(self, token):
    """Handle RTF unicode state (\\u, \\uc, \\bin) for a single token.

    Maintains a stack of \\uc values (fallback-character counts) that
    follows brace scope, skips the ANSI fallback characters that trail a
    \\u escape, and converts \\u values into XML character references.
    Returns the (possibly rewritten or emptied) token.
    """
    #change scope in
    if token == '\{':
        self.__uc_value.append(self.__uc_value[-1])
        #basic error handling
        self.__reini_utf8_counters()
        return token
    #change scope out
    elif token == '\}':
        self.__uc_value.pop()
        self.__reini_utf8_counters()
        return token
    #add a uc control
    elif token[:3] == '\uc':
        self.__uc_value[-1] = int(token[3:])
        self.__reini_utf8_counters()
        return token
    #bin data to slip
    elif self.__uc_bin:
        self.__uc_bin = False
        return ''
    #uc char to remove
    elif self.__uc_char:
        #handle \bin tag in case of uc char to skip
        if token[:4] == '\bin':
            self.__uc_char -=1
            self.__uc_bin = True
            return ''
        elif token[:1] == "\\" :
            self.__uc_char -=1
            return ''
        else:
            return self.__remove_uc_chars(0, token)
    #go for real \u token
    match_obj = self.__utf_exp.match(token)
    if match_obj is not None:
        self.__reini_utf8_counters()
        #get value and handle negative case
        uni_char = int(match_obj.group(1))
        uni_len = len(match_obj.group(1)) + 2
        if uni_char < 0:
            uni_char += 65536
        uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
        self.__uc_char = self.__uc_value[-1]
        #there is only an unicode char
        if len(token)<= uni_len:
            return uni_char
        #an unicode char and something else
        #must be after as it is split on \
        #necessary? maybe for \bin?
        elif not self.__uc_char:
            return uni_char + token[uni_len:]
        #if not uc0 and chars
        else:
            return uni_char + self.__remove_uc_chars(uni_len, token)
    #default
    return token
def __sub_reg_split(self,input_file):
    """Apply the bulk replacements to the whole file contents, then split
    it into a token list with empty strings and bare newlines removed.

    NOTE(review): relies on Python 2 ``filter`` returning a list --
    confirm if this module is ever ported.
    """
    input_file = self.__replace_spchar.mreplace(input_file)
    input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
    input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
    #remove \n in bin data
    input_file = self.__bin_exp.sub(lambda x: \
        x.group().replace('\n', '') + '\n', input_file)
    #split
    tokens = re.split(self.__splitexp, input_file)
    #remove empty tokens and \n
    return filter(lambda x: len(x) > 0 and x != '\n', tokens)
    #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
    # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
    # this is for older RTF
    #line = re.sub(self.__par_exp, '\\par ', line)
    #return filter(lambda x: len(x) > 0, \
    #(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self):
    """Build the MReplace table and the regular expressions used by
    whole-file tokenisation.

    NOTE(review): SIMPLE_RPL lists "\\~" twice; duplicate dict keys
    collapse silently (the values are identical, so it is harmless).
    NOTE(review): the r'\\$' entry looks like a regex, but MReplace does
    literal replacement -- confirm it ever matches real input.
    """
    SIMPLE_RPL = {
        "\\\\": "\\backslash ",
        "\\~": "\\~ ",
        "\\;": "\\; ",
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\\~": "\\~ ",
        "\\_": "\\_ ",
        "\\:": "\\: ",
        "\\-": "\\- ",
        # turn into a generic token to eliminate special
        # cases and make processing easier
        "\\{": "\\ob ",
        # turn into a generic token to eliminate special
        # cases and make processing easier
        "\\}": "\\cb ",
        # put a backslash in front of to eliminate special cases and
        # make processing easier
        "{": "\\{",
        # put a backslash in front of to eliminate special cases and
        # make processing easier
        "}": "\\}",
        # this is for older RTF
        r'\\$': '\\par ',
    }
    self.__replace_spchar = MReplace(SIMPLE_RPL)
    #add ;? in case of char following \u
    self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
    self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
    self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
    #manage upr/ud situations
    self.__utf_ud = re.compile(r"\\{[\n ]?\\upr[\n ]?(?:\\{.*?\\})[\n ]?" + \
        r"\\{[\n ]?\\*[\n ]?\\ud[\n ]?(\\{.*?\\})[\n ]?\\}[\n ]?\\}")
    #add \n in split for whole file reading
    #why keep backslash whereas \is replaced before?
    #remove \n from endline char
    self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
    #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
    #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
    #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
    #self.__par_exp = re.compile(r'\\$')
    #self.__remove_line = re.compile(r'\n+')
    #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
    ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
def tokenize(self):
"""Main class for handling other methods. Reads in one line \
at a time, usues method self.sub_line to make basic substitutions,\
uses ? to process tokens"""
self.__create_tokens()
"""Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\
and process tokens by itself"""
#read
with open(self.__file, 'r') as read_obj:
input_file = read_obj.read()
#process simple replacements and split giving us a correct list
#remove '' and \n in the process
tokens = self.__sub_reg_split(input_file)
#correct unicode
tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc
tokens = filter(lambda x: len(x) > 0, tokens)
#write
with open(self.__write_to, 'wb') as write_obj:
write_obj.write('\n'.join(tokens))
#Move and copy
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
#self.__special_tokens = [ '_', '~', "'", '{', '}' ]

View File

@ -1,3 +1,6 @@
from functions import textile, textile_restricted, Textile
if False:
textile, textile_restricted, Textile
__all__ = ['textile', 'textile_restricted']

View File

@ -425,7 +425,7 @@ class Textile(object):
text = text.split('\n\n')
tag = 'p'
atts = cite = graf = ext = ''
atts = cite = graf = ext = c1 = ''
out = []
@ -441,14 +441,14 @@ class Textile(object):
h_match = re.search(r'h([1-6])', tag)
if h_match:
head_level, = h_match.groups()
tag = 'h%i' % max(1,
tag = 'h%i' % max(1,
min(int(head_level) + head_offset,
6))
o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
cite, graf)
# leave off c1 if this block is extended,
# we'll close it at the start of the next block
if ext:
line = "%s%s%s%s" % (o1, o2, content, c2)
else:
@ -772,7 +772,7 @@ class Textile(object):
if pre == None:
pre = ''
# assume ) at the end of the url is not actually part of the url
# unless the url also contains a (
if url.endswith(')') and not url.find('(') > -1:
@ -875,7 +875,7 @@ class Textile(object):
atts = atts + ' title="%s" alt="%s"' % (title, title)
else:
atts = atts + ' alt=""'
if not self.isRelURL(url) and self.get_sizes:
size = getimagesize(url)
if (size):
@ -894,7 +894,7 @@ class Textile(object):
out.append('<img src="%s"%s>' % (url, atts))
else:
out.append('<img src="%s"%s />' % (url, atts))
if href:
if href:
out.append('</a>')
return ''.join(out)
@ -970,7 +970,7 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
When lite=True is set (the default):
Block tags are restricted to p, bq, and bc.
Lists and tables are disabled.
When noimage=True is set (the default):
Image tags are disabled.

View File

@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Read content from txt file.
@ -7,15 +11,10 @@ Read content from txt file.
import os, re
from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.markdown import markdown
from calibre.ebooks.textile import textile
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.utils.cleantext import clean_ascii_chars
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -35,10 +34,8 @@ def clean_txt(txt):
# Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt)
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
txt = illegal_chars.sub('', txt)
txt = clean_ascii_chars(txt)
return txt
def split_txt(txt, epub_split_size_kb=0):
@ -75,6 +72,7 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0):
return tp.convert(txt, title, epub_split_size_kb)
def convert_markdown(txt, title='', disable_toc=False):
from calibre.ebooks.markdown import markdown
md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'],
extension_configs={"toc": {"disable_toc": disable_toc}},
@ -82,6 +80,7 @@ def convert_markdown(txt, title='', disable_toc=False):
return HTML_TEMPLATE % (title, md.convert(txt))
def convert_textile(txt, title=''):
from calibre.ebooks.textile import textile
html = textile(txt, encoding='utf-8')
return HTML_TEMPLATE % (title, html)
@ -120,43 +119,43 @@ def split_string_separator(txt, size) :
def detect_paragraph_type(txt):
'''
Tries to determine the formatting of the document.
block: Paragraphs are separated by a blank line.
single: Each line is a paragraph.
print: Each paragraph starts with a 2+ spaces or a tab
and ends when a new paragraph is reached.
unformatted: most lines have hard line breaks, few/no blank lines or indents
returns block, single, print, unformatted
'''
txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n')
txt_line_count = len(re.findall('(?mu)^\s*.+$', txt))
# Check for hard line breaks - true if 55% of the doc breaks in the same region
docanalysis = DocAnalysis('txt', txt)
hardbreaks = docanalysis.line_histogram(.55)
if hardbreaks:
# Determine print percentage
tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
print_percent = tab_line_count / float(txt_line_count)
# Determine block percentage
empty_line_count = len(re.findall('(?mu)^\s*$', txt))
block_percent = empty_line_count / float(txt_line_count)
# Compare the two types - the type with the larger number of instances wins
# in cases where only one or the other represents the vast majority of the document neither wins
if print_percent >= block_percent:
if .15 <= print_percent <= .75:
return 'print'
elif .15 <= block_percent <= .75:
return 'block'
return 'block'
# Assume unformatted text with hardbreaks if nothing else matches
# Assume unformatted text with hardbreaks if nothing else matches
return 'unformatted'
# return single if hardbreaks is false
return 'single'
@ -164,17 +163,17 @@ def detect_paragraph_type(txt):
def detect_formatting_type(txt):
markdown_count = 0
textile_count = 0
# Check for markdown
# Headings
markdown_count += len(re.findall('(?mu)^#+', txt))
markdown_count += len(re.findall('(?mu)^#+', txt))
markdown_count += len(re.findall('(?mu)^=+$', txt))
markdown_count += len(re.findall('(?mu)^-+$', txt))
# Images
markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
# Links
markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
# Check for textile
# Headings
textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
@ -184,11 +183,11 @@ def detect_formatting_type(txt):
textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
# Links
textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
if markdown_count > 5 or textile_count > 5:
if markdown_count > textile_count:
return 'markdown'
else:
return 'textile'
return 'heuristic'

View File

@ -269,10 +269,14 @@ def question_dialog(parent, title, msg, det_msg='', show_copy_button=True,
return d.exec_() == yes_button
def info_dialog(parent, title, msg, det_msg='', show=False):
def info_dialog(parent, title, msg, det_msg='', show=False,
show_copy_button=True):
d = MessageBox(QMessageBox.Information, title, msg, QMessageBox.Ok,
parent, det_msg)
d.setIconPixmap(QPixmap(I('dialog_information.png')))
if not show_copy_button:
d.cb.setVisible(False)
if show:
return d.exec_()
return d

View File

@ -27,14 +27,17 @@ class PluginWidget(QWidget, Ui_Form):
def __init__(self, parent=None):
QWidget.__init__(self, parent)
self.setupUi(self)
from calibre.library.catalog import FIELDS
self.all_fields = []
for x in FIELDS :
if x != 'all':
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)
def initialize(self, name, db): #not working properly to update
from calibre.library.catalog import FIELDS
self.all_fields = [x for x in FIELDS if x != 'all']
#add custom columns
self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
#populate
for x in self.all_fields:
QListWidgetItem(x, self.db_fields)
self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog
from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
class DRMErrorMessage(QDialog, Ui_Dialog):
    """Dialog shown when an operation fails because a book is DRM locked."""

    def __init__(self, parent=None, title=None):
        """Set up the designer UI; if ``title`` is given, prepend it as a
        heading to the stock DRM message."""
        QDialog.__init__(self, parent)
        self.setupUi(self)
        if title is not None:
            # keep the original message text below the injected heading
            t = unicode(self.msg.text())
            self.msg.setText('<h2>%s</h2>%s'%(title, t))
        self.resize(self.sizeHint())

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>417</width>
<height>235</height>
</rect>
</property>
<property name="windowTitle">
<string>This book is DRMed</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="maximumSize">
<size>
<width>132</width>
<height>16777215</height>
</size>
</property>
<property name="text">
<string/>
</property>
<property name="pixmap">
<pixmap resource="../../../../resources/images.qrc">:/images/document-encrypt.png</pixmap>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLabel" name="msg">
<property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Close</set>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections>
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>Dialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>Dialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.book.base import composite_formatter
from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, ResizableDialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic
from calibre.utils.titlecase import titlecase
@ -49,7 +49,7 @@ def get_cover_data(path):
class MyBlockingBusy(QDialog):
class MyBlockingBusy(QDialog): # {{{
do_one_signal = pyqtSignal()
@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
self.current_index += 1
self.do_one_signal.emit()
# }}}
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
s_r_functions = { '' : lambda x: x,
_('Lower Case') : lambda x: icu_lower(x),
@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
]
def __init__(self, window, rows, model, tab):
QDialog.__init__(self, window)
ResizableDialog.__init__(self, window)
Ui_MetadataBulkDialog.__init__(self)
self.setupUi(self)
self.model = model
self.db = model.db
self.ids = [self.db.id(r) for r in rows]

File diff suppressed because it is too large Load Diff

View File

@ -823,7 +823,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if book.series_index is not None:
self.series_index.setValue(book.series_index)
if book.has_cover:
if d.opt_auto_download_cover.isChecked() and book.has_cover:
if d.opt_auto_download_cover.isChecked():
self.fetch_cover()
else:
self.fetch_cover_button.setFocus(Qt.OtherFocusReason)

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import time, os
from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
QVariant, QInputDialog
QVariant
from calibre.web.feeds.recipes import compile_recipe
from calibre.web.feeds.news import AutomaticNewsRecipe
@ -256,24 +256,61 @@ class %(classname)s(%(base_class)s):
def add_builtin_recipe(self):
from calibre.web.feeds.recipes.collection import \
get_builtin_recipe_by_title, get_builtin_recipe_titles
items = sorted(get_builtin_recipe_titles(), key=sort_key)
get_builtin_recipe_collection, get_builtin_recipe_by_id
from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
QListWidget, QDialogButtonBox, QSize
d = QDialog(self)
d.l = QVBoxLayout()
d.setLayout(d.l)
d.list = QListWidget(d)
d.list.doubleClicked.connect(lambda x: d.accept())
d.l.addWidget(d.list)
d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
Qt.Horizontal, d)
d.bb.accepted.connect(d.accept)
d.bb.rejected.connect(d.reject)
d.l.addWidget(d.bb)
d.setWindowTitle(_('Choose builtin recipe'))
items = []
for r in get_builtin_recipe_collection():
id_ = r.get('id', '')
title = r.get('title', '')
lang = r.get('language', '')
if id_ and title:
items.append((title + ' [%s]'%lang, id_))
title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'),
items, 0, False)
if ok:
title = unicode(title)
profile = get_builtin_recipe_by_title(title)
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'),
_('A custom recipe named %s already exists. Do you want to '
'replace it?')%title):
self._model.replace_by_title(title, profile)
else:
return
items.sort(key=lambda x:sort_key(x[0]))
for title, id_ in items:
item = QListWidgetItem(title)
item.setData(Qt.UserRole, id_)
d.list.addItem(item)
d.resize(QSize(450, 400))
ret = d.exec_()
d.list.doubleClicked.disconnect()
if ret != d.Accepted:
return
items = list(d.list.selectedItems())
if not items:
return
item = items[-1]
id_ = unicode(item.data(Qt.UserRole).toString())
title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
profile = get_builtin_recipe_by_id(id_)
if profile is None:
raise Exception('Something weird happened')
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'),
_('A custom recipe named %s already exists. Do you want to '
'replace it?')%title):
self._model.replace_by_title(title, profile)
else:
self.model.add(title, profile)
return
else:
self.model.add(title, profile)
self.clear()

View File

@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
from functools import partial
from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
pyqtSignal, QToolButton, QPushButton, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \
QMenu
pyqtSignal, QToolButton, QMenu, QCheckBox, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
from calibre.constants import __appname__
from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
l.addWidget(x)
self.search_button = QPushButton(_('&Go!'))
self.search_button = QToolButton()
self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.search_button.setText(_('&Go!'))
l.addWidget(self.search_button)
self.search_button.setSizePolicy(QSizePolicy.Minimum,
QSizePolicy.Minimum)
@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{
l.addWidget(x)
x.setToolTip(_("Reset Quick Search"))
x = parent.search_highlight_only = QCheckBox()
x.setText(_('&Highlight'))
x.setToolTip(_('Highlight matched books in the book list, instead '
'of restricting the book list to the matches.'))
l.addWidget(x)
x = parent.saved_search = SavedSearchBox(self)
x.setMaximumSize(QSize(150, 16777215))
x.setMinimumContentsLength(15)

View File

@ -10,7 +10,7 @@ from contextlib import closing
from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate
QModelIndex, QVariant, QDate, QColor
from calibre.gui2 import NONE, config, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException
@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{
self.bool_no_icon = QIcon(I('list_remove.png'))
self.bool_blank_icon = QIcon(I('blank.png'))
self.device_connected = False
self.rows_matching = set()
self.lowest_row_matching = None
self.highlight_only = False
self.read_config()
def change_alignment(self, colname, alignment):
@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{
self.endInsertRows()
self.count_changed()
def set_highlight_only(self, toWhat):
self.highlight_only = toWhat
if self.last_search:
self.research()
def search(self, text, reset=True):
try:
self.db.search(text)
if self.highlight_only:
self.db.search('')
if not text:
self.rows_matching = set()
self.lowest_row_matching = None
else:
self.rows_matching = self.db.search(text, return_matches=True)
if self.rows_matching:
self.lowest_row_matching = self.db.row(self.rows_matching[0])
self.rows_matching = set(self.rows_matching)
else:
self.rows_matching = set()
self.lowest_row_matching = None
self.db.search(text)
except ParseException as e:
self.searched.emit(e.msg)
return
@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
name, val = mi.format_field(key)
if mi.metadata_for_field(key)['datatype'] == 'comments':
name += ':html'
if val:
if val and name not in data:
data[name] = val
return data
@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{
return NONE
if role in (Qt.DisplayRole, Qt.EditRole):
return self.column_to_dc_map[col](index.row())
elif role == Qt.BackgroundColorRole:
if self.id(index) in self.rows_matching:
return QColor('lightgreen')
elif role == Qt.DecorationRole:
if self.column_to_dc_decorator_map[col] is not None:
return self.column_to_dc_decorator_map[index.column()](index.row())

View File

@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{
def set_editable(self, editable, supports_backloading):
self._model.set_editable(editable)
def search_proxy(self, txt):
self._model.search(txt)
if self._model.lowest_row_matching is not None:
self.select_rows([self._model.lowest_row_matching], using_ids=False)
self.setFocus(Qt.OtherFocusReason)
def connect_to_search_box(self, sb, search_done):
sb.search.connect(self._model.search)
sb.search.connect(self.search_proxy)
self._search_done = search_done
self._model.searched.connect(self.search_done)

View File

@ -15,7 +15,8 @@ from calibre.gui2.preferences.plugins_ui import Ui_Form
from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin, \
disable_plugin, plugin_customization, add_plugin, \
remove_plugin
from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files
from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
question_dialog
class PluginModel(QAbstractItemModel): # {{{
@ -76,6 +77,16 @@ class PluginModel(QAbstractItemModel): # {{{
return self.index(j, 0, parent)
return QModelIndex()
def plugin_to_index_by_properties(self, plugin):
for i, category in enumerate(self.categories):
parent = self.index(i, 0, QModelIndex())
for j, p in enumerate(self._data[category]):
if plugin.name == p.name and plugin.type == p.type and \
plugin.author == p.author and plugin.version == p.version:
return self.index(j, 0, parent)
return QModelIndex()
def refresh_plugin(self, plugin, rescan=False):
if rescan:
self.populate()
@ -132,7 +143,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.toggle_plugin_button.clicked.connect(self.toggle_plugin)
self.customize_plugin_button.clicked.connect(self.customize_plugin)
self.remove_plugin_button.clicked.connect(self.remove_plugin)
self.button_plugin_browse.clicked.connect(self.find_plugin)
self.button_plugin_add.clicked.connect(self.add_plugin)
def toggle_plugin(self, *args):
@ -149,23 +159,39 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.modify_plugin(op='remove')
def add_plugin(self):
path = unicode(self.plugin_path.text())
if path and os.access(path, os.R_OK) and path.lower().endswith('.zip'):
add_plugin(path)
path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
filters=[(_('Plugins'), ['zip'])], all_files=False,
select_only_single_file=True)
if not path:
return
path = path[0]
if path and os.access(path, os.R_OK) and path.lower().endswith('.zip'):
if not question_dialog(self, _('Are you sure?'), '<p>' + \
_('Installing plugins is a <b>security risk</b>. '
'Plugins can contain a virus/malware. '
'Only install it if you got it from a trusted source.'
' Are you sure you want to proceed?'),
show_copy_button=False):
return
plugin = add_plugin(path)
self._plugin_model.populate()
self._plugin_model.reset()
self.changed_signal.emit()
self.plugin_path.setText('')
info_dialog(self, _('Success'),
_('Plugin <b>{0}</b> successfully installed under <b>'
' {1} plugins</b>. You may have to restart calibre '
'for the plugin to take effect.').format(plugin.name, plugin.type),
show=True, show_copy_button=False)
idx = self._plugin_model.plugin_to_index_by_properties(plugin)
if idx.isValid():
self.plugin_view.scrollTo(idx,
self.plugin_view.PositionAtCenter)
self.plugin_view.scrollTo(idx,
self.plugin_view.PositionAtCenter)
else:
error_dialog(self, _('No valid plugin path'),
_('%s is not a valid plugin path')%path).exec_()
def find_plugin(self):
path = choose_files(self, 'choose plugin dialog', _('Choose plugin'),
filters=[('Plugins', ['zip'])], all_files=False,
select_only_single_file=True)
if path:
self.plugin_path.setText(path[0])
def modify_plugin(self, op=''):
index = self.plugin_view.currentIndex()
@ -191,10 +217,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if plugin.do_user_config():
self._plugin_model.refresh_plugin(plugin)
elif op == 'remove':
msg = _('Plugin {0} successfully removed').format(plugin.name)
if remove_plugin(plugin):
self._plugin_model.populate()
self._plugin_model.reset()
self.changed_signal.emit()
info_dialog(self, _('Success'), msg, show=True,
show_copy_button=False)
else:
error_dialog(self, _('Cannot remove builtin plugin'),
plugin.name + _(' cannot be removed. It is a '

View File

@ -72,64 +72,14 @@
</layout>
</item>
<item>
<widget class="QGroupBox" name="groupBox_4">
<property name="title">
<string>Add new plugin</string>
<widget class="QPushButton" name="button_plugin_add">
<property name="text">
<string>&amp;Add a new plugin</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/plugins.png</normaloff>:/images/plugins.png</iconset>
</property>
<layout class="QVBoxLayout" name="verticalLayout_5">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_5">
<item>
<widget class="QLabel" name="label_14">
<property name="text">
<string>Plugin &amp;file:</string>
</property>
<property name="buddy">
<cstring>plugin_path</cstring>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="plugin_path"/>
</item>
<item>
<widget class="QToolButton" name="button_plugin_browse">
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/document_open.png</normaloff>:/images/document_open.png</iconset>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_4">
<item>
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="button_plugin_add">
<property name="text">
<string>&amp;Add</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
</layout>

View File

@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
dont_remove_from=set(['toolbar-device']))
if name is None:
return FakeAction('--- '+_('Separator')+' ---', None)
return gui.iactions[name]
try:
return gui.iactions[name]
except:
return None
def rowCount(self, parent):
return len(self._data)
@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
BaseModel.__init__(self)
self.gprefs_name = 'action-layout-'+key
current = gprefs[self.gprefs_name]
self._data = [self.name_to_action(x, gui) for x in current]
self._data = [self.name_to_action(x, gui) for x in current]
self._data = [x for x in self._data if x is not None]
self.key = key
self.gui = gui

View File

@ -16,6 +16,7 @@ from calibre.gui2 import config
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
from calibre.gui2.dialogs.search import SearchDialog
from calibre.utils.config import dynamic
from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key
@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{
unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip())
self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
self.search_highlight_only.setChecked(
dynamic.get('search_highlight_only', False))
def focus_search_box(self, *args):
self.search.setFocus(Qt.OtherFocusReason)
@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{
def focus_to_library(self):
self.current_view().setFocus(Qt.OtherFocusReason)
def highlight_only_changed(self, toWhat):
dynamic.set('search_highlight_only', toWhat)
self.current_view().model().set_highlight_only(toWhat)
self.focus_to_library()
# }}}
class SavedSearchBoxMixin(object): # {{{

View File

@ -150,7 +150,7 @@ class Delegate(QStyledItemDelegate):
custom = []
if editor.custom.isChecked():
for x in ('1', '2'):
sc = getattr(editor, 'shortcut'+x)
sc = getattr(editor, 'shortcut'+x, None)
if sc is not None:
custom.append(sc)
@ -266,6 +266,11 @@ class ShortcutConfig(QWidget):
self.view.scrollTo(index)
@property
def is_editing(self):
return self.view.state() == self.view.EditingState
if __name__ == '__main__':
from calibre.gui2 import is_ok_to_use_qt
from calibre.gui2.viewer.keys import SHORTCUTS

View File

@ -19,7 +19,7 @@ from PyQt4.Qt import Qt, SIGNAL, QTimer, \
QMessageBox, QHelpEvent
from calibre import prints
from calibre.constants import __appname__, isosx, DEBUG
from calibre.constants import __appname__, isosx
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
@ -103,7 +103,15 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
self.gui_debug = gui_debug
acmap = OrderedDict()
for action in interface_actions():
ac = action.load_actual_plugin(self)
try:
ac = action.load_actual_plugin(self)
except:
# Ignore errors in loading user supplied plugins
import traceback
traceback.print_exc()
if ac.plugin_path is None:
raise
ac.plugin_path = action.plugin_path
ac.interface_action_base_plugin = action
if ac.name in acmap:
@ -460,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
try:
if 'calibre.ebooks.DRMError' in job.details:
if not minz:
d = error_dialog(self, _('Conversion Error'),
_('<p>Could not convert: %s<p>It is a '
'<a href="%s">DRM</a>ed book. You must first remove the '
'DRM using third party tools.')%\
(job.description.split(':')[-1],
'http://bugs.calibre-ebook.com/wiki/DRM'))
from calibre.gui2.dialogs.drm_error import DRMErrorMessage
d = DRMErrorMessage(self, job.description.split(':')[-1])
d.setModal(False)
d.show()
self._modeless_dialogs.append(d)
@ -582,9 +586,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
# Goes here, because if cf is valid, db is valid.
db.prefs['field_metadata'] = db.field_metadata.all_metadata()
db.commit_dirty_cache()
if DEBUG and db.gm_count > 0:
print 'get_metadata cache: {0:d} calls, {1:4.2f}% misses'.format(
db.gm_count, (db.gm_missed*100.0)/db.gm_count)
for action in self.iactions.values():
if not action.shutting_down():
return

View File

@ -120,6 +120,13 @@ class ConfigDialog(QDialog, Ui_Dialog):
def accept(self, *args):
if self.shortcut_config.is_editing:
from calibre.gui2 import info_dialog
info_dialog(self, _('Still editing'),
_('You are in the middle of editing a keyboard shortcut'
' first complete that, by clicking outside the '
' shortcut editing box.'), show=True)
return
c = config()
c.set('serif_family', unicode(self.serif_family.currentFont().family()))
c.set('sans_family', unicode(self.sans_family.currentFont().family()))
@ -279,7 +286,7 @@ class Document(QWebPage): # {{{
@pyqtSignature("")
def init_hyphenate(self):
if self.hyphenate:
if self.hyphenate and getattr(self, 'loaded_lang', ''):
self.javascript('do_hyphenation("%s")'%self.loaded_lang)
def after_load(self):

View File

@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.ebooks.metadata import MetaInformation
from calibre.customize.ui import available_input_formats
from calibre.gui2.viewer.dictionary import Lookup
from calibre import as_unicode
class TOCItem(QStandardItem):
@ -626,13 +627,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
QApplication.processEvents()
if worker.exception is not None:
if isinstance(worker.exception, DRMError):
error_dialog(self, _('DRM Error'),
_('<p>This book is protected by <a href="%s">DRM</a>')
%'http://wiki.mobileread.com/wiki/DRM').exec_()
from calibre.gui2.dialogs.drm_error import DRMErrorMessage
DRMErrorMessage(self).exec_()
else:
r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'),
unicode(r), det_msg=worker.traceback, show=True)
as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator()
else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])

View File

@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
if isinstance(location, list):
if allow_recursion:
for loc in location:
matches |= self.get_matches(loc, query, allow_recursion=False)
matches |= self.get_matches(loc, query, candidates,
allow_recursion=False)
return matches
raise ParseException(query, len(query), 'Recursive query group detected', self)
@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
fm = self.field_metadata[location]
# take care of dates special case
if fm['datatype'] == 'datetime':
return self.get_dates_matches(location, query.lower())
return self.get_dates_matches(location, query.lower(), candidates)
# take care of numbers special case
if fm['datatype'] in ('rating', 'int', 'float'):
return self.get_numeric_matches(location, query.lower())
return self.get_numeric_matches(location, query.lower(), candidates)
# take care of the 'count' operator for is_multiples
if fm['is_multiple'] and \
@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
query[1:1] in '=<>!':
vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
len(item[loc].split(ms)) if item[loc] is not None else 0
return self.get_numeric_matches(location, query[1:], val_func=vf)
return self.get_numeric_matches(location, query[1:],
candidates, val_func=vf)
# everything else, or 'all' matches
matchkind = CONTAINS_MATCH
@ -598,7 +600,6 @@ class ResultCache(SearchQueryParser): # {{{
def set(self, row, col, val, row_is_id=False):
id = row if row_is_id else self._map_filtered[row]
self._data[id][self.FIELD_MAP['all_metadata']] = None
self._data[id][col] = val
def get(self, row, col, row_is_id=False):
@ -629,7 +630,6 @@ class ResultCache(SearchQueryParser): # {{{
self._data[id] = CacheRow(db, self.composites,
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
self._data[id].append(db.book_on_device_string(id))
self._data[id].append(None)
except IndexError:
return None
try:
@ -646,7 +646,6 @@ class ResultCache(SearchQueryParser): # {{{
self._data[id] = CacheRow(db, self.composites,
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
self._data[id].append(db.book_on_device_string(id))
self._data[id].append(None)
self._map[0:0] = ids
self._map_filtered[0:0] = ids
@ -671,7 +670,6 @@ class ResultCache(SearchQueryParser): # {{{
for item in self._data:
if item is not None:
item.append(db.book_on_device_string(item[0]))
item.append(None)
self._map = [i[0] for i in self._data if i is not None]
if field is not None:
self.sort(field, ascending)

View File

@ -1524,19 +1524,32 @@ class EPUB_MOBI(CatalogPlugin):
this_title['formats'] = formats
# Add user notes to be displayed in header
# Special case handling for datetime fields
# Special case handling for datetime fields and lists
if self.opts.header_note_source_field:
field_md = self.__db.metadata_for_field(self.opts.header_note_source_field)
notes = self.__db.get_field(record['id'],
self.opts.header_note_source_field,
index_is_id=True)
if notes and field_md['datatype'] == 'datetime':
# Reformat date fields to match UI presentation: dd MMM YYYY
notes = format_date(notes,'dd MMM yyyy')
if notes:
if field_md['datatype'] == 'text':
if isinstance(notes,list):
notes = ' &middot; '.join(notes)
elif field_md['datatype'] == 'datetime':
notes = format_date(notes,'dd MMM yyyy')
elif field_md['datatype'] == 'composite':
m = re.match(r'\[(.+)\]$', notes)
if m is not None:
# Sniff for special pseudo-list string "[<item, item>]"
bracketed_content = m.group(1)
if ',' in bracketed_content:
# Recast the comma-separated items as a list
items = bracketed_content.split(',')
items = [i.strip() for i in items]
notes = ' &middot; '.join(items)
else:
notes = bracketed_content
this_title['notes'] = {'source':field_md['name'],
'content':notes}
'content':notes}
titles.append(this_title)

View File

@ -298,10 +298,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
base,
prefer_custom=True)
self.FIELD_MAP['ondevice'] = base+1
self.field_metadata.set_field_record_index('ondevice', base+1, prefer_custom=False)
self.FIELD_MAP['all_metadata'] = base+2
self.field_metadata.set_field_record_index('all_metadata', base+2, prefer_custom=False)
self.FIELD_MAP['ondevice'] = base = base+1
self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
script = '''
DROP VIEW IF EXISTS meta2;
@ -343,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.has_id = self.data.has_id
self.count = self.data.count
# Count times get_metadata is called, and how many times in the cache
self.gm_count = 0
self.gm_missed = 0
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):
@ -690,19 +684,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
row = self.data._data[idx] if index_is_id else self.data[idx]
fm = self.FIELD_MAP
self.gm_count += 1
mi = row[self.FIELD_MAP['all_metadata']]
if mi is not None:
if get_cover:
# Always get the cover, because the value can be wrong if the
# original mi was from the OPF
mi.cover = self.cover(idx, index_is_id=index_is_id, as_path=True)
return mi
self.gm_missed += 1
mi = Metadata(None)
self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
aut_list = row[fm['au_map']]
aut_list = [p.split(':::') for p in aut_list.split(':#:')]
@ -724,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = row[fm['formats']]
if not formats:
formats = None
else:
formats = formats.split(',')
mi.formats = formats
tags = row[fm['tags']]
if tags:
@ -1387,7 +1371,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if r is not None:
if (now - r[self.FIELD_MAP['timestamp']]) > delta:
tags = r[self.FIELD_MAP['tags']]
if tags and tag in tags.lower():
if tags and tag in [x.strip() for x in
tags.lower().split(',')]:
yield r[self.FIELD_MAP['id']]
def get_next_series_num_for(self, series):

View File

@ -162,15 +162,6 @@ class FieldMetadata(dict):
'search_terms':['tags', 'tag'],
'is_custom':False,
'is_category':True}),
('all_metadata',{'table':None,
'column':None,
'datatype':None,
'is_multiple':None,
'kind':'field',
'name':None,
'search_terms':[],
'is_custom':False,
'is_category':False}),
('author_sort',{'table':None,
'column':None,
'datatype':'text',

View File

@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
suffix = 'The fix will be in the next release.'
action = action+'ed'
msg = '%s in branch %s. %s'%(action, nick, suffix)
msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url)
server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'},

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import re, htmlentitydefs
_ascii_pat = None
@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None):
pat = re.compile(u'|'.join(map(unichr, charlist)))
return pat.sub('', txt)
##
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text, rm=False, rchar=u''):
def fixup(m, rm=rm, rchar=rchar):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
if rm:
return rchar #replace by char
return text # leave as is
return re.sub("&#?\w+;", fixup, text)

View File

@ -18,6 +18,24 @@ class _Parser(object):
LEX_NUM = 4
LEX_EOF = 5
def _python(self, func):
locals = {}
exec func in locals
if 'evaluate' not in locals:
self.error('no evaluate function in python')
try:
result = locals['evaluate'](self.parent.kwargs)
if isinstance(result, (float, int)):
result = unicode(result)
elif isinstance(result, list):
result = ','.join(result)
elif isinstance(result, str):
result = unicode(result)
return result
except Exception as e:
self.error('python function threw exception: ' + e.msg)
def _strcmp(self, x, y, lt, eq, gt):
v = strcmp(x, y)
if v < 0:
@ -79,6 +97,7 @@ class _Parser(object):
'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
'multiply' : (2, partial(_math, op='*')),
'print' : (-1, _print),
'python' : (1, _python),
'strcat' : (-1, _concat),
'strcmp' : (5, _strcmp),
'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?=\n)', None),
(r'\s', None)
])
], flags=re.DOTALL)
def _eval_program(self, val, prog):
# keep a cache of the lex'ed program under the theory that re-lexing

View File

@ -92,7 +92,10 @@ def identify_data(data):
or raises an Exception if data is not an image.
'''
img = Image()
img.load(data)
if hasattr(img, 'identify'):
img.identify(data)
else:
img.load(data)
width, height = img.size
fmt = img.format
return (width, height, fmt)

View File

@ -456,6 +456,26 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
// }}}
// Image.identify {{{
static PyObject *
magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) {
const char *data;
Py_ssize_t dlen;
MagickBooleanType res;
NULL_CHECK(NULL)
if (!PyArg_ParseTuple(args, "s#", &data, &dlen)) return NULL;
res = MagickPingImageBlob(self->wand, data, dlen);
if (!res)
return magick_set_exception(self->wand);
Py_RETURN_NONE;
}
// }}}
// Image.open {{{
static PyObject *
magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = {
{"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS,
"Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."},
{"identify", (PyCFunction)magick_Image_identify, METH_VARARGS,
"Identify an image from a byte buffer (string)"
},
{"load", (PyCFunction)magick_Image_load, METH_VARARGS,
"Load an image from a byte buffer (string)"
},

View File

@ -5,5 +5,52 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import glob
from calibre.constants import plugins, iswindows, filesystem_encoding
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.magick import Image, PixelWand
class Unavailable(Exception):
pass
class NoRaster(Exception):
pass
def extract_raster_image(wmf_data):
try:
wmf, wmf_err = plugins['wmf']
except KeyError:
raise Unavailable('libwmf not available on this platform')
if wmf_err:
raise Unavailable(wmf_err)
if iswindows:
import sys, os
appdir = sys.app_dir
if isinstance(appdir, unicode):
appdir = appdir.encode(filesystem_encoding)
fdir = os.path.join(appdir, 'wmffonts')
wmf.set_font_dir(fdir)
data = ''
with TemporaryDirectory('wmf2png') as tdir:
with CurrentDir(tdir):
wmf.render(wmf_data)
images = list(sorted(glob.glob('*.png')))
if not images:
raise NoRaster('No raster images in WMF')
data = open(images[0], 'rb').read()
im = Image()
im.load(data)
pw = PixelWand()
pw.color = '#ffffff'
im.rotate(pw, 180)
return im.export('png')

View File

@ -4,6 +4,7 @@
#include <libwmf/api.h>
#include <libwmf/svg.h>
//#include <libwmf/gd.h>
typedef struct {
char *data;
@ -13,7 +14,7 @@ typedef struct {
//This code is taken mostly from the Abiword wmf plugin
// Buffer read {{{
// returns unsigned char cast to int, or EOF
static int wmf_WMF_read(void * context) {
char c;
@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
if (info->pos == info->len)
return EOF;
c = info->data[pos];
c = info->data[info->pos];
info->pos++;
return (int)c;
return (int)((unsigned char)c);
}
// returns (-1) on error, else 0
@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
return (long) info->pos;
}
// }}}
char _png_name_buf[100];
char *wmf_png_name(void *ctxt) {
int *num = (int*)ctxt;
*num = *num + 1;
snprintf(_png_name_buf, 90, "%04d.png", *num);
return _png_name_buf;
}
#define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
static PyObject *
@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
unsigned int max_width = 1600;
unsigned int max_height = 1200;
unsigned long max_flags = 0;
static const char* Default_Description = "wmf2svg";
int fname_counter = 0;
wmf_error_t err;
@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->Description = (char *)Default_Description;
ddata->bbox = bbox;
ddata->image.context = (void *)&fname_counter;
ddata->image.name = wmf_png_name;
wmf_display_size(API, &disp_width, &disp_height, 96, 96);
@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->height = (unsigned int) ceil ((double) wmf_height);
}
ddata->flags |= WMF_SVG_INLINE_IMAGES;
ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
// Needs GD
//ddata->flags |= WMF_SVG_INLINE_IMAGES;
//ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
err = wmf_play(API, 0, &(bbox));
@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
return ans;
}
#ifdef _WIN32
void set_libwmf_fontdir(const char *);
static PyObject *
wmf_setfontdir(PyObject *self, PyObject *args) {
char *path;
if (!PyArg_ParseTuple(args, "s", &path))
return NULL;
set_libwmf_fontdir(path);
Py_RETURN_NONE;
}
#endif
static PyMethodDef wmf_methods[] = {
{"render", wmf_render, METH_VARARGS,
"render(path) -> Render wmf as svg."
"render(data) -> Render wmf as svg."
},
#ifdef _WIN32
{"set_font_dir", wmf_setfontdir, METH_VARARGS,
"set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
},
#endif
{NULL} /* Sentinel */
};

View File

@ -982,9 +982,12 @@ class ZipFile:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
raise BadZipfile, \
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
print ('WARNING: Header (%r) and directory (%r) filenames do not'
' match inside ZipFile')%(fname, zinfo.orig_filename)
print 'Using directory filename %r'%zinfo.orig_filename
#raise BadZipfile, \
# 'File name in directory "%r" and header "%r" differ.' % (
# zinfo.orig_filename, fname)
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1

View File

@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
br = browser()
return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
for x in get_builtin_recipe_collection():
if x.get('title') == title:
@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True)
def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
for x in get_builtin_recipe_collection():
if x.get('id') == id_:
urn = x.get('id')[8:]
if download_recipe:
try:
if log is not None:
log('Trying to get latest version of recipe:', urn)
return download_builtin_recipe(urn)
except:
if log is None:
import traceback
traceback.print_exc()
else:
log.exception(
'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True)
class SchedulerConfig(object):
def __init__(self):