Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2011-01-13 07:12:06 -05:00 · 2011-01-13 07:12:06 -05:00 · 87d5f40d96
commit 87d5f40d96
parent 06723a0748 d51bd60c9c
69 changed files with 2603 additions and 1784 deletions
--- a/resources/images/document-encrypt.png
+++ b/resources/images/document-encrypt.png
--- a/resources/images/news/zerohedge.png
+++ b/resources/images/news/zerohedge.png
--- a/resources/recipes/expansion_spanish.recipe
+++ b/resources/recipes/expansion_spanish.recipe
@ -1,59 +1,79 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__author__    = 'Gerardo Diez'
 __copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
 description   = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
 __docformat__ = 'restructuredtext en'
 '''
-www.expansion.com
+expansion.es
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class Publico(BasicNewsRecipe):
    title               =u'Expansion.com'
    __author__      ='Gerardo Diez'
    publisher       =u'Unidad Editorial Información Económica, S.L.'
    category                ='finances, catalunya'
    oldest_article      =1
    max_articles_per_feed   =100
    simultaneous_downloads  =10
    cover_url       =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
    timefmt         ='[%A, %d %B, %Y]'
    encoding        ='latin'
    language        ='es'
    remove_javascript   =True
    no_stylesheets      =True
    keep_only_tags      =dict(name='div', attrs={'class':['noticia primer_elemento']})
    remove_tags         =[
                dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
                dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
                dict(name='span', attrs={'class':['comentarios']}),
                dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
                dict(name='div', attrs={'id':['comentarios_lectores_listado']})
                            ]
    feeds               =[
                (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
                (u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
                (u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
                (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
                (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
                (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
-from calibre.web.feeds.news import BasicNewsRecipe
+                (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
-from calibre.ebooks.BeautifulSoup import Tag
+                (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
                (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
                (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
                (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
                (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
-class Expansion(BasicNewsRecipe):
+                (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
-    title                 = 'Diario Expansion'
+                (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
-    __author__            = 'Darko Miletic'
+                (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
-    description           = 'Lider de informacion de mercados, economica y politica'
+                (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
-    publisher             = 'expansion.com'
+                (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
-    category              = 'news, politics, Spain'
+                (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
-    oldest_article        = 2
+                (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
-    max_articles_per_feed = 100
+                (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
-    no_stylesheets        = True
+                (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
-    use_embedded_content  = False
+                (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
-    delay                 = 1
+                (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
-    encoding              = 'iso-8859-15'
+                (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
    language = 'es'
-    direction             = 'ltr'
+                (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
                (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
                (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
-    html2lrf_options = [
+                (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
-                          '--comment'  , description
+                (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
-                        , '--category' , category
+                (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
-                        , '--publisher', publisher
+
                (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
                (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
                (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
                (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
                (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
                (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
                (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
                ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    feeds              = [
                            (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
                           ,(u'Temas del dia'   , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
                         ]
    keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
    remove_tags        = [
                             dict(name=['object','link','script'])
                            ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
                         ]
    remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
    def preprocess_html(self, soup):
        soup.html['dir' ] = self.direction
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/resources/recipes/msnbc.recipe
+++ b/resources/recipes/msnbc.recipe
@ -1,10 +1,9 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 msnbc.msn.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class MsNBC(BasicNewsRecipe):
@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe):
    publisher              = 'msnbc.com'
    category               = 'news, USA, world'
    language               = 'en'
-    extra_css              = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} '
+    extra_css              = """
                                body{ font-family: Georgia,Times,serif }
                                .hide{display: none}
                                .caption{font-family: Arial,sans-serif; font-size: x-small}
                                .entry-summary{font-family: Arial,sans-serif}
                                .copyright{font-size: 0.95em; font-style: italic}
                                .source-org{font-size: small; font-family: Arial,sans-serif}
                                img{display: block; margin-bottom: 0.5em}
                                span.byline{display: none}
                            """
    conversion_options = {
                             'comments' : description
@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe):
                            ,'publisher': publisher
                         }
-    preprocess_regexps = [
+    remove_tags_before = dict(name='h1', attrs={'id':'headline'})
-        (re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>')
+    remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
-       ,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'),
+    keep_only_tags=[
                      dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
                     ,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
                   ]
    remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
    remove_tags      = [
                          dict(name=['iframe','object','link','embed','meta','table'])
                         ,dict(name='span', attrs={'class':['copyright','Linear copyright']})
                         ,dict(name='div', attrs={'class':'social'})
                       ]
    remove_tags_before = dict(name='div', attrs={'class':'head'})
    remove_tags_after = dict(name='div', attrs={'class':'copyright'})
    remove_tags      = [dict(name=['iframe','object','link','script','form'])]
    feeds = [
               (u'US News'       , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml'      )
@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe):
              ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml'      )
            ]
    def print_version(self, url):
        """Return *url* with the fixed path 'print/1/displaymode/1098/' appended."""
        suffix = 'print/1/displaymode/1098/'
        return url + suffix
    def preprocess_html(self, soup):
-        for item in soup.head.findAll('div'):
+        for item in soup.body.findAll('html'):
            item.name='div'
        for item in soup.body.findAll('div'):
            if item.has_key('id') and item['id'].startswith('vine-'):
               item.extract()
            if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
               item.extract()
        for item in soup.body.findAll('img'):
            if not item.has_key('alt'):
               item['alt'] = 'image'
        for item in soup.body.findAll('ol'):
            if item.has_key('class') and item['class'].startswith('grid'):
               item.extract()
        for item in soup.body.findAll('span'):
            if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
               item.extract()
        for alink in soup.findAll('a'):
            if alink.string is not None:
               tstr = alink.string
               alink.replaceWith(tstr)
        return soup
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -685,3 +685,28 @@ class NYTimes(BasicNewsRecipe):
            divTag.replaceWith(tag)
        return soup
    def populate_article_metadata(self, article, soup, first):
        """Fill in a missing article summary from the article body text.

        When ``article.text_summary`` is blank, the ``<p>`` tags inside every
        ``<div class="articleBody">`` are scanned in order.  The first
        paragraph longer than 70 characters becomes both ``article.summary``
        and ``article.text_summary``, prefixed with the most recent shorter
        paragraph seen before it (if any).

        :param article: object whose ``text_summary`` is read and whose
            ``summary``/``text_summary`` are written when a paragraph is found.
        :param soup: parsed article page, searched with ``findAll``.
        :param first: not used by this method.
        :return: ``None``.  Ordinary errors are logged via ``self.log`` and
            not re-raised.
        """
        try:
            if article.text_summary.strip():
                # A summary is already present; leave it untouched.
                return
            shortparagraph = ""
            for articlebody in soup.findAll('div', attrs={'class':'articleBody'}):
                if not articlebody:
                    continue
                for p in articlebody.findAll('p'):
                    refparagraph = self.massageNCXText(self.tag_to_string(p, use_alt=False)).strip()
                    if not refparagraph:
                        # Blank paragraph: contributes nothing.
                        continue
                    if len(refparagraph) > 70:  # approximately one line of text
                        article.summary = article.text_summary = shortparagraph + refparagraph
                        return
                    # Short paragraph: keep it as the prefix for the next
                    # long paragraph.
                    shortparagraph = refparagraph + " "
                    # A single word not ending in ':' gets a dash separator.
                    if " " not in refparagraph and not refparagraph.endswith(":"):
                        shortparagraph += "- "
        except Exception:
            # Best effort only, but do not swallow KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` would.
            self.log("Error creating article descriptions")
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -685,4 +685,27 @@ class NYTimes(BasicNewsRecipe):
            divTag.replaceWith(tag)
        return soup
    def populate_article_metadata(self, article, soup, first):
        """Fill in a missing article summary from the article body text.

        When ``article.text_summary`` is blank, the ``<p>`` tags inside every
        ``<div class="articleBody">`` are scanned in order.  The first
        paragraph longer than 70 characters becomes both ``article.summary``
        and ``article.text_summary``, prefixed with the most recent shorter
        paragraph seen before it (if any).

        :param article: object whose ``text_summary`` is read and whose
            ``summary``/``text_summary`` are written when a paragraph is found.
        :param soup: parsed article page, searched with ``findAll``.
        :param first: not used by this method.
        :return: ``None``.  Ordinary errors are logged via ``self.log`` and
            not re-raised.
        """
        try:
            if article.text_summary.strip():
                # A summary is already present; leave it untouched.
                return
            shortparagraph = ""
            for articlebody in soup.findAll('div', attrs={'class':'articleBody'}):
                if not articlebody:
                    continue
                for p in articlebody.findAll('p'):
                    refparagraph = self.massageNCXText(self.tag_to_string(p, use_alt=False)).strip()
                    if not refparagraph:
                        # Blank paragraph: contributes nothing.
                        continue
                    if len(refparagraph) > 70:  # approximately one line of text
                        article.summary = article.text_summary = shortparagraph + refparagraph
                        return
                    # Short paragraph: keep it as the prefix for the next
                    # long paragraph.
                    shortparagraph = refparagraph + " "
                    # A single word not ending in ':' gets a dash separator.
                    if " " not in refparagraph and not refparagraph.endswith(":"):
                        shortparagraph += "- "
        except Exception:
            # Best effort only, but do not swallow KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` would.
            self.log("Error creating article descriptions")
--- a/resources/recipes/technology_review.recipe
+++ b/resources/recipes/technology_review.recipe
@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
    def get_article_url(self, article):
        """Return the article's ``guid`` entry, else its ``id`` entry, else ``None``."""
        fallback = article.get('id', None)
        return article.get('guid', fallback)
    def print_version(self, url):
        baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
        split1 = string.split(url,"/")
@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
        split2= string.split(xxx,"/")
        s =  baseurl + split2[0]
        return s
    def postprocess_html(self, soup, first_fetch):
        """Strip page chrome and hyperlinks from a downloaded article.

        Replaces the first element of each of the classes ``header``
        (picture), ``close`` (close button) and ``bannerad`` (banner
        advertisement) with an empty string, then replaces every ``<a>`` whose
        ``string`` is not ``None`` with that string.

        :param soup: parsed article page; modified in place.
        :param first_fetch: second positional argument supplied by the caller;
            not used here.  (Previously spelled ``True``, which shadowed the
            builtin and is a syntax error on Python 3.)
        :return: the same ``soup`` object.
        """
        for css_class in ('header', 'close', 'bannerad'):
            element = soup.find(True, {'class': css_class})
            # Skip elements the page does not contain instead of failing with
            # AttributeError on a None result.
            if element is not None:
                element.replaceWith("")
        # thanks kiklop74!  This code removes all links from the text
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup
--- a/resources/recipes/tyzden.recipe
+++ b/resources/recipes/tyzden.recipe
@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
    if (weeknum > 1):
        weeknum -= 1
-    title = u'.tyzden ' + str(weeknum) + '/' + str(year)
+    title = u'tyzden'
    base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
    base_url = base_url_path + '.html'
--- a/resources/recipes/wired_daily.recipe
+++ b/resources/recipes/wired_daily.recipe
@ -2,8 +2,10 @@
 __license__   = 'GPL v3'
 __docformat__ = 'restructuredtext en'
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.chardet import xml_to_unicode
 class Wired_Daily(BasicNewsRecipe):
@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
    no_stylesheets = True
    preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
        '<head></head>')]
    remove_tags_before = dict(name='div', id='content')
-    remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
+    remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
-        'footer', 'advertisement', 'blog_subscription_unit',
+        'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
-        'brightcove_component']),
+        'outerWrapper', 'inf_widget']),
-        {'class':'entryActions'},
+        {'class':['entryActions', 'advertisement', 'entryTags']},
-        dict(name=['noscript', 'script'])]
+        dict(name=['noscript', 'script']),
        dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
        {'class':lambda x: x and x.startswith('contentjump')},
        dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
-        ('Culture', 'http://feeds.wired.com/wired/culture'),
+        ('Product Reviews',
-        ('Software', 'http://feeds.wired.com/wired/software'),
+            'http://www.wired.com/reviews/feeds/latestProductsRss'),
-        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
+        ('Autopia', 'http://www.wired.com/autopia/feed/'),
-        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
+        ('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
-        ('Cars', 'http://feeds.wired.com/wired/cars'),
+        ('Epicenter', 'http://www.wired.com/epicenter/feed/'),
-        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
+        ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
-        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
+        ('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
-        ('Science', 'http://feeds.wired.com/wired/science'),
+        ('Playbook', 'http://www.wired.com/playbook/feed/'),
-        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
+        ('Rawfile', 'http://www.wired.com/rawfile/feed/'),
-        ('Politics', 'http://feeds.wired.com/wired/politics'),
+        ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
-        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
+        ('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
-        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
+        ('Underwire', 'http://www.wired.com/underwire/feed/'),
        ('Web Monkey', 'http://www.webmonkey.com/feed/'),
        ('Science', 'http://www.wired.com/wiredscience/feed/'),
        ]
    def populate_article_metadata(self, article, soup, first):
        """Pass a non-empty ``article.text_summary`` through ``xml_to_unicode``.

        The call uses ``resolve_entities=True`` and the first element of its
        result is stored back on the article.  Nothing happens when the
        summary is empty.
        """
        summary = article.text_summary
        if not summary:
            return
        converted = xml_to_unicode(summary, resolve_entities=True)
        article.text_summary = converted[0]
    def print_version(self, url):
-        return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
+        return url + '/all/1'
--- a/resources/recipes/zerohedge.recipe
+++ b/resources/recipes/zerohedge.recipe
@ -0,0 +1,33 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.zerohedge.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class ZeroHedge(BasicNewsRecipe):
    """Recipe configuration for the 'Zero Hedge' feed.

    The class body only sets configuration attributes; all behaviour is
    inherited from ``BasicNewsRecipe``.
    """
    # Identity and catalogue metadata.
    title = 'Zero Hedge'
    __author__ = 'Darko Miletic'
    description = 'On a long enough timeline the survival rate for everyone drops to zero'
    publisher = 'zero hedge'
    category = 'news, USA, world, economy, politics'
    language = 'en'
    publication_type = 'blog'
    masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png'
    # Download settings.
    oldest_article = 10
    max_articles_per_feed = 100
    use_embedded_content = True
    encoding = 'utf8'
    # Styling.
    no_stylesheets = True
    extra_css = 'body{ font-family: sans-serif }'
    # Metadata handed to the conversion step, reusing the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }
    feeds = [
        (u'Articles', u'http://feeds.feedburner.com/zerohedge/feed'),
    ]
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@ -287,7 +287,7 @@
                <xsl:value-of select="count(preceding::rtf:footnote) + 1"/>
                <xsl:text>]</xsl:text>
            </xsl:when>
-            <xsl:when test="(@superscript = 'true')">
+            <xsl:when test="(@superscript)">
                <xsl:element name="sup">
                    <xsl:element name="span">
                        <xsl:attribute name="class">
@ -297,7 +297,7 @@
                    </xsl:element>
                </xsl:element>
            </xsl:when>
-            <xsl:when test="(@underscript = 'true')">
+            <xsl:when test="(@underscript or @subscript)">
                <xsl:element name="sub">
                    <xsl:element name="span">
                        <xsl:attribute name="class">
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
                        obj = obj.decode('utf-8')
    return obj
 def as_unicode(obj, enc=preferred_encoding):
    """Convert *obj* to unicode via ``force_unicode``.

    Byte strings are handed to ``force_unicode`` unchanged.  Any other object
    is first converted with ``unicode()``; if that raises, ``str()`` is tried,
    and if that raises too, ``repr()`` is used.  The result is then passed to
    ``force_unicode`` with *enc*.

    :param obj: any object.
    :param enc: encoding forwarded to ``force_unicode``.
    :return: whatever ``force_unicode`` returns for the converted object.
    """
    if not isbytestring(obj):
        try:
            obj = unicode(obj)
        except Exception:
            # Catch conversion errors only; a bare ``except:`` would also
            # swallow KeyboardInterrupt and SystemExit.
            try:
                obj = str(obj)
            except Exception:
                obj = repr(obj)
    return force_unicode(obj, enc=enc)
 def human_readable(size):
    """ Convert a size in bytes into a human readable form """
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@ -91,3 +91,19 @@ class NOOK_COLOR(NOOK):
    EBOOK_DIR_MAIN = 'My Files/Books'
    '''
    def create_upload_path(self, path, mdata, fname, create_dirs=True):
        filepath = NOOK.create_upload_path(self, path, mdata, fname,
                create_dirs=create_dirs)
        edm = self.EBOOK_DIR_MAIN.replace('/', os.sep)
        npath = os.path.join(edm, _('News')) + os.sep
        if npath in filepath:
            filepath = filepath.replace(npath, os.sep.join('My Files',
                'Magazines')+os.sep)
            filedir = os.path.dirname(filepath)
            if create_dirs and not os.path.exists(filedir):
                os.makedirs(filedir)
        return filepath
    '''
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -88,6 +88,7 @@ class Plumber(object):
        self.ui_reporter = report_progress
        self.abort_after_input_dump = abort_after_input_dump
        # Pipeline options {{{
        # Initialize the conversion options that are independent of input and
        # output formats. The input and output plugins can still disable these
        # options via recommendations.
@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp',
    help=_('Set the book timestamp (used by the date column in calibre).')),
 ]
        # }}}
        input_fmt = os.path.splitext(self.input)[1]
        if not input_fmt:
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -16,7 +16,6 @@ import uuid
 from lxml import etree
 from calibre import guess_type
 from calibre import prepare_string_for_xml
 from calibre.constants import __appname__, __version__
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@ -10,7 +10,8 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
 title_pat    = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
 author_pat   = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
 comment_pat  = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
-category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
+tags_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
 publisher_pat = re.compile(r'\{\\info.*?\{\\manager(.*?)(?<!\\)\}', re.DOTALL)
 def get_document_info(stream):
    """
@ -82,61 +83,73 @@ def decode(raw, codec):
 def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title, author, comment, category = None, None, None, None
    stream.seek(0)
    if stream.read(5) != r'{\rtf':
-        return MetaInformation(None, None)
+        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
-        return MetaInformation(None, None)
+        return MetaInformation(_('Unknown'))
    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)
    title_match = title_pat.search(block)
-    if title_match:
+    if title_match is not None:
        title = decode(title_match.group(1).strip(), cpg)
    else:
        title = _('Unknown')
    author_match = author_pat.search(block)
-    if author_match:
+    if author_match is not None:
        author = decode(author_match.group(1).strip(), cpg)
-    comment_match = comment_pat.search(block)
+    else:
-    if comment_match:
+        author = None
-        comment = decode(comment_match.group(1).strip(), cpg)
+    mi = MetaInformation(title)
    category_match = category_pat.search(block)
    if category_match:
        category = decode(category_match.group(1).strip(), cpg)
    mi = MetaInformation(title, author)
    if author:
        mi.authors = string_to_authors(author)
    comment_match = comment_pat.search(block)
    if comment_match is not None:
        comment = decode(comment_match.group(1).strip(), cpg)
        mi.comments = comment
-    mi.category = category
+    tags_match = tags_pat.search(block)
    if tags_match is not None:
        tags = decode(tags_match.group(1).strip(), cpg)
        mi.tags = tags
    publisher_match = publisher_pat.search(block)
    if publisher_match is not None:
        publisher = decode(publisher_match.group(1).strip(), cpg)
        mi.publisher = publisher
    return mi
 def create_metadata(stream, options):
-    md = r'{\info'
+    md = [r'{\info']
    if options.title:
        title = options.title.encode('ascii', 'ignore')
-        md += r'{\title %s}'%(title,)
+        md.append(r'{\title %s}'%(title,))
    if options.authors:
        au = options.authors
        if not isinstance(au, basestring):
            au = u', '.join(au)
        author = au.encode('ascii', 'ignore')
-        md += r'{\author %s}'%(author,)
+        md.append(r'{\author %s}'%(author,))
    if options.get('category', None):
        category = options.category.encode('ascii', 'ignore')
        md += r'{\category %s}'%(category,)
    comp = options.comment if hasattr(options, 'comment') else options.comments
    if comp:
        comment = comp.encode('ascii', 'ignore')
-        md += r'{\subject %s}'%(comment,)
+        md.append(r'{\subject %s}'%(comment,))
-    if len(md) > 6:
+    if options.publisher:
-        md += '}'
+        publisher = options.publisher.encode('ascii', 'ignore')
        md.append(r'{\manager %s}'%(publisher,))
    if options.tags:
        tags = u', '.join(options.tags)
        tags = tags.encode('ascii', 'ignore')
        md.append(r'{\category %s}'%(tags,))
    if len(md) > 1:
        md.append('}')
        stream.seek(0)
        src   = stream.read()
-        ans = src[:6] + md + src[6:]
+        ans = src[:6] + u''.join(md) + src[6:]
        stream.seek(0)
        stream.write(ans)
@ -156,7 +169,7 @@ def set_metadata(stream, options):
        base_pat = r'\{\\name(.*?)(?<!\\)\}'
        title = options.title
-        if title != None:
+        if title is not None:
            title = title.encode('ascii', 'replace')
            pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
            if pat.search(src):
@ -164,7 +177,7 @@ def set_metadata(stream, options):
            else:
                src = add_metadata_item(src, 'title', title)
        comment = options.comments
-        if comment != None:
+        if comment is not None:
            comment = comment.encode('ascii', 'replace')
            pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
            if pat.search(src):
@ -172,7 +185,7 @@ def set_metadata(stream, options):
            else:
                src = add_metadata_item(src, 'subject', comment)
        author = options.authors
-        if author != None:
+        if author is not None:
            author =  ', '.join(author)
            author = author.encode('ascii', 'ignore')
            pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
@ -180,14 +193,23 @@ def set_metadata(stream, options):
                src = pat.sub(r'{\\author ' + author + r'}', src)
            else:
                src = add_metadata_item(src, 'author', author)
-        category = options.get('category', None)
+        tags = options.tags
-        if category != None:
+        if tags is not None:
-            category = category.encode('ascii', 'replace')
+            tags =  ', '.join(tags)
            tags = tags.encode('ascii', 'replace')
            pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
            if pat.search(src):
-                src = pat.sub(r'{\\category ' + category + r'}', src)
+                src = pat.sub(r'{\\category ' + tags + r'}', src)
            else:
-                src = add_metadata_item(src, 'category', category)
+                src = add_metadata_item(src, 'category', tags)
        publisher = options.publisher
        if publisher is not None:
            publisher = publisher.encode('ascii', 'replace')
            pat = re.compile(base_pat.replace('name', 'manager'), re.DOTALL)
            if pat.search(src):
                src = pat.sub(r'{\\manager ' + publisher + r'}', src)
            else:
                src = add_metadata_item(src, 'manager', publisher)
        stream.seek(pos + olen)
        after = stream.read()
        stream.seek(pos)
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -77,7 +77,15 @@ class RTFInput(InputFormatPlugin):
    def generate_xml(self, stream):
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
-        ofile = 'out.xml'
+        ofile = 'dataxml.xml'
        run_lev, debug_dir = 1, None
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            try:
                os.mkdir(debug_dir)
                debug_dir = 'rtfdebug'
                run_lev = 4
            except:
                pass
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
@ -115,43 +123,45 @@ class RTFInput(InputFormatPlugin):
            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 1,
            #debug
            deb_dir = debug_dir,
            run_level = run_lev,
        )
        parser.parse_rtf()
-        ans = open('out.xml').read()
+        with open(ofile, 'rb') as f:
-        os.remove('out.xml')
+            return f.read()
        return ans
    def extract_images(self, picts):
        import imghdr
        self.log('Extracting images...')
        with open(picts, 'rb') as f:
            raw = f.read()
        picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
        hex = re.compile(r'[^a-fA-F0-9]')
        encs = [hex.sub('', pict) for pict in picts]
        count = 0
        raw = open(picts, 'rb').read()
        starts = []
        for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
            starts.append(match.start(1))
        imap = {}
-
+        for enc in encs:
        for start in starts:
            pos, bc = start, 1
            while bc > 0:
                if raw[pos] == '}': bc -= 1
                elif raw[pos] == '{': bc += 1
                pos += 1
            pict = raw[start:pos+1]
            enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
            if len(enc) % 2 == 1:
                enc = enc[:-1]
            data = enc.decode('hex')
            fmt = imghdr.what(None, data)
            if fmt is None:
                fmt = 'wmf'
            count += 1
-            name = (('%4d'%count).replace(' ', '0'))+'.wmf'
+            name = '%04d.%s' % (count, fmt)
-            open(name, 'wb').write(data)
+            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
            #open(name+'.hex', 'wb').write(enc)
        return self.convert_images(imap)
    def convert_images(self, imap):
-        for count, val in imap.items():
+        self.default_img = None
        for count, val in imap.iteritems():
            try:
                imap[count] = self.convert_image(val)
            except:
@ -159,11 +169,35 @@ class RTFInput(InputFormatPlugin):
        return imap
    def convert_image(self, name):
-        from calibre.utils.magick import Image
+        if not name.endswith('.wmf'):
-        img = Image()
+            return name
-        img.open(name)
+        try:
            return self.rasterize_wmf(name)
        except:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)
    def replace_wmf(self, name):
        from calibre.ebooks import calibre_cover
        if self.default_img is None:
            self.default_img = calibre_cover('Conversion of WMF images is not supported',
            'Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.', title_size=36,
            author_size=20)
        name = name.replace('.wmf', '.jpg')
-        img.save(name)
+        with open(name, 'wb') as f:
            f.write(self.default_img)
        return name
    def rasterize_wmf(self, name):
        """
        Extract the raster image embedded in the WMF file ``name`` and save
        it under the same name with ``.wmf`` replaced by ``.jpg``.

        WMF conversion is currently switched off: the method raises
        ValueError unconditionally, so none of the code after the ``raise``
        is ever executed.
        """
        raise ValueError('Conversion of WMF images not supported')
        from calibre.utils.wmf import extract_raster_image
        with open(name, 'rb') as wmf:
            raster = extract_raster_image(wmf.read())
        jpg_name = name.replace('.wmf', '.jpg')
        with open(jpg_name, 'wb') as out:
            out.write(raster)
        return jpg_name
@ -192,27 +226,27 @@ class RTFInput(InputFormatPlugin):
        css += '\n'+'\n'.join(font_size_classes)
        css += '\n' +'\n'.join(color_classes)
-        for cls, val in border_styles.items():
+        for cls, val in border_styles.iteritems():
            css += '\n\n.%s {\n%s\n}'%(cls, val)
        with open('styles.css', 'ab') as f:
            f.write(css)
-    def preprocess(self, fname):
+    # def preprocess(self, fname):
-        self.log('\tPreprocessing to convert unicode characters')
+        # self.log('\tPreprocessing to convert unicode characters')
-        try:
+        # try:
-            data = open(fname, 'rb').read()
+            # data = open(fname, 'rb').read()
-            from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
+            # from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
-            tokenizer = RtfTokenizer(data)
+            # tokenizer = RtfTokenizer(data)
-            tokens = RtfTokenParser(tokenizer.tokens)
+            # tokens = RtfTokenParser(tokenizer.tokens)
-            data = tokens.toRTF()
+            # data = tokens.toRTF()
-            fname = 'preprocessed.rtf'
+            # fname = 'preprocessed.rtf'
-            with open(fname, 'wb') as f:
+            # with open(fname, 'wb') as f:
-                f.write(data)
+                # f.write(data)
-        except:
+        # except:
-            self.log.exception(
+            # self.log.exception(
-            'Failed to preprocess RTF to convert unicode sequences, ignoring...')
+            # 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
-        return fname
+        # return fname
    def convert_borders(self, doc):
        border_styles = []
@ -249,17 +283,14 @@ class RTFInput(InputFormatPlugin):
        self.log = log
        self.log('Converting RTF to XML...')
        #Name of the preprocessed RTF file
-        fname = self.preprocess(stream.name)
+        # fname = self.preprocess(stream.name)
        try:
-            xml = self.generate_xml(fname)
+            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException, e:
            raise
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)
        '''dataxml = open('dataxml.xml', 'w')
        dataxml.write(xml)
        dataxml.close'''
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            imap = {}
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -17,7 +17,8 @@
 #########################################################################
 # $Revision: 1.41 $
 # $Date: 2006/03/24 23:50:07 $
-import sys,os
+import sys, os
 from calibre.ebooks.rtf2xml import headings_to_sections, \
    line_endings, footnote, fields_small, default_encoding, \
    make_lists, preamble_div, header, colors, group_borders, \
@ -90,7 +91,6 @@ class ParseRtf:
                out_file = '',
                out_dir = None,
                dtd = '',
                #debug = 0, #why? calibre
                deb_dir = None,
                convert_symbol = None,
                convert_wingdings = None,
@ -107,6 +107,7 @@ class ParseRtf:
                no_dtd = 0,
                char_data = '',
                ):
        """
        Requires:
        'file' --file to parse
@ -119,12 +120,11 @@ class ParseRtf:
            script tries to output to directory where the script is executed.)
            'deb_dir' --debug directory. If a debug_dir is provided, the script
            will copy each run through as a file to examine in the debug_dir
            'perl_script'--use perl to make tokens. This runs just a bit faster.
            (I will probably phase this out.)
            'check_brackets' -- make sure the brackets match up after each run
            through a file. Only for debugging.
        Returns: Nothing
        """
        self.__file = in_file
        self.__out_file = out_file
        self.__out_dir = out_dir
@ -132,7 +132,7 @@ class ParseRtf:
        self.__dtd_path = dtd
        self.__check_file(in_file,"file_to_parse")
        self.__char_data = char_data
-        self.__debug_dir = deb_dir #self.__debug_dir = debug calibre
+        self.__debug_dir = deb_dir
        self.__check_dir(self.__temp_dir)
        self.__copy = self.__check_dir(self.__debug_dir)
        self.__convert_caps = convert_caps
@ -155,25 +155,24 @@ class ParseRtf:
        if hasattr(the_file, 'read'): return
        if the_file == None:
            if type == "file_to_parse":
-                message = "You must provide a file for the script to work"
+                msg = "\nYou must provide a file for the script to work"
            msg = message
            raise RtfInvalidCodeException, msg
        elif os.path.exists(the_file):
            pass # do nothing
        else:
-            message = "The file '%s' cannot be found" % the_file
+            msg = "\nThe file '%s' cannot be found" % the_file
            msg = message
            raise RtfInvalidCodeException, msg
    def __check_dir(self, the_dir):
        """Check to see if directory exists"""
        if not the_dir :
            return
        dir_exists = os.path.isdir(the_dir)
        if not dir_exists:
-            message = "%s is not a directory" % the_dir
+            msg = "\n%s is not a directory" % the_dir
            msg = message
            raise RtfInvalidCodeException, msg
        return 1
    def parse_rtf(self):
        """
        Parse the file by calling on other classes.
@ -194,13 +193,14 @@ class ParseRtf:
            copy_obj.set_dir(self.__debug_dir)
            copy_obj.remove_files()
            copy_obj.copy_file(self.__temp_file, "original_file")
-        # new as of 2005-08-02. Do I want this?
+        # Function to check if bracket are well handled
        if self.__debug_dir or self.__run_level > 2:
            self.__check_brack_obj = check_brackets.CheckBrackets\
            (file = self.__temp_file,
                bug_handler = RtfInvalidCodeException,
                    )
-        # convert Macintosh line endings to Unix line endings
+        #convert Macintosh and Windows line endings to Unix line endings
        #why do this if you don't wb after?
        line_obj = line_endings.FixLineEndings(
                in_file = self.__temp_file,
                bug_handler = RtfInvalidCodeException,
@ -208,13 +208,13 @@ class ParseRtf:
                run_level = self.__run_level,
                replace_illegals = self.__replace_illegals,
                )
-        return_value = line_obj.fix_endings()
+        return_value = line_obj.fix_endings() #calibre return what?
        self.__return_code(return_value)
        tokenize_obj = tokenize.Tokenize(
                bug_handler = RtfInvalidCodeException,
                in_file = self.__temp_file,
                copy = self.__copy,
-                run_level = self.__run_level,)
+                run_level = self.__run_level)
        tokenize_obj.tokenize()
        process_tokens_obj = process_tokens.ProcessTokens(
            in_file = self.__temp_file,
@ -230,11 +230,24 @@ class ParseRtf:
                os.remove(self.__temp_file)
            except OSError:
                pass
            #Check to see if the file is correctly encoded
            encode_obj = default_encoding.DefaultEncoding(
            in_file = self.__temp_file,
            run_level = self.__run_level,
            bug_handler = RtfInvalidCodeException,
            check_raw = True,
            )
            platform, code_page, default_font_num = encode_obj.find_default_encoding()
            check_encoding_obj = check_encoding.CheckEncoding(
                    bug_handler = RtfInvalidCodeException,
                        )
-            check_encoding_obj.check_encoding(self.__file)
+            enc = encode_obj.get_codepage()
-            sys.stderr.write('File "%s" does not appear to be RTF.\n' % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8'))
+            if enc != 'mac_roman':
                enc = 'cp' + enc
            if check_encoding_obj.check_encoding(self.__file, enc):
                file_name = self.__file if isinstance(self.__file, str) \
                                    else self.__file.encode('utf-8')
                msg = 'File %s does not appear to be correctly encoded.\n' % file_name
                raise InvalidRtfException, msg
        delete_info_obj = delete_info.DeleteInfo(
            in_file = self.__temp_file,
@ -508,6 +521,7 @@ class ParseRtf:
                indent = self.__indent,
                run_level = self.__run_level,
                no_dtd = self.__no_dtd,
                encoding = encode_obj.get_codepage(),
                bug_handler = RtfInvalidCodeException,
                )
        tags_obj.convert_to_tags()
@ -520,35 +534,28 @@ class ParseRtf:
        output_obj.output()
        os.remove(self.__temp_file)
        return self.__exit_level
    def __bracket_match(self, file_name):
        if self.__run_level > 2:
            good_br, msg =  self.__check_brack_obj.check_brackets()
            if good_br:
                pass
-                # sys.stderr.write( msg + ' in ' + file_name + "\n")
+                #sys.stderr.write( msg + ' in ' + file_name + "\n")
            else:
-                msg += msg +  " in file '" + file_name + "'\n"
+                msg = '%s in file %s\n' % (msg, file_name)
                raise RtfInvalidCodeException, msg
    def __return_code(self, num):
        """
        Record ``num`` as the exit level if it exceeds the current one.

        ``num`` is the value handed back by a processing step (for example
        the result of fix_endings()); ``None`` means the step reported
        nothing and is ignored.
        """
        # Identity test is the idiomatic (and __eq__-proof) way to detect None.
        if num is None:
            return
        if int(num) > self.__exit_level:
            self.__exit_level = num
    def __make_temp_file(self,file):
        """Make a temporary file to parse"""
        write_file="rtf_write_file"
        read_obj = file if hasattr(file, 'read') else open(file,'r')
-        write_obj = open(write_file, 'w')
+        with open(write_file, 'wb') as write_obj:
-        line = "dummy"
+            for line in read_obj:
-        while line:
+                write_obj.write(line)
            line = read_obj.read(1000)
            write_obj.write(line )
        write_obj.close()
        return write_file
    """
 mi<tg<open______<style-sheet\n
 mi<tg<close_____<style-sheet\n
 mi<tg<open-att__<footnote<num>1\n
 mi<tg<empty-att_<page-definition<margin>33\n
 mi<tg<empty_____<para\n
 """
--- a/src/calibre/ebooks/rtf2xml/check_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/check_brackets.py
@ -24,38 +24,38 @@ class CheckBrackets:
        self.__ob_count = 0
        self.__cb_count = 0
        self.__open_bracket_num = []
    def open_brack(self, line):
        """
        Register an open-bracket token.

        The bracket number is read from ``line[-5:-1]`` (the four characters
        in front of the last character of the line), pushed onto the stack
        of open brackets, and the running bracket count is incremented.
        """
        self.__open_bracket_num.append(line[-5:-1])
        self.__bracket_count += 1
    def close_brack(self, line):
        num = line[-5:-1]
        ##self.__open_bracket_num.append(num)
        try:
            last_num = self.__open_bracket_num.pop()
        except:
-            return 0
+            return False
        if num != last_num:
-            return 0
+            return False
        self.__bracket_count -= 1
-        return 1
+        return True
    def check_brackets(self):
        read_obj = open(self.__file, 'r')
        line = 'dummy'
        line_count = 0
-        while line:
+        with open(self.__file, 'r') as read_obj:
            for line in read_obj:
                line_count += 1
            line = read_obj.readline()
                self.__token_info = line[:16]
                if self.__token_info == 'ob<nu<open-brack':
                    self.open_brack(line)
                if self.__token_info == 'cb<nu<clos-brack':
-                right_count = self.close_brack(line)
+                    if not self.close_brack(line):
-                if not right_count:
+                        return (False, "closed bracket doesn't match, line %s" % line_count)
-                    return (0, "closed bracket doesn't match, line %s" % line_count)
+
        read_obj.close()
        if self.__bracket_count != 0:
-            msg = 'At end of file open and closed brackets don\'t match\n'
+            msg = ('At end of file open and closed brackets don\'t match\n' \
-            msg = msg + 'total number of brackets is %s' % self.__bracket_count
+                        'total number of brackets is %s') % self.__bracket_count
-            return (0, msg)
+            return (False, msg)
-        return (1, "brackets match!")
+        return (True, "Brackets match!")
--- a/src/calibre/ebooks/rtf2xml/check_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/check_encoding.py
@ -1,8 +1,11 @@
 #!/usr/bin/env python
 import sys
 class CheckEncoding:
    def __init__(self, bug_handler):
        """
        Keep a reference to ``bug_handler`` on the instance.

        NOTE(review): the caller in ParseRtf passes an exception class
        (RtfInvalidCodeException) here -- confirm other callers do the same.
        """
        self.__bug_handler = bug_handler
    def __get_position_error(self, line, encoding, line_num):
        char_position = 0
        for char in line:
@ -12,21 +15,23 @@ class CheckEncoding:
            except UnicodeError, msg:
                sys.stderr.write('line: %s char: %s\n' %  (line_num, char_position))
                sys.stderr.write(str(msg) + '\n')
-    def check_encoding(self, path, encoding='us-ascii'):
+
-        read_obj = open(path, 'r')
+    def check_encoding(self, path, encoding='us-ascii', verbose=True):
        line_to_read = 1
        line_num = 0
-        while line_to_read:
+        with open(path, 'r') as read_obj:
            for line in read_obj:
                line_num += 1
            line_to_read = read_obj.readline()
            line = line_to_read
                try:
                    line.decode(encoding)
                except UnicodeError:
                    if verbose:
                        if len(line) < 1000:
                            self.__get_position_error(line, encoding, line_num)
                        else:
-                    sys.stderr.write('line: %d has bad encoding\n'%line_num)
+                            sys.stderr.write('line: %d has bad encoding\n' % line_num)
                    return True
        return False
 if __name__ == '__main__':
    # Command-line entry point: check the file named by the first argument
    # against the default encoding.
    # CheckEncoding.__init__ requires a bug_handler; calling it with no
    # arguments raised TypeError before any file was checked.
    check_encoding_obj = CheckEncoding(bug_handler=Exception)
    check_encoding_obj.check_encoding(sys.argv[1])
--- a/src/calibre/ebooks/rtf2xml/combine_borders.py
+++ b/src/calibre/ebooks/rtf2xml/combine_borders.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class CombineBorders:
    """Combine borders in RTF tokens to make later processing easier"""
    def __init__(self,
@ -32,19 +34,21 @@ class CombineBorders:
        self.__state = 'default'
        self.__bord_pos = 'default'
        self.__bord_att = []
    def found_bd(self, line):
        """
        Switch to the 'border' state and remember which border the token
        names (characters 6 through 15 of ``line``).
        """
        #cw<bd<bor-t-r-vi
        self.__state = 'border'
        self.__bord_pos = line[6:16]
    def __default_func(self, line):
        """
        Handle a line while not inside a border description.

        A border token (prefix 'cw<bd', e.g. cw<bd<bor-t-r-vi) is passed to
        found_bd() and replaced by an empty string; any other line is handed
        back unchanged.
        """
        if self.__first_five != 'cw<bd':
            return line
        self.found_bd(line)
        return ''
    def end_border(self, line, write_obj):
-        joiner = "|"
+        border_string = "|".join(self.__bord_att)
        border_string = joiner.join(self.__bord_att)
        self.__bord_att = []
        write_obj.write('cw<bd<%s<nu<%s\n' % (self.__bord_pos,
                                                border_string))
@ -54,6 +58,7 @@ class CombineBorders:
            self. found_bd(line)
        else:
            write_obj.write(line)
    def add_to_border_desc(self, line):
        #cw<bt<bdr-hair__<nu<true
        #cw<bt<bdr-linew<nu<0.50
@ -65,26 +70,22 @@ class CombineBorders:
        else:
            num = ':' + num
        self.__bord_att.append(border_desc + num)
    def __border_func(self, line, write_obj):
        """
        Handle a line while inside a border description.

        Lines with the prefix 'cw<bt' extend the current description through
        add_to_border_desc(); any other line finishes it through
        end_border(), which receives the line and the output file object.
        """
        if self.__first_five == 'cw<bt':
            self.add_to_border_desc(line)
            return
        self.end_border(line, write_obj)
    def combine_borders(self):
-        read_obj = open(self.__file, 'r')
+        with open(self.__file, 'r') as read_obj:
-        write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as write_obj:
-        line_to_read = 'dummy'
+                for line in read_obj:
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
                    self.__first_five = line[0:5]
                    if self.__state == 'border':
                        self.__border_func(line, write_obj)
                    else:
-                to_print = self.__default_func(line)
+                        write_obj.write(self.__default_func(line))
                write_obj.write(to_print)
        read_obj.close()
        write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "combine_borders.data")
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@ -1,6 +1,9 @@
-import os, tempfile
+import os, tempfile, sys
-from calibre.ebooks.rtf2xml import copy
+
 from calibre.ebooks.rtf2xml import copy, check_encoding
 public_dtd = 'rtf2xml1.0.dtd'
 class ConvertToTags:
    """
    Convert file to XML
@ -10,6 +13,7 @@ class ConvertToTags:
            bug_handler,
            dtd_path,
            no_dtd,
            encoding,
            indent = None,
            copy = None,
            run_level = 1,
@ -29,9 +33,14 @@ class ConvertToTags:
        self.__copy = copy
        self.__dtd_path = dtd_path
        self.__no_dtd = no_dtd
        if encoding != 'mac_roman':
            self.__encoding = 'cp' + encoding
        else:
            self.__encoding = 'mac_roman'
        self.__indent = indent
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
    def __initiate_values(self):
        """
        Set values, including those for the dictionary.
@ -61,6 +70,7 @@ class ConvertToTags:
        'tx<ut<__________'  :   self.__text_func,
        'mi<tg<empty_____'  :   self.__empty_func,
        }
    def __open_func(self, line):
        """
        Print the opening tag and newlines when needed.
@ -73,6 +83,7 @@ class ConvertToTags:
        if info in self.__two_new_line:
            self.__write_extra_new_line()
        self.__write_obj.write('<%s>' % info)
    def __empty_func(self, line):
        """
        Print out empty tag and newlines when needed.
@ -85,10 +96,11 @@ class ConvertToTags:
            self.__write_new_line()
        if info in self.__two_new_line:
            self.__write_extra_new_line()
    def __open_att_func(self, line):
        """
        Process lines for open tags that have attributes.
-        The important infor is between [17:-1]. Take this info and split it
+        The important info is between [17:-1]. Take this info and split it
        with the delimiter '<'. The first token in this group is the element
        name. The rest are attributes, separated from their values by '>'. So
        read each token one at a time, and split them by '>'.
@ -119,6 +131,7 @@ class ConvertToTags:
            self.__write_new_line()
        if element_name in self.__two_new_line:
            self.__write_extra_new_line()
    def __empty_att_func(self, line):
        """
        Same as the __open_att_func, except a '/' is placed at the end of the tag.
@ -143,6 +156,7 @@ class ConvertToTags:
            self.__write_new_line()
        if element_name in self.__two_new_line:
            self.__write_extra_new_line()
    def __close_func(self, line):
        """
        Print out the closed tag and new lines, if appropriate.
@ -156,6 +170,7 @@ class ConvertToTags:
            self.__write_new_line()
        if info in self.__two_new_line:
            self.__write_extra_new_line()
    def __text_func(self, line):
        """
        Simply print out the information between [17:-1]
@ -163,6 +178,7 @@ class ConvertToTags:
        #tx<nu<__________<Normal;
        # change this!
        self.__write_obj.write(line[17:-1])
    def __write_extra_new_line(self):
        """
        Print out extra new lines if the new lines have not exceeded two. If
@ -172,8 +188,10 @@ class ConvertToTags:
            return
        if self.__new_line < 2:
            self.__write_obj.write('\n')
    def __default_func(self, line):
        """No-op handler: the line is ignored and nothing is written."""
        pass
    def __write_new_line(self):
        """
        Print out a new line if a new line has not already been printed out.
@ -183,11 +201,23 @@ class ConvertToTags:
        if not self.__new_line:
            self.__write_obj.write('\n')
            self.__new_line += 1
    def __write_dec(self):
        """
        Write the XML declaration at the top of the document.
        """
        #keep maximum compatibility with previous version
        check_encoding_obj = check_encoding.CheckEncoding(
                    bug_handler=self.__bug_handler)
        if not check_encoding_obj.check_encoding(self.__file, verbose=False):
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
        else:
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
            sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
                    ' hope for the best')
        self.__new_line = 0
        self.__write_new_line()
        if self.__no_dtd:
@ -207,6 +237,7 @@ class ConvertToTags:
            )
        self.__new_line = 0
        self.__write_new_line()
    def convert_to_tags(self):
        """
        Read in the file one line at a time. Get the important info, between
@ -222,18 +253,14 @@ class ConvertToTags:
            an empty tag function.
            """
        self.__initiate_values()
        read_obj = open(self.__file, 'r')
        self.__write_obj = open(self.__write_to, 'w')
        self.__write_dec()
-        line_to_read = 1
+        with open(self.__file, 'r') as read_obj:
-        while line_to_read:
+            for line in read_obj:
            line_to_read = read_obj.readline()
            line = line_to_read
                self.__token_info = line[:16]
                action = self.__state_dict.get(self.__token_info)
-            if action != None:
+                if action is not None:
                    action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
--- a/src/calibre/ebooks/rtf2xml/copy.py
+++ b/src/calibre/ebooks/rtf2xml/copy.py
@ -23,6 +23,7 @@ class Copy:
    def __init__(self, bug_handler, file = None, deb_dir = None, ):
        """
        Remember the exception type to raise on errors and the file to copy.

        ``deb_dir`` is accepted but not stored by the constructor; the
        debug directory is configured separately through set_dir().
        """
        self.__bug_handler = bug_handler
        self.__file = file
    def set_dir(self, deb_dir):
        """Set the temporary directory to write files to"""
        if deb_dir is None:
@ -33,19 +34,11 @@ class Copy:
            message = "%(deb_dir)s is not a directory" % vars()
            raise self.__bug_handler , message
        Copy.__dir = deb_dir
    def remove_files(self ):
        """Remove files from directory"""
        self.__remove_the_files(Copy.__dir)
-        """
+
        list_of_files = os.listdir(Copy.__dir)
        list_of_files = os.listdir(the_dir)
        for file in list_of_files:
            rem_file = os.path.join(Copy.__dir,file)
            if os.path.isdir(rem_file):
                self.remove_files(rem_file)
            else:
                os.remove(rem_file)
        """
    def __remove_the_files(self, the_dir):
        """Remove files from directory"""
        list_of_files = os.listdir(the_dir)
@ -58,6 +51,7 @@ class Copy:
                    os.remove(rem_file)
                except OSError:
                    pass
    def copy_file(self, file, new_file):
        """
        Copy the file to a new name
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@ -1,61 +1,142 @@
 #########################################################################
 #                                                                       #
 #                                                                       #
 #   copyright 2002 Paul Henry Tremblay                                  #
 #                                                                       #
 #   This program is distributed in the hope that it will be useful,     #
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
 #   General Public License for more details.                            #
 #                                                                       #
 #   You should have received a copy of the GNU General Public License   #
 #   along with this program; if not, write to the Free Software         #
 #   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
 #   02111-1307 USA                                                      #
 #                                                                       #
 #                                                                       #
 #########################################################################
 '''
 Codepages as to RTF 1.9.1:
    437	United States IBM
    708	Arabic (ASMO 708)
    709	Arabic (ASMO 449+, BCON V4)
    710	Arabic (transparent Arabic)
    711	Arabic (Nafitha Enhanced)
    720	Arabic (transparent ASMO)
    819	Windows 3.1 (United States and Western Europe)
    850	IBM multilingual
    852	Eastern European
    860	Portuguese
    862	Hebrew
    863	French Canadian
    864	Arabic
    865	Norwegian
    866	Soviet Union
    874	Thai
    932	Japanese
    936	Simplified Chinese
    949	Korean
    950	Traditional Chinese
    1250	Eastern European
    1251	Cyrillic
    1252	Western European
    1253	Greek
    1254	Turkish
    1255	Hebrew
    1256	Arabic
    1257	Baltic
    1258	Vietnamese
    1361	Johab
    10000	MAC Roman
    10001	MAC Japan
    10004	MAC Arabic
    10005	MAC Hebrew
    10006	MAC Greek
    10007	MAC Cyrillic
    10029	MAC Latin2
    10081	MAC Turkish
    57002	Devanagari
    57003	Bengali
    57004	Tamil
    57005	Telugu
    57006	Assamese
    57007	Oriya
    57008	Kannada
    57009	Malayalam
    57010	Gujarati
    57011	Punjabi
 '''
 import re
 class DefaultEncoding:
    """
    Find the default encoding for the doc
    """
-    def __init__(self, in_file, bug_handler, run_level = 1,):
+    def __init__(self, in_file, bug_handler, run_level = 1, check_raw = False):
        """
        Required:
            'file'
        Returns:
            nothing
            """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__platform = 'Windows'
        self.__default_num = 'not-defined'
        self.__code_page = '1252'
        self.__datafetched = False
        self.__fetchraw = check_raw
    def find_default_encoding(self):
-        platform = 'Windows'
+        if not self.__datafetched:
-        default_num = 'not-defined'
+            self._encoding()
-        code_page = 'ansicpg1252'
+            self.__datafetched = True
-        read_obj = open(self.__file, 'r')
+        if self.__platform == 'Macintosh':
-        line_to_read = 1
+            code_page = self.__code_page
-        while line_to_read:
+        else:
-            line_to_read = read_obj.readline()
+            code_page = 'ansicpg' + self.__code_page
-            line = line_to_read
+        return self.__platform, code_page, self.__default_num
    def get_codepage(self):
        """
        Return the code page found for the document (a string such as
        '1252' or 'mac_roman').

        The file is scanned by _encoding() the first time any accessor
        needs the data; later calls reuse the stored result.
        """
        if self.__datafetched:
            return self.__code_page
        self._encoding()
        self.__datafetched = True
        return self.__code_page
    def get_platform(self):
        """
        Return the platform found for the document (a string such as
        'Windows' or 'Macintosh').

        The file is scanned by _encoding() the first time any accessor
        needs the data; later calls reuse the stored result.
        """
        if self.__datafetched:
            return self.__platform
        self._encoding()
        self.__datafetched = True
        return self.__platform
    def _encoding(self):
        with open(self.__file, 'r') as read_obj:
            if not self.__fetchraw:
                for line in read_obj:
                    self.__token_info = line[:16]
                    if self.__token_info == 'mi<mk<rtfhed-end':
                        break
                    if self.__token_info == 'cw<ri<ansi-codpg':
                        #cw<ri<ansi-codpg<nu<10000
-                num = line[20:-1]
+                        self.__code_page = line[20:-1] if int(line[20:-1]) \
-                if not num:
+                                            else '1252'
                    num = '1252'
                code_page = 'ansicpg' + num
                    if self.__token_info == 'cw<ri<macintosh_':
-                platform = 'Macintosh'
+                        self.__platform = 'Macintosh'
                        self.__code_page = 'mac_roman'
                    elif self.__token_info == 'cw<ri<pc________':
                        self.__platform = 'IBMPC'
                        self.__code_page = '437'
                    elif self.__token_info == 'cw<ri<pca_______':
                        self.__platform = 'OS/2'
                        self.__code_page = '850'
                    if self.__token_info == 'cw<ri<deflt-font':
-                default_num = line[20:-1]
+                        self.__default_num = line[20:-1]
                        #cw<ri<deflt-font<nu<0
-            #action = self.__state_dict.get(self.__state)
+            else:
-            #if action == None:
+                fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
-                #print self.__state
+                fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
-            #action(line)
+                for line in read_obj:
-        read_obj.close()
+                    if fenccp.search(line):
-        if platform == 'Macintosh':
+                        cp = fenccp.search(line).group(1)
-            code_page = 'mac_roman'
+                        if not int(cp):
-        return platform, code_page, default_num
+                            self.__code_page = cp
                        break
                    if fenc.search(line):
                        enc = fenc.search(line).group(1)
                        if enc == 'mac':
                            self.__code_page = 'mac_roman'
                        elif enc == 'pc':
                            self.__code_page = '437'
                        elif enc == 'pca':
                            self.__code_page = '850'
 # if __name__ == '__main__':
    # encode_obj = DefaultEncoding(
            # in_file = sys.argv[1],
            # bug_handler = Exception,
            # check_raw = True,
            # )
    # print encode_obj.get_codepage()
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class DeleteInfo:
    """Delelet unecessary destination groups"""
    def __init__(self,
@ -29,17 +31,18 @@ class DeleteInfo:
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__write_to = tempfile.mktemp()
-        self.__bracket_count=0
+        self.__bracket_count= 0
        self.__ob_count = 0
        self.__cb_count = 0
-        self.__after_asterisk = 0
+        # self.__after_asterisk = False
-        self.__delete = 0
+        # self.__delete = 0
        self.__initiate_allow()
        self.__ob = 0
-        self.__write_cb = 0
+        self.__write_cb = False
        self.__run_level = run_level
-        self.__found_delete = 0
+        self.__found_delete = False
-        self.__list = 0
+        # self.__list = False
    def __initiate_allow(self):
        """
        Initiate a list of destination groups which should be printed out.
@ -69,6 +72,7 @@ class DeleteInfo:
            'delete'            : self.__delete_func,
            'list'              : self.__list_func,
        }
    def __default_func(self,line):
        """Handle lines when in no special state. Look for an asterisk to
        begin a special state. Otherwise, print out line."""
@ -81,27 +85,29 @@ class DeleteInfo:
            if self.__ob:
                self.__write_obj.write(self.__ob)
            self.__ob = line
-            return 0
+            return False
        else:
            # write previous bracket, since didn't fine asterisk
            if self.__ob:
                self.__write_obj.write(self.__ob)
                self.__ob = 0
-            return 1
+            return True
    def __delete_func(self,line):
        """Handle lines when in delete state. Don't print out lines
        unless the state has ended."""
        if self.__delete_count == self.__cb_count:
            self.__state = 'default'
            if self.__write_cb:
-                self.__write_cb = 0
+                self.__write_cb = True
-                return 1
+                return True
-            return 0
+            return False
    def __asterisk_func(self,line):
        """
        Determine whether to delete info in group
        Note on self.__cb flag.
-        If you find that you are in a delete group, and the preivous
+        If you find that you are in a delete group, and the previous
        token in not an open bracket (self.__ob = 0), that means
        that the delete group is nested inside another acceptable
        detination group. In this case, you have alrady written
@ -110,21 +116,21 @@ class DeleteInfo:
        """
        # Test for {\*}, in which case don't enter
        # delete state
-        self.__after_asterisk = 0 # only enter this function once
+        # self.__after_asterisk = False # only enter this function once
-        self.__found_delete = 1
+        self.__found_delete = True
        if self.__token_info == 'cb<nu<clos-brack':
            if self.__delete_count == self.__cb_count:
                self.__state = 'default'
                self.__ob = 0
                # changed this because haven't printed out start
-                return 0
+                return False
            else:
                # not sure what happens here!
                # believe I have a '{\*}
                if self.__run_level > 3:
                    msg = 'flag problem\n'
                    raise self.__bug_handler, msg
-                return 1
+                return True
        elif self.__token_info in self.__allowable :
            if self.__ob:
                self.__write_obj.write(self.__ob)
@ -132,61 +138,61 @@ class DeleteInfo:
                self.__state = 'default'
            else:
                pass
-            return 1
+            return True
        elif self.__token_info == 'cw<ls<list______':
            self.__ob = 0
            self.__found_list_func(line)
        elif self.__token_info in self.__not_allowable:
            if not self.__ob:
-                self.__write_cb = 1
+                self.__write_cb = True
            self.__ob = 0
            self.__state = 'delete'
            self.__cb_count = 0
-            return 0
+            return False
        else:
            if self.__run_level > 5:
-                msg = 'After an asterisk, and found neither an allowable or non-allowble token\n'
+                msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\
-                msg += 'token is "%s"\n' % self.__token_info
+                            token is "%s"\n') % self.__token_info
-                raise self.__bug_handler
+                raise self.__bug_handler, msg
            if not self.__ob:
-                self.__write_cb = 1
+                self.__write_cb = True
            self.__ob = 0
            self.__state = 'delete'
            self.__cb_count = 0
-            return 0
+            return False
    def __found_list_func(self, line):
        """
        print out control words in this group
        """
        self.__state = 'list'
    def __list_func(self, line):
        """
        Check to see if the group has ended.
-        Return 1 for all control words.
+        Return True for all control words.
-        Return 0 otherwise.
+        Return False otherwise.
        """
        if self.__delete_count == self.__cb_count and self.__token_info ==\
            'cb<nu<clos-brack':
            self.__state = 'default'
            if self.__write_cb:
-                self.__write_cb = 0
+                self.__write_cb = False
-                return 1
+                return True
-            return 0
+            return False
        elif line[0:2] == 'cw':
-            return 1
+            return True
        else:
-            return 0
+            return False
    def delete_info(self):
        """Main method for handling other methods. Read one line in at
-        a time, and determine wheter to print the line based on the state."""
+        a time, and determine whether to print the line based on the state."""
-        line_to_read = 'dummy'
+        with open(self.__file, 'r') as read_obj:
-        read_obj = open(self.__file, 'r')
+            with open(self.__write_to, 'w') as self.__write_obj:
-        self.__write_obj = open(self.__write_to, 'w')
+                for line in read_obj:
        while line_to_read:
                    #ob<nu<open-brack<0001
-            to_print =1
+                    to_print = True
            line_to_read = read_obj.readline()
            line = line_to_read
                    self.__token_info = line[:16]
                    if self.__token_info == 'ob<nu<open-brack':
                        self.__ob_count = line[-5:-1]
@ -194,23 +200,19 @@ class DeleteInfo:
                        self.__cb_count = line[-5:-1]
                    action = self.__state_dict.get(self.__state)
                    if not action:
-                sys.stderr.write('No action in dictionary state is "%s" \n'
+                        sys.stderr.write(_('No action in dictionary state is "%s" \n')
                                % self.__state)
                    to_print = action(line)
-            """
+                    # if self.__after_asterisk:
-            if self.__after_asterisk:
+                        # to_print = self.__asterisk_func(line)
-                to_print = self.__asterisk_func(line)
+                    # elif self.__list:
-            elif self.__list:
+                        # self.__in_list_func(line)
-                self.__in_list_func(line)
+                    # elif self.__delete:
-            elif self.__delete:
+                        # to_print = self.__delete_func(line)
-                to_print = self.__delete_func(line)
+                    # else:
-            else:
+                        # to_print = self.__default_func(line)
                to_print = self.__default_func(line)
            """
                    if to_print:
                        self.__write_obj.write(line)
        self.__write_obj.close()
        read_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "delete_info.data")
--- a/src/calibre/ebooks/rtf2xml/footnote.py
+++ b/src/calibre/ebooks/rtf2xml/footnote.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class Footnote:
    """
    Two public methods are available. The first separates all of the
@ -35,6 +37,7 @@ class Footnote:
        self.__copy = copy
        self.__write_to = tempfile.mktemp()
        self.__found_a_footnote = 0
    def __first_line_func(self, line):
        """
        Print the tag info for footnotes.  Check whether footnote is an
@ -47,6 +50,7 @@ class Footnote:
            self.__write_to_foot_obj.write(
            'mi<tg<open-att__<footnote<num>%s\n' % self.__footnote_count)
        self.__first_line = 0
    def __in_footnote_func(self, line):
        """Handle all tokens that are part of footnote"""
        if self.__first_line:
@ -68,6 +72,7 @@ class Footnote:
            'mi<mk<footnt-clo\n')
        else:
            self.__write_to_foot_obj.write(line)
    def __found_footnote(self, line):
        """ Found a footnote"""
        self.__found_a_footnote = 1
@ -81,6 +86,7 @@ class Footnote:
        'mi<mk<footnt-ind<%04d\n' % self.__footnote_count)
        self.__write_to_foot_obj.write(
        'mi<mk<footnt-ope<%04d\n' % self.__footnote_count)
    def __default_sep(self, line):
        """Handle all tokens that are not footnote tokens"""
        if self.__token_info == 'cw<nt<footnote__':
@ -91,6 +97,7 @@ class Footnote:
            self.__write_obj.write(
                'tx<nu<__________<%s\n' % num
            )
    def __initiate_sep_values(self):
        """
        initiate counters for separate_footnotes method.
@ -102,6 +109,7 @@ class Footnote:
        self.__in_footnote = 0
        self.__first_line = 0 #have not processed the first line of footnote
        self.__footnote_count = 0
    def separate_footnotes(self):
        """
        Separate all the footnotes in an RTF file and put them at the bottom,
@ -111,14 +119,11 @@ class Footnote:
        bottom of the main file.
        """
        self.__initiate_sep_values()
        read_obj = open(self.__file)
        self.__write_obj = open(self.__write_to, 'w')
        self.__footnote_holder = tempfile.mktemp()
-        self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
+        with open(self.__file) as read_obj:
-        line_to_read = 1
+            with open(self.__write_to, 'w') as self.__write_obj:
-        while line_to_read:
+                with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
-            line_to_read = read_obj.readline()
+                    for line in read_obj:
            line = line_to_read
                        self.__token_info = line[:16]
                        # keep track of opening and closing brackets
                        if self.__token_info == 'ob<nu<open-brack':
@ -131,11 +136,8 @@ class Footnote:
                        # not in the middle of footnote text
                        else:
                            self.__default_sep(line)
-        self.__write_obj.close()
+        with open(self.__footnote_holder, 'r') as read_obj:
-        read_obj.close()
+            with open(self.__write_to, 'a') as write_obj:
        self.__write_to_foot_obj.close()
        read_obj = open(self.__footnote_holder, 'r')
        write_obj = open(self.__write_to, 'a')
                write_obj.write(
                    'mi<mk<sect-close\n'
                    'mi<mk<body-close\n'
@ -143,26 +145,24 @@ class Footnote:
                    'mi<tg<close_____<body\n'
                    'mi<tg<close_____<doc\n'
                    'mi<mk<footnt-beg\n')
-        line = 1
+                for line in read_obj:
        while line:
            line = read_obj.readline()
                    write_obj.write(line)
                write_obj.write(
                'mi<mk<footnt-end\n')
        read_obj.close()
        write_obj.close()
        os.remove(self.__footnote_holder)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "footnote_separate.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
    def update_info(self, file, copy):
        """
        Unused method
        """
        self.__file = file
        self.__copy = copy
    def __get_foot_body_func(self, line):
        """
        Process lines in main body and look for beginning of footnotes.
@ -172,6 +172,7 @@ class Footnote:
            self.__state = 'foot'
        else:
            self.__write_obj.write(line)
    def __get_foot_foot_func(self, line):
        """
        Copy footnotes from bottom of file to a separate, temporary file.
@ -180,6 +181,7 @@ class Footnote:
            self.__state = 'body'
        else:
            self.__write_to_foot_obj.write(line)
    def __get_footnotes(self):
        """
        Private method to remove footnotes from main file.  Read one line from
@ -188,21 +190,16 @@ class Footnote:
        These two functions do the work of separating the footnotes form the
        body.
        """
-        read_obj = open(self.__file)
+        with open(self.__file) as read_obj:
-        self.__write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as self.__write_obj:
-            # self.__write_to = "footnote_info.data"
+                with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
-        self.__write_to_foot_obj = open(self.__footnote_holder, 'w')
+                    for line in read_obj:
        line = 1
        while line:
            line = read_obj.readline()
                        self.__token_info = line[:16]
                        if self.__state == 'body':
                            self.__get_foot_body_func(line)
                        elif self.__state == 'foot':
                            self.__get_foot_foot_func(line)
-        read_obj.close()
+
        self.__write_obj.close()
        self.__write_to_foot_obj.close()
    def __get_foot_from_temp(self, num):
        """
        Private method for joining footnotes to body. This method reads from
@ -213,9 +210,7 @@ class Footnote:
        look_for = 'mi<mk<footnt-ope<' + num + '\n'
        found_foot = 0
        string_to_return = ''
-        line = 1
+        for line in self.__read_from_foot_obj:
        while line:
            line = self.__read_from_foot_obj.readline()
            if found_foot:
                if line == 'mi<mk<footnt-clo\n':
                    return string_to_return
@ -223,6 +218,7 @@ class Footnote:
            else:
                if line == look_for:
                    found_foot = 1
    def __join_from_temp(self):
        """
        Private method for rejoining footnotes to body.  Read from the
@ -232,16 +228,14 @@ class Footnote:
        print out to the third file.
        If no footnote marker is found, simply print out the token (line).
        """
-        self.__read_from_foot_obj = open(self.__footnote_holder, 'r')
+        with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj:
-        read_obj = open(self.__write_to, 'r')
+            with open(self.__write_to, 'r') as read_obj:
-        self.__write_obj = open(self.__write_to2, 'w')
+                with open(self.__write_to2, 'w') as self.__write_obj:
-        line = 1
+                    for line in read_obj:
        while line:
            line = read_obj.readline()
                        if line[:16] == 'mi<mk<footnt-ind':
                            line = self.__get_foot_from_temp(line[17:-1])
                        self.__write_obj.write(line)
-        read_obj.close()
+
    def join_footnotes(self):
        """
        Join the footnotes from the bottom of the file and put them in their
@ -258,8 +252,8 @@ class Footnote:
        self.__state = 'body'
        self.__get_footnotes()
        self.__join_from_temp()
-        self.__write_obj.close()
+        # self.__write_obj.close()
-        self.__read_from_foot_obj.close()
+        # self.__read_from_foot_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to2, "footnote_joined.data")
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@ -43,16 +43,18 @@ class GetCharMap:
    def get_char_map(self, map):
        if map == 'ansicpg0':
            map = 'ansicpg1250'
-        found_map = 0
+        if map in ('ansicpg10000', '10000'):
            map = 'mac_roman'
        found_map = False
        map_dict = {}
        self.__char_file.seek(0)
-        for line in self.__char_file.readlines():
+        for line in self.__char_file:
            if not line.strip(): continue
            begin_element = '<%s>' % map;
            end_element = '</%s>' % map
            if not found_map:
                if begin_element in line:
-                    found_map = 1
+                    found_map = True
            else:
                if end_element in line:
                    break
@ -62,8 +64,7 @@ class GetCharMap:
        if not found_map:
-            msg = 'no map found\n'
+            msg = 'no map found\nmap is "%s"\n'%(map,)
            msg += 'map is "%s"\n'%(map,)
            raise self.__bug_handler, msg
        return map_dict
--- a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
+++ b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
@ -57,7 +57,7 @@ class Hex2Utf8:
        """
        self.__file = in_file
        self.__copy = copy
-        if area_to_convert != 'preamble' and area_to_convert != 'body':
+        if area_to_convert not in ('preamble', 'body'):
            msg = (
            'Developer error! Wrong flag.\n'
            'in module "hex_2_utf8.py\n'
@ -79,7 +79,8 @@ class Hex2Utf8:
        self.__write_to = tempfile.mktemp()
        self.__bug_handler = bug_handler
        self.__invalid_rtf_handler = invalid_rtf_handler
-    def update_values(  self,
+
    def update_values(self,
                        file,
                        area_to_convert,
                        char_file,
@ -132,6 +133,7 @@ class Hex2Utf8:
        # self.__convert_symbol = 0
        # self.__convert_wingdings = 0
        # self.__convert_zapf = 0
    def __initiate_values(self):
        """
        Required:
@ -191,6 +193,7 @@ class Hex2Utf8:
            'body'          :       self.__body_func,
            'mi<mk<body-open_'  :   self.__found_body_func,
            'tx<hx<__________'  :   self.__hex_text_func,
            # 'tx<nu<__________'  :   self.__text_func,
            }
        self.__body_state_dict = {
            'preamble'      :       self.__preamble_for_body_func,
@ -209,6 +212,7 @@ class Hex2Utf8:
        }
        self.__caps_list = ['false']
        self.__font_list = ['not-defined']
    def __hex_text_func(self, line):
        """
        Required:
@ -218,12 +222,12 @@ class Hex2Utf8:
            token is in the dictionary, then check if the value starts with a
            "&". If it does, then tag the result as utf text. Otherwise, tag it
            as normal text.
-            If the nex_num is not in the dictionary, then a mistake has been
+            If the hex_num is not in the dictionary, then a mistake has been
            made.
            """
        hex_num = line[17:-1]
        converted = self.__current_dict.get(hex_num)
-        if converted != None:
+        if converted is not None:
            # tag as utf-8
            if converted[0:1] == "&":
                font = self.__current_dict_name
@ -263,42 +267,43 @@ class Hex2Utf8:
                    # msg += 'dictionary is %s\n' % self.__current_dict_name
                    msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
                    raise self.__bug_handler, msg
    def __found_body_func(self, line):
        self.__state = 'body'
        self.__write_obj.write(line)
    def __body_func(self, line):
        """
        When parsing preamble
        """
        self.__write_obj.write(line)
    def __preamble_func(self, line):
        action = self.__preamble_state_dict.get(self.__token_info)
-        if action != None:
+        if action is not None:
            action(line)
        else:
            self.__write_obj.write(line)
    def __convert_preamble(self):
        self.__state = 'preamble'
        read_obj = open(self.__file, 'r')
        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
+        with open(self.__file, 'r') as read_obj:
-        while line_to_read:
+           for line in read_obj:
            line_to_read = read_obj.readline()
            line = line_to_read
                self.__token_info = line[:16]
                action = self.__preamble_state_dict.get(self.__state)
-            if action == None:
+                if action is None:
-                sys.stderr.write('error no state found in hex_2_utf8',
+                    sys.stderr.write(_('error no state found in hex_2_utf8'),
                    self.__state
                    )
                action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
    def __preamble_for_body_func(self, line):
        """
        Required:
@ -311,6 +316,7 @@ class Hex2Utf8:
        if self.__token_info == 'mi<mk<body-open_':
            self.__found_body_func(line)
        self.__write_obj.write(line)
    def __body_for_body_func(self, line):
        """
        Required:
@ -321,10 +327,11 @@ class Hex2Utf8:
            Used when parsing the body.
        """
        action = self.__in_body_dict.get(self.__token_info)
-        if action != None:
+        if action is not None:
            action(line)
        else:
            self.__write_obj.write(line)
    def __start_font_func(self, line):
        """
        Required:
@ -348,6 +355,7 @@ class Hex2Utf8:
        else:
            self.__current_dict_name = 'default'
            self.__current_dict = self.__def_dict
    def __end_font_func(self, line):
        """
        Required:
@ -376,6 +384,7 @@ class Hex2Utf8:
        else:
            self.__current_dict_name = 'default'
            self.__current_dict = self.__def_dict
    def __start_special_font_func_old(self, line):
        """
        Required:
@ -398,6 +407,7 @@ class Hex2Utf8:
            self.__current_dict.append(self.__dingbats_dict)
            self.__special_fonts_found += 1
            self.__current_dict_name = 'Zapf Dingbats'
    def __end_special_font_func(self, line):
        """
        Required:
@ -416,6 +426,7 @@ class Hex2Utf8:
            self.__current_dict.pop()
            self.__special_fonts_found -= 1
            self.__dict_name = 'default'
    def __start_caps_func_old(self, line):
        """
        Required:
@ -427,6 +438,7 @@ class Hex2Utf8:
            self.__in_caps to 1
        """
        self.__in_caps = 1
    def __start_caps_func(self, line):
        """
        Required:
@ -440,6 +452,7 @@ class Hex2Utf8:
        self.__in_caps = 1
        value = line[17:-1]
        self.__caps_list.append(value)
    def __end_caps_func(self, line):
        """
        Required:
@ -455,7 +468,8 @@ class Hex2Utf8:
        else:
            sys.stderr.write('Module is hex_2_utf8\n')
            sys.stderr.write('method is __end_caps_func\n')
-            sys.stderr.write('caps list should be more than one?\n')
+            sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
    def __text_func(self, line):
        """
        Required:
@ -466,9 +480,8 @@ class Hex2Utf8:
            if in caps, convert. Otherwise, print out.
        """
        text = line[17:-1]
-        if self.__current_dict_name == 'Symbol'\
+        # print line
-          or self.__current_dict_name == 'Wingdings'\
+        if self.__current_dict_name in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
          or self.__current_dict_name == 'Zapf Dingbats':
            the_string = ''
            for letter in text:
                hex_num = hex(ord(letter))
@ -477,21 +490,21 @@ class Hex2Utf8:
                hex_num = hex_num[2:]
                hex_num = '\'%s' % hex_num
                converted = self.__current_dict.get(hex_num)
-                if converted == None:
+                if converted is None:
                    sys.stderr.write('module is hex_2_ut8\n')
                    sys.stderr.write('method is __text_func\n')
                    sys.stderr.write('no hex value for "%s"\n' % hex_num)
                else:
                    the_string += converted
            self.__write_obj.write('tx<nu<__________<%s\n' % the_string)
            # print the_string
        else:
            if self.__caps_list[-1] == 'true' \
                and self.__convert_caps\
-                and self.__current_dict_name != 'Symbol'\
+                and self.__current_dict_name not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
                and self.__current_dict_name != 'Wingdings'\
                and self.__current_dict_name != 'Zapf Dingbats':
                text = text.upper()
            self.__write_obj.write('tx<nu<__________<%s\n' % text)
    def __utf_to_caps_func(self, line):
        """
        Required:
@ -506,6 +519,7 @@ class Hex2Utf8:
            # utf_text = utf_text.upper()
            utf_text = self.__utf_token_to_caps_func(utf_text)
        self.__write_obj.write('tx<ut<__________<%s\n' % utf_text)
    def __utf_token_to_caps_func(self, char_entity):
        """
        Required:
@ -530,28 +544,26 @@ class Hex2Utf8:
            return char_entity
        else:
            return converted
    def __convert_body(self):
        self.__state = 'body'
-        read_obj = open(self.__file, 'r')
+        with open(self.__file, 'r') as read_obj:
            self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
+            for line in read_obj:
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
                self.__token_info = line[:16]
                action = self.__body_state_dict.get(self.__state)
-            if action == None:
+                if action is None:
                    sys.stderr.write('error no state found in hex_2_utf8',
                    self.__state
                    )
                action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
    def convert_hex_2_utf8(self):
        self.__initiate_values()
        if self.__area_to_convert == 'preamble':
--- a/src/calibre/ebooks/rtf2xml/inline.py
+++ b/src/calibre/ebooks/rtf2xml/inline.py
@ -1,5 +1,7 @@
 import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy
 """
 States.
 1. default
@ -36,6 +38,7 @@ class Inline:
        self.__copy = copy
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
    def __initiate_values(self):
        """
        Initiate all values.
@ -51,7 +54,6 @@ class Inline:
            'tx<ut<__________'  :       self.__found_text_func,
            'mi<mk<inline-fld'  :       self.__found_text_func,
            'text'              :       self.__found_text_func,
            'cw<nu<hard-lineb'  :       self.__found_text_func, #calibre
            'cb<nu<clos-brack'  :       self.__close_bracket_func,
            'mi<mk<par-end___'  :       self.__end_para_func,
            'mi<mk<footnt-ope'  :       self.__end_para_func,
@ -63,7 +65,6 @@ class Inline:
            'tx<hx<__________'  :       self.__found_text_func,
            'tx<ut<__________'  :       self.__found_text_func,
            'text'              :       self.__found_text_func,
            'cw<nu<hard-lineb'  :       self.__found_text_func, #calibre
            'mi<mk<inline-fld'  :       self.__found_text_func,
            'ob<nu<open-brack':         self.__found_open_bracket_func,
            'mi<mk<par-end___'  :       self.__end_para_func,
@ -110,6 +111,7 @@ class Inline:
            'underlined'    :   'underlined',
        }
        self.__caps_list = ['false']
    def __set_list_func(self, line):
        """
        Requires:
@ -128,6 +130,7 @@ class Inline:
                self.__place = 'in_list'
                self.__inline_list = self.__list_inline_list
                self.__groups_in_waiting = self.__groups_in_waiting_list
    def __default_func(self, line):
        """
        Requires:
@ -140,8 +143,8 @@ class Inline:
        action = self.__default_dict.get(self.__token_info)
        if action:
            action(line)
        if self.__token_info != 'cw<nu<hard-lineb': #calibre
        self.__write_obj.write(line)
    def __found_open_bracket_func(self, line):
        """
        Requires:
@ -156,6 +159,7 @@ class Inline:
        self.__groups_in_waiting[0] += 1
        self.__inline_list.append({})
        self.__inline_list[-1]['contains_inline'] = 0
    def __after_open_bracket_func(self, line):
        """
        Requires:
@ -176,6 +180,7 @@ class Inline:
                self.__state = 'default' #  a non control word?
                action(line)
        self.__write_obj.write(line)
    def __handle_control_word(self, line):
        """
        Required:
@ -206,6 +211,7 @@ class Inline:
                elif char_value == 'Zapf Dingbats':
                    self.__write_obj.write('mi<mk<font-dingb\n')
            """
    def __close_bracket_func(self, line):
        """
        Requires:
@ -244,6 +250,7 @@ class Inline:
        self.__inline_list.pop()
        if self.__groups_in_waiting[0] != 0:
            self.__groups_in_waiting[0] -= 1
    def __found_text_func(self, line):
        """
        Required:
@ -257,7 +264,6 @@ class Inline:
                Text can mark the start of a paragraph.
                If already in a paragraph, check to see if any groups are waiting
                to be added. If so, use another method to write these groups.
            3. If not check if hardline break, then write
        """
        if self.__place == 'in_list':
            self.__write_inline()
@ -265,10 +271,7 @@ class Inline:
            if not self.__in_para:
                self.__in_para = 1
                self.__start_para_func(line)
-            else:
+            elif self.__groups_in_waiting[0] != 0:
                if self.__token_info == 'cw<nu<hard-lineb': #calibre
                    self.__write_obj.write('mi<tg<empty_____<hardline-break\n')
                if self.__groups_in_waiting[0] != 0:
                    self.__write_inline()
    def __write_inline(self):
@ -314,6 +317,7 @@ class Inline:
                            self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
                    self.__write_obj.write('\n')
        self.__groups_in_waiting[0] = 0
    def __end_para_func(self, line):
        """
        Requires:
@ -342,6 +346,7 @@ class Inline:
                    self.__write_obj.write('mi<mk<caps-end__\n')
                self.__write_obj.write('mi<tg<close_____<inline\n')
        self.__in_para = 0
    def __start_para_func(self, line):
        """
        Requires:
@ -369,12 +374,14 @@ class Inline:
                        self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
                self.__write_obj.write('\n')
        self.__groups_in_waiting[0] = 0
    def __found_field_func(self, line):
        """
        Just a default function to make sure I don't prematurely exit
        default state
        """
        pass
    def form_tags(self):
        """
        Requires:
@ -386,12 +393,9 @@ class Inline:
            the state.
        """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
+        with open(self.__file, 'r') as read_obj:
-        self.__write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as self.__write_obj:
-        line_to_read = 1
+                for line in read_obj:
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
                    token = line[0:-1]
                    self.__token_info = ''
                    if token == 'tx<mc<__________<rdblquote'\
@ -406,12 +410,10 @@ class Inline:
                        self.__token_info = line[:16]
                    self.__set_list_func(line)
                    action = self.__state_dict.get(self.__state)
-            if action == None:
+                    if action is None:
                        sys.stderr.write('No matching state in module inline_for_lists.py\n')
                        sys.stderr.write(self.__state + '\n')
                    action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "inline.data")
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@ -15,8 +15,11 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import os, tempfile, re
+import os, tempfile
 from calibre.ebooks.rtf2xml import copy
 from calibre.utils.cleantext import clean_ascii_chars
 class FixLineEndings:
    """Fix line endings"""
    def __init__(self,
@ -32,34 +35,21 @@ class FixLineEndings:
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
        self.__replace_illegals = replace_illegals
    def fix_endings(self):
-        ##tempFileName = tempfile.mktemp()
+        #read
-        illegal_regx = re.compile( '\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13')
+        with open(self.__file, 'r') as read_obj:
-        #nums = [0, 1, 2, 3, 4, 5, 6, 7, 8,  11,  14, 15, 16, 17, 18, 19]
+            input_file = read_obj.read()
-        """
+        #calibre go from win and mac to unix
-read_obj = open(self.__file, 'r')
+        input_file = input_file.replace ('\r\n', '\n')
-line = read_obj.read(1000)
+        input_file = input_file.replace ('\r', '\n')
-regexp = re.compile(r"\r")
+        #remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27
 macintosh = regexp.search(line)
 read_obj.close()
        """
        # always check since I have to get rid of illegal characters
        macintosh = 1
        if macintosh:
            line = 1
            read_obj = open(self.__file, 'r')
            write_obj = open(self.__write_to, 'w')
            while line:
                line = read_obj.read(1000)
                # line = re.sub(regexp,"\n",line)
                line = line.replace ('\r', '\n')
        if self.__replace_illegals:
-                    line = re.sub(illegal_regx, '', line)
+            input_file = clean_ascii_chars(input_file)
-                    # for num in nums:
+        #write
-                        # line = line.replace(chr(num), '')
+        with open(self.__write_to, 'wb') as write_obj:
-                write_obj.write(line )
+            write_obj.write(input_file)
-            read_obj.close()
+        #copy
            write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "line_endings.data")
--- a/src/calibre/ebooks/rtf2xml/pict.py
+++ b/src/calibre/ebooks/rtf2xml/pict.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class Pict:
    """Process graphic information"""
    def __init__(self,
@ -36,13 +38,11 @@ class Pict:
        self.__ob_count = 0
        self.__cb_count = 0
        self.__pict_count = 0
-        self.__in_pict = 0
+        self.__in_pict = False
-        self.__already_found_pict = 0
+        self.__already_found_pict = False
        self.__orig_file = orig_file
        self.__initiate_pict_dict()
        self.__out_file = out_file
        # this is left over
        self.__no_ask = 1
    def __initiate_pict_dict(self):
        self.__pict_dict = {
@ -71,26 +71,15 @@ class Pict:
                self.__out_file))
        else:
            dir_name = os.path.dirname(self.__orig_file)
        # self.__output_to_file_func()
        self.__dir_name = base_name + "_rtf_pict_dir/"
        self.__dir_name = os.path.join(dir_name, self.__dir_name)
        if not os.path.isdir(self.__dir_name):
            try:
                os.mkdir(self.__dir_name)
            except OSError, msg:
-                msg = str(msg)
+                msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
                msg += "Couldn't make directory '%s':\n" % (self.__dir_name)
                raise self.__bug_handler
        else:
            if self.__no_ask:
                user_response = 'r'
            else:
                msg = 'Do you want to remove all files in %s?\n' % self.__dir_name
                msg += 'Type "r" to remove.\n'
                msg +=  'Type any other key to keep files in place.\n'
                sys.stderr.write(msg)
                user_response = raw_input()
            if user_response == 'r':
            if self.__run_level > 1:
                sys.stderr.write('Removing files from old pict directory...\n')
            all_files = os.listdir(self.__dir_name)
@ -107,21 +96,18 @@ class Pict:
        """Create a file for all the pict data to be written to.
        """
        self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf')
        write_pic_obj = open(self.__pict_file, 'w')
        write_pic_obj.close()
        self.__write_pic_obj = open(self.__pict_file, 'a')
    def __in_pict_func(self, line):
        if self.__cb_count == self.__pict_br_count:
-            self.__in_pict = 0
+            self.__in_pict = False
            self.__write_pic_obj.write("}\n")
-            return 1
+            return True
        else:
            action = self.__pict_dict.get(self.__token_info)
            if action:
-                line = action(line)
+                self.__write_pic_obj.write(action(line))
-                self.__write_pic_obj.write(line)
+            return False
            return 0
    def __default(self, line, write_obj):
        """Determine if each token marks the beginning of pict data.
@ -142,32 +128,27 @@ class Pict:
            write_obj.write('mi<mk<pict-end__\n')
            if not self.__already_found_pict:
                self.__create_pict_file()
-                self.__already_found_pict=1;
+                self.__already_found_pict=True;
                self.__print_rtf_header()
            self.__in_pict = 1
            self.__pict_br_count = self.__ob_count
            self.__cb_count = 0
            self.__write_pic_obj.write("{\\pict\n")
-            return 0
+            return False
-        return 1
+        return True
    def __print_rtf_header(self):
        """Print to pict file the necessary RTF data for the file to be
        recognized as an RTF file.
        """
-        self.__write_pic_obj.write("{\\rtf1 \n")
+        self.__write_pic_obj.write("{\\rtf1 \n{\\fonttbl\\f0\\null;} \n")
-        self.__write_pic_obj.write("{\\fonttbl\\f0\\null;} \n")
+        self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n\\pard \n")
        self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n")
        self.__write_pic_obj.write("\\pard \n")
    def process_pict(self):
        self.__make_dir()
-        read_obj = open(self.__file)
+        with open(self.__file) as read_obj:
-        write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as write_obj:
-        line_to_read = 'dummy'
+                for line in read_obj:
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
                    self.__token_info = line[:16]
                    if self.__token_info == 'ob<nu<open-brack':
                        self.__ob_count = line[-5:-1]
@ -184,11 +165,13 @@ class Pict:
                if self.__already_found_pict:
                    self.__write_pic_obj.write("}\n")
                    self.__write_pic_obj.close()
        read_obj.close()
        write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "pict.data")
            try:
                copy_obj.copy_file(self.__pict_file, "pict.rtf")
            except:
                pass
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        if self.__pict_count == 0:
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import os, re, tempfile
 from calibre.ebooks.rtf2xml import copy, check_brackets
 class ProcessTokens:
    """
    Process each token on a line and add information that will be useful for
@ -41,9 +43,11 @@ class ProcessTokens:
        self.__bracket_count=0
        self.__exception_handler = exception_handler
        self.__bug_handler = bug_handler
    def compile_expressions(self):
        self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)")
        self.__utf_exp = re.compile(r'(&.*?;)')
    def initiate_token_dict(self):
        self.__return_code = 0
        self.dict_token={
@ -66,6 +70,7 @@ class ProcessTokens:
        ';'                  :	('mc', ';', self.ms_sub_func),
        # this must be wrong
        '-'                  :	('mc', '-', self.ms_sub_func),
        'line'               :  ('mi', 'hardline-break', self.hardline_func), #calibre
        # misc => ml
        '*'                  :	('ml', 'asterisk__', self.default_func),
        ':'                  :	('ml', 'colon_____', self.default_func),
@ -73,7 +78,6 @@ class ProcessTokens:
        'backslash'          :	('nu', '\\', self.text_func),
        'ob'                 :	('nu', '{', self.text_func),
        'cb'                 :	('nu', '}', self.text_func),
        'line'               :  ('nu', 'hard-lineb', self.default_func), #calibre
        #'line'               :  ('nu', ' ', self.text_func), calibre
        # paragraph formatting => pf
        'page'               :  ('pf', 'page-break', self.default_func),
@ -159,6 +163,8 @@ class ProcessTokens:
        'rtf'                :	('ri', 'rtf_______', self.default_func),
        'deff'               :	('ri', 'deflt-font', self.default_func),
        'mac'                :	('ri', 'macintosh_', self.default_func),
        'pc'                 :	('ri', 'pc________', self.default_func),
        'pca'                :	('ri', 'pca_______', self.default_func),
        'ansi'               :	('ri', 'ansi______', self.default_func),
        'ansicpg'            :	('ri', 'ansi-codpg', self.default_func),
        # notes => nt
@ -595,30 +601,37 @@ class ProcessTokens:
        num = num[1:] # chop off leading 0, which I added
        num = num.upper() # the mappings store hex in caps
        return 'tx<hx<__________<\'%s\n' % num # add an ' for the mappings
    def ms_sub_func(self, pre, token, num):
        return 'tx<mc<__________<%s\n' % token
    def hardline_func(self, pre, token, num):
        return 'mi<tg<empty_____<%s\n' % token
    def default_func(self, pre, token, num):
-        if num == None:
+        if num is None:
            num = 'true'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
    def __list_type_func(self, pre, token, num):
        type = 'arabic'
-        if num == None:
+        if num is None:
            type = 'Arabic'
        else:
            try:
                num = int(num)
            except ValueError:
                if self.__run_level > 3:
-                    msg = 'number "%s" cannot be converted to integer\n' % num
+                    msg = 'Number "%s" cannot be converted to integer\n' % num
                    raise self.__bug_handler, msg
            type = self.__number_type_dict.get(num)
-            if type == None:
+            if type is None:
                if self.__run_level > 3:
                    msg = 'No type for "%s" in self.__number_type_dict\n'
                    raise self.__bug_handler
                type = 'Arabic'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, type)
    def __language_func(self, pre, token, num):
        lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group()))
        if not lang_name:
@ -627,31 +640,36 @@ class ProcessTokens:
                msg = 'No entry for number "%s"' % num
                raise self.__bug_handler, msg
        return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
    def two_part_func(self, pre, token, num):
        list = token.split("<")
        token = list[0]
        num = list[1]
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
        ##return 'cw<nu<nu<nu<%s>num<%s\n' % (token, num)
    def divide_by_2(self, pre, token, num):
        num = self.divide_num(num, 2)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
        ##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token)
    def divide_by_20(self, pre, token, num):
        num = self.divide_num(num, 20)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
        ##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token)
    def text_func(self, pre, token, num=None):
        return 'tx<nu<__________<%s\n' % token
    def ob_func(self, pre, token, num=None):
        self.__bracket_count += 1
        ##return 'ob<%04d\n' % self.__bracket_count
        return 'ob<nu<open-brack<%04d\n' % self.__bracket_count
    def cb_func(self, pre, token, num=None):
        ##line = 'cb<%04d\n' % self.__bracket_count
        line = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count
        self.__bracket_count -= 1
        return line
    def color_func(self, pre, token, num):
        third_field = 'nu'
        if num[-1] == ';':
@ -662,6 +680,7 @@ class ProcessTokens:
            num = "0" + num
        return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)
        ##return 'cw<cl<%s<nu<nu<%s>%s<%s\n' % (third_field, token, num, token)
    def bool_st_func(self, pre, token, num):
        if num is None or num == '' or num == '1':
            return 'cw<%s<%s<nu<true\n' % (pre, token)
@ -670,24 +689,23 @@ class ProcessTokens:
            return 'cw<%s<%s<nu<false\n' % (pre, token)
                ##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
        else:
-            msg = 'boolean should have some value module process tokens\n'
+            msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num)
            msg += 'token is ' + token + "\n"
            msg += "'" + num + "'" + "\n"
            raise self.__bug_handler, msg
    def __no_sup_sub_func(self, pre, token, num):
        the_string = 'cw<ci<subscript_<nu<false\n'
        the_string += 'cw<ci<superscrip<nu<false\n'
        return the_string
    def divide_num(self, numerator, denominator):
        try:
-            numerator = float(re.search('[0-9.]+', numerator).group())            
+            #calibre why ignore negative number? Wrong in case of \fi
            numerator = float(re.search('[0-9.\-]+', numerator).group())
        except TypeError, msg:
            if self.__run_level > 3:
-                msg = 'no number to process?\n'
+                msg = ('No number to process?\nthis indicates that the token \(\\li\) \
-                msg += 'this indicates that the token '
+                should have a number and does not\nnumerator is \
-                msg += ' \(\\li\) should have a number and does not\n'
+                "%s"\ndenominator is "%s"\n') % (numerator, denominator)
                msg += 'numerator is "%s"\n' % numerator
                msg += 'denominator is "%s"\n' % denominator
                raise self.__bug_handler, msg
            if 5 > self.__return_code:
                self.__return_code = 5
@ -698,9 +716,10 @@ class ProcessTokens:
        if string_num[-2:] == ".0":
            string_num = string_num[:-2]
        return string_num
    def split_let_num(self, token):
        match_obj = re.search(self.__num_exp,token)
-        if match_obj != None:
+        if match_obj is not None:
            first = match_obj.group(1)
            second = match_obj.group(2)
            if not second:
@ -714,6 +733,7 @@ class ProcessTokens:
                raise self.__bug_handler
            return token, 0
        return first, second
    def convert_to_hex(self,number):
        """Convert a string to uppercase hexidecimal"""
        num = int(number)
@ -722,6 +742,7 @@ class ProcessTokens:
            return hex_num
        except:
            raise self.__bug_handler
    def process_cw(self, token):
        """Change the value of the control word by determining what dictionary
        it belongs to"""
@ -737,69 +758,41 @@ class ProcessTokens:
        pre, token, action = self.dict_token.get(token, (None, None, None))
        if action:
            return action(pre, token, num)
-    # unused function
+
    def initiate_token_actions(self):
        self.action_for_token={
        '{'     :   self.ob_func,
        '}'     :   self.cb_func,
        '\\'    :   self.process_cw,
        }
    # unused function
    def evaluate_token(self,token):
        """Evaluate tokens. Return a value if the token is not a
        control word. Otherwise, pass token onto another method
        for further evaluation."""
        token, action = self.dict_token.get(token[0:1])
        if action:
            line = action(token)
            return line
        else :
            return  'tx<nu<nu<nu<nu<%s\n' % token
    def __check_brackets(self, in_file):
        self.__check_brack_obj = check_brackets.CheckBrackets\
            (file = in_file)
        good_br =  self.__check_brack_obj.check_brackets()[0]
        if not good_br:
            return 1
    def process_tokens(self):
        """Main method for handling other methods. """
        first_token = 0
        second_token = 0
        read_obj = open(self.__file, 'r')
        write_obj = open(self.__write_to, 'w')
        line_to_read = "dummy"
        line_count = 0
-        while line_to_read:
+        with open(self.__file, 'r') as read_obj:
-            line_to_read = read_obj.readline()
+            with open(self.__write_to, 'wb') as write_obj:
-            token = line_to_read
+                for line in read_obj:
-            token = token.replace("\n","")
+                    token = line.replace("\n","")
            if not token:
                continue
                    line_count += 1
                    if line_count == 1 and token != '\\{':
                            msg = 'Invalid RTF: document doesn\'t start with {\n'
                            raise self.__exception_handler, msg
                    elif line_count == 2 and token[0:4] != '\\rtf':
                            msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
                            raise self.__exception_handler, msg
                    the_index = token.find('\\ ')
                    if token is not None and  the_index > -1:
                        msg = 'Invalid RTF: token "\\ " not valid.\n'
                        raise self.__exception_handler, msg
                    elif token[:1] == "\\":
                        try:
                            token.decode('us-ascii')
                        except UnicodeError, msg:
-                msg = str(msg)
+                            msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
                msg += 'Invalid RTF: File not ascii encoded.\n'
                            raise self.__exception_handler, msg
            if not first_token:
                if token != '\\{':
                    msg = 'Invalid RTF: document doesn\'t start with {\n'
                    raise self.__exception_handler, msg
                first_token = 1
            elif first_token and not second_token:
                if token[0:4] != '\\rtf':
                    msg ='Invalid RTF: document doesn\'t start with \\rtf \n'
                    raise self.__exception_handler, msg
                second_token = 1
            ##token = self.evaluate_token(token)
            the_index = token.find('\\ ')
            if token != None and  the_index > -1:
                msg ='Invalid RTF: token "\\ " not valid. \n'
                raise self.__exception_handler, msg
            elif token[0:1] == "\\":
                        line = self.process_cw(token)
-                if line != None:
+                        if line is not None:
                            write_obj.write(line)
                    else:
                        fields = re.split(self.__utf_exp, token)
@ -810,16 +803,17 @@ class ProcessTokens:
                                write_obj.write('tx<ut<__________<%s\n' % field)
                            else:
                                write_obj.write('tx<nu<__________<%s\n' % field)
-        read_obj.close()
+
        write_obj.close()
        if not line_count:
-            msg ='Invalid RTF: file appears to be empty. \n'
+            msg = 'Invalid RTF: file appears to be empty.\n'
            raise self.__exception_handler, msg
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "processed_tokens.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        bad_brackets = self.__check_brackets(self.__file)
        if bad_brackets:
            msg = 'Invalid RTF: document does not have matching brackets.\n'
--- a/src/calibre/ebooks/rtf2xml/replace_illegals.py
+++ b/src/calibre/ebooks/rtf2xml/replace_illegals.py
@ -16,7 +16,10 @@
 #                                                                       #
 #########################################################################
 import os, tempfile
 from calibre.ebooks.rtf2xml import copy
 from calibre.utils.cleantext import clean_ascii_chars
 class ReplaceIllegals:
    """
    reaplace illegal lower ascii characters
@ -30,21 +33,14 @@ class ReplaceIllegals:
        self.__copy = copy
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
    def replace_illegals(self):
        """
        """
-        nums = [0, 1, 2, 3, 4, 5, 6, 7, 8,  11,  13, 14, 15, 16, 17, 18, 19]
+        with open(self.__file, 'r') as read_obj:
-        read_obj = open(self.__file, 'r')
+            with open(self.__write_to, 'w') as write_obj:
-        write_obj = open(self.__write_to, 'w')
+                for line in read_obj:
-        line_to_read = 1
+                    write_obj.write(clean_ascii_chars(line))
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
            for num in nums:
                line = line.replace(chr(num), '')
            write_obj.write(line)
        read_obj.close()
        write_obj.close()
        copy_obj = copy.Copy()
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "replace_illegals.data")
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@ -16,7 +16,10 @@
 #                                                                       #
 #########################################################################
 import os, re, tempfile
 from calibre.ebooks.rtf2xml import copy
 from calibre.utils.mreplace import MReplace
 class Tokenize:
    """Tokenize RTF into one line per field. Each line will contain information useful for the rest of the script"""
    def __init__(self,
@ -28,89 +31,175 @@ class Tokenize:
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__special_tokens = [ '_', '~', "'", '{', '}' ]
        self.__write_to = tempfile.mktemp()
-    def __from_ms_to_utf8(self,match_obj):
+        self.__compile_expressions()
        #variables
        self.__uc_char = 0
        self.__uc_bin = False
        self.__uc_value = [1]
    def __reini_utf8_counters(self):
        self.__uc_char = 0
        self.__uc_bin = False
    def __remove_uc_chars(self, startchar, token):
        for i in xrange(startchar, len(token)):
            if token[i] == " ":
                continue
            elif self.__uc_char:
                self.__uc_char -= 1
            else:
                return token[i:]
        #if only " " and char to skip
        return ''
    def __unicode_process(self, token):
        #change scope in
        if token == '\{':
            self.__uc_value.append(self.__uc_value[-1])
            #basic error handling
            self.__reini_utf8_counters()
            return token
        #change scope out
        elif token == '\}':
            self.__uc_value.pop()
            self.__reini_utf8_counters()
            return token
        #add a uc control
        elif token[:3] == '\uc':
            self.__uc_value[-1] = int(token[3:])
            self.__reini_utf8_counters()
            return token
        #bin data to slip
        elif self.__uc_bin:
            self.__uc_bin = False
            return ''
        #uc char to remove
        elif self.__uc_char:
            #handle \bin tag in case of uc char to skip
            if token[:4] == '\bin':
                self.__uc_char -=1
                self.__uc_bin = True
                return ''
            elif token[:1] == "\\" :
                self.__uc_char -=1
                return ''
            else:
                return self.__remove_uc_chars(0, token)
        #go for real \u token
        match_obj = self.__utf_exp.match(token)
        if match_obj is not None:
            self.__reini_utf8_counters()
            #get value and handle negative case
            uni_char = int(match_obj.group(1))
            uni_len = len(match_obj.group(1)) + 2
            if uni_char < 0:
                uni_char += 65536
-        return   '&#x' + str('%X' % uni_char) + ';'
+            uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
-    def __neg_unicode_func(self, match_obj):
+            self.__uc_char = self.__uc_value[-1]
-        neg_uni_char = int(match_obj.group(1)) * -1
+            #there is only an unicode char
-        # sys.stderr.write(str( neg_uni_char))
+            if len(token)<= uni_len:
-        uni_char = neg_uni_char + 65536
+                return uni_char
-        return   '&#x' + str('%X' % uni_char) + ';'
+            #an unicode char and something else
-    def __sub_line_reg(self,line):
+            #must be after as it is splited on \
-        line = line.replace("\\\\", "\\backslash ")
+            #necessary? maybe for \bin?
-        line = line.replace("\\~", "\\~ ")
+            elif not self.__uc_char:
-        line = line.replace("\\;", "\\; ")
+                return uni_char + token[uni_len:]
-        line = line.replace("&", "&amp;")
+            #if not uc0 and chars
        line = line.replace("<", "&lt;")
        line = line.replace(">", "&gt;")
        line = line.replace("\\~", "\\~ ")
        line = line.replace("\\_", "\\_ ")
        line = line.replace("\\:", "\\: ")
        line = line.replace("\\-", "\\- ")
        # turn into a generic token to eliminate special
        # cases and make processing easier
        line = line.replace("\\{", "\\ob ")
        # turn into a generic token to eliminate special
        # cases and make processing easier
        line = line.replace("\\}", "\\cb ")
        # put a backslash in front of to eliminate special cases and
        # make processing easier
        line = line.replace("{", "\\{")
        # put a backslash in front of to eliminate special cases and
        # make processing easier
        line = line.replace("}", "\\}")
        line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line)
        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
        line = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", line)
        ##line = line.replace("\\backslash", "\\\\")
        # this is for older RTF
        line = re.sub(self.__par_exp, '\\par ', line)
        return line
    def __compile_expressions(self):
        self.__ms_hex_exp = re.compile(r"\\\'(..)")
        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
        self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
        self.__par_exp = re.compile(r'\\$')
        self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
    def __create_tokens(self):
        self.__compile_expressions()
        read_obj = open(self.__file, 'r')
        write_obj = open(self.__write_to, 'w')
        line_to_read = "dummy"
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
            line = line.replace("\n", "")
            line =  self.__sub_line_reg(line)
            tokens = re.split(self.__splitexp, line)
            ##print tokens
            for token in tokens:
                if token != "":
                    write_obj.write(token + "\n")
                    """
                    match_obj = re.search(self.__mixed_exp, token)
                    if match_obj != None:
                        first = match_obj.group(1)
                        second = match_obj.group(2)
                        write_obj.write(first + "\n")
                        write_obj.write(second + "\n")
            else:
-                        write_obj.write(token + "\n")
+                return uni_char + self.__remove_uc_chars(uni_len, token)
-                    """
+        #default
-        read_obj.close()
+        return token
-        write_obj.close()
+
    def __sub_reg_split(self,input_file):
        input_file = self.__replace_spchar.mreplace(input_file)
        input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
        input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
        #remove \n in bin data
        input_file = self.__bin_exp.sub(lambda x: \
                                        x.group().replace('\n', '') + '\n', input_file)
        #split
        tokens = re.split(self.__splitexp, input_file)
        #remove empty tokens and \n
        return filter(lambda x: len(x) > 0 and x != '\n', tokens)
        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
        # this is for older RTF
        #line = re.sub(self.__par_exp, '\\par ', line)
        #return filter(lambda x: len(x) > 0, \
            #(self.__remove_line.sub('', x) for x in tokens))
    def __compile_expressions(self):
        SIMPLE_RPL = {
            "\\\\": "\\backslash ",
            "\\~": "\\~ ",
            "\\;": "\\; ",
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            "\\~": "\\~ ",
            "\\_": "\\_ ",
            "\\:": "\\: ",
            "\\-": "\\- ",
            # turn into a generic token to eliminate special
            # cases and make processing easier
            "\\{": "\\ob ",
            # turn into a generic token to eliminate special
            # cases and make processing easier
            "\\}": "\\cb ",
            # put a backslash in front of to eliminate special cases and
            # make processing easier
            "{": "\\{",
            # put a backslash in front of to eliminate special cases and
            # make processing easier
            "}": "\\}",
            # this is for older RTF
            r'\\$': '\\par ',
            }
        self.__replace_spchar = MReplace(SIMPLE_RPL)
        #add ;? in case of char following \u
        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
        self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
        #manage upr/ud situations
        self.__utf_ud = re.compile(r"\\{[\n ]?\\upr[\n ]?(?:\\{.*?\\})[\n ]?" + \
                       r"\\{[\n ]?\\*[\n ]?\\ud[\n ]?(\\{.*?\\})[\n ]?\\}[\n ]?\\}")
        #add \n in split for whole file reading
        #why keep backslash whereas \is replaced before?
        #remove \n from endline char
        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
        #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
        #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
        #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
        #self.__par_exp = re.compile(r'\\$')
        #self.__remove_line = re.compile(r'\n+')
        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
    def tokenize(self):
-        """Main class for handling other methods. Reads in one line \
+        """Main class for handling other methods. Reads the file \
-        at a time, usues method self.sub_line to make basic substitutions,\
+        , uses method self.sub_reg to make basic substitutions,\
-        uses ? to process tokens"""
+        and process tokens by itself"""
-        self.__create_tokens()
+        #read
        with open(self.__file, 'r') as read_obj:
            input_file = read_obj.read()
        #process simple replacements and split giving us a correct list
        #remove '' and \n in the process
        tokens = self.__sub_reg_split(input_file)
        #correct unicode
        tokens = map(self.__unicode_process, tokens)
        #remove empty items created by removing \uc
        tokens = filter(lambda x: len(x) > 0, tokens)
        #write
        with open(self.__write_to, 'wb') as write_obj:
            write_obj.write('\n'.join(tokens))
        #Move and copy
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "tokenize.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
--- a/src/calibre/ebooks/textile/init.py
+++ b/src/calibre/ebooks/textile/init.py
@ -1,3 +1,6 @@
 from functions import textile, textile_restricted, Textile
 if False:
    textile, textile_restricted, Textile
 __all__ = ['textile', 'textile_restricted']
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@ -425,7 +425,7 @@ class Textile(object):
        text = text.split('\n\n')
        tag = 'p'
-        atts = cite = graf = ext = ''
+        atts = cite = graf = ext = c1 = ''
        out = []
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -1,4 +1,8 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Read content from txt file.
@ -7,15 +11,10 @@ Read content from txt file.
 import os, re
 from calibre import prepare_string_for_xml, isbytestring
 from calibre.ebooks.markdown import markdown
 from calibre.ebooks.textile import textile
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
 from calibre.ebooks.conversion.preprocess import DocAnalysis
-
+from calibre.utils.cleantext import clean_ascii_chars
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -35,9 +34,7 @@ def clean_txt(txt):
    # Remove excessive line breaks.
    txt = re.sub('\n{3,}', '\n\n', txt)
    #remove ASCII invalid chars : 0 to 8 and 11-14 to 24
-    chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
+    txt = clean_ascii_chars(txt)
    illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
    txt = illegal_chars.sub('', txt)
    return txt
@ -75,6 +72,7 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0):
    return tp.convert(txt, title, epub_split_size_kb)
 def convert_markdown(txt, title='', disable_toc=False):
    from calibre.ebooks.markdown import markdown
    md = markdown.Markdown(
          extensions=['footnotes', 'tables', 'toc'],
          extension_configs={"toc": {"disable_toc": disable_toc}},
@ -82,6 +80,7 @@ def convert_markdown(txt, title='', disable_toc=False):
    return HTML_TEMPLATE % (title, md.convert(txt))
 def convert_textile(txt, title=''):
    '''
    Render textile formatted text and wrap the result in HTML_TEMPLATE.

    :param txt: The textile source, handed to textile() with encoding='utf-8'.
    :param title: Value substituted into the template's title slot.
    :return: The filled in HTML_TEMPLATE.
    '''
    from calibre.ebooks.textile import textile
    body = textile(txt, encoding='utf-8')
    document = HTML_TEMPLATE % (title, body)
    return document
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -269,10 +269,14 @@ def question_dialog(parent, title, msg, det_msg='', show_copy_button=True,
    return d.exec_() == yes_button
-def info_dialog(parent, title, msg, det_msg='', show=False):
+def info_dialog(parent, title, msg, det_msg='', show=False,
        show_copy_button=True):
    d = MessageBox(QMessageBox.Information, title, msg, QMessageBox.Ok,
                    parent, det_msg)
    d.setIconPixmap(QPixmap(I('dialog_information.png')))
    if not show_copy_button:
        d.cb.setVisible(False)
    if show:
        return d.exec_()
    return d
--- a/src/calibre/gui2/catalog/catalog_bibtex.py
+++ b/src/calibre/gui2/catalog/catalog_bibtex.py
@ -27,14 +27,17 @@ class PluginWidget(QWidget, Ui_Form):
    def __init__(self, parent=None):
        '''
        Set up the generated UI and list every catalog field except 'all'.

        Each accepted field is recorded in self.all_fields and added as an
        item of the self.db_fields list widget.
        '''
        QWidget.__init__(self, parent)
        self.setupUi(self)
        from calibre.library.catalog import FIELDS
        self.all_fields = []
        for field in FIELDS:
            # 'all' is not offered as an individual entry
            if field == 'all':
                continue
            self.all_fields.append(field)
            QListWidgetItem(field, self.db_fields)
    def initialize(self, name, db): #not working properly to update
        from calibre.library.catalog import FIELDS
        self.all_fields = [x for x in FIELDS if x != 'all']
        #add custom columns
        self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
        #populate
        for x in self.all_fields:
            QListWidgetItem(x, self.db_fields)
        self.name = name
        fields = gprefs.get(name+'_db_fields', self.all_fields)
        # Restore the activated db_fields from last use
--- a/src/calibre/gui2/dialogs/drm_error.py
+++ b/src/calibre/gui2/dialogs/drm_error.py
@ -0,0 +1,21 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from PyQt4.Qt import QDialog
 from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
 class DRMErrorMessage(QDialog, Ui_Dialog):
    '''
    Dialog built from the generated Ui_Dialog form. When a title is
    supplied it is placed as an <h2> heading in front of the text the
    form already put into the msg label.
    '''
    def __init__(self, parent=None, title=None):
        QDialog.__init__(self, parent)
        self.setupUi(self)
        if title is not None:
            # Keep the text set by the form and prefix it with the heading
            current = unicode(self.msg.text())
            heading = '<h2>%s</h2>%s'%(title, current)
            self.msg.setText(heading)
        # Size the dialog to its size hint once the final text is in place
        self.resize(self.sizeHint())
--- a/src/calibre/gui2/dialogs/drm_error.ui
+++ b/src/calibre/gui2/dialogs/drm_error.ui
@ -0,0 +1,102 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
 <class>Dialog</class>
 <widget class="QDialog" name="Dialog">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>417</width>
    <height>235</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>This book is DRMed</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0">
    <widget class="QLabel" name="label">
     <property name="sizePolicy">
      <sizepolicy hsizetype="Preferred" vsizetype="Preferred">
       <horstretch>0</horstretch>
       <verstretch>0</verstretch>
      </sizepolicy>
     </property>
     <property name="maximumSize">
      <size>
       <width>132</width>
       <height>16777215</height>
      </size>
     </property>
     <property name="text">
      <string/>
     </property>
     <property name="pixmap">
      <pixmap resource="../../../../resources/images.qrc">:/images/document-encrypt.png</pixmap>
     </property>
    </widget>
   </item>
   <item row="0" column="1">
    <widget class="QLabel" name="msg">
     <property name="text">
      <string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre, 
 &lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
     </property>
     <property name="wordWrap">
      <bool>true</bool>
     </property>
     <property name="openExternalLinks">
      <bool>true</bool>
     </property>
    </widget>
   </item>
   <item row="1" column="0" colspan="2">
    <widget class="QDialogButtonBox" name="buttonBox">
     <property name="orientation">
      <enum>Qt::Horizontal</enum>
     </property>
     <property name="standardButtons">
      <set>QDialogButtonBox::Close</set>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <resources>
  <include location="../../../../resources/images.qrc"/>
 </resources>
 <connections>
  <connection>
   <sender>buttonBox</sender>
   <signal>accepted()</signal>
   <receiver>Dialog</receiver>
   <slot>accept()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>248</x>
     <y>254</y>
    </hint>
    <hint type="destinationlabel">
     <x>157</x>
     <y>274</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>buttonBox</sender>
   <signal>rejected()</signal>
   <receiver>Dialog</receiver>
   <slot>reject()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>316</x>
     <y>260</y>
    </hint>
    <hint type="destinationlabel">
     <x>286</x>
     <y>274</y>
    </hint>
   </hints>
  </connection>
 </connections>
 </ui>
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
 from calibre.ebooks.metadata.book.base import composite_formatter
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.gui2.custom_column_widgets import populate_metadata_page
-from calibre.gui2 import error_dialog
+from calibre.gui2 import error_dialog, ResizableDialog
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.utils.config import dynamic
 from calibre.utils.titlecase import titlecase
@ -49,7 +49,7 @@ def get_cover_data(path):
-class MyBlockingBusy(QDialog):
+class MyBlockingBusy(QDialog): # {{{
    do_one_signal = pyqtSignal()
@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
        self.current_index += 1
        self.do_one_signal.emit()
    # }}}
-class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
+class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
    s_r_functions = {       ''              : lambda x: x,
                            _('Lower Case') : lambda x: icu_lower(x),
@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
                        ]
    def __init__(self, window, rows, model, tab):
-        QDialog.__init__(self, window)
+        ResizableDialog.__init__(self, window)
        Ui_MetadataBulkDialog.__init__(self)
        self.setupUi(self)
        self.model = model
        self.db = model.db
        self.ids = [self.db.id(r) for r in rows]
--- a/src/calibre/gui2/dialogs/metadata_bulk.ui
+++ b/src/calibre/gui2/dialogs/metadata_bulk.ui
@ -6,8 +6,8 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>752</width>
+    <width>850</width>
-    <height>633</height>
+    <height>650</height>
   </rect>
  </property>
  <property name="windowTitle">
@ -17,8 +17,8 @@
   <iconset resource="../../../../resources/images.qrc">
    <normaloff>:/images/edit_input.png</normaloff>:/images/edit_input.png</iconset>
  </property>
-  <layout class="QVBoxLayout" name="verticalLayout">
+  <layout class="QGridLayout" name="gridLayout_2">
-   <item>
+   <item row="0" column="0">
    <widget class="QLabel" name="box_title">
     <property name="text">
      <string/>
@ -28,11 +28,27 @@
     </property>
    </widget>
   </item>
-   <item>
+   <item row="1" column="0">
-    <layout class="QVBoxLayout">
+    <widget class="QScrollArea" name="scrollArea">
-     <property name="spacing">
+     <property name="frameShape">
-      <number>6</number>
+      <enum>QFrame::NoFrame</enum>
     </property>
     <property name="lineWidth">
      <number>0</number>
     </property>
     <property name="widgetResizable">
      <bool>true</bool>
     </property>
     <widget class="QWidget" name="scrollAreaWidgetContents">
      <property name="geometry">
       <rect>
        <x>0</x>
        <y>0</y>
        <width>842</width>
        <height>589</height>
       </rect>
      </property>
      <layout class="QVBoxLayout" name="verticalLayout_2">
       <property name="margin">
        <number>0</number>
       </property>
@ -784,8 +800,8 @@ not multiple and the destination field is multiple</string>
               <rect>
                <x>0</x>
                <y>0</y>
-              <width>726</width>
+                <width>197</width>
-              <height>334</height>
+                <height>60</height>
               </rect>
              </property>
              <layout class="QGridLayout" name="testgrid">
@ -838,8 +854,10 @@ not multiple and the destination field is multiple</string>
        </widget>
       </item>
      </layout>
     </widget>
    </widget>
   </item>
-   <item>
+   <item row="2" column="0">
    <widget class="QDialogButtonBox" name="button_box">
     <property name="orientation">
      <enum>Qt::Horizontal</enum>
@ -893,7 +911,6 @@ not multiple and the destination field is multiple</string>
  <tabstop>swap_title_and_author</tabstop>
  <tabstop>change_title_to_title_case</tabstop>
  <tabstop>button_box</tabstop>
  <tabstop>central_widget</tabstop>
  <tabstop>search_field</tabstop>
  <tabstop>search_mode</tabstop>
  <tabstop>s_r_template</tabstop>
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -823,7 +823,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                                if book.series_index is not None:
                                    self.series_index.setValue(book.series_index)
                        if book.has_cover:
-                            if d.opt_auto_download_cover.isChecked() and book.has_cover:
+                            if d.opt_auto_download_cover.isChecked():
                                self.fetch_cover()
                            else:
                                self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
--- a/src/calibre/gui2/dialogs/user_profiles.py
+++ b/src/calibre/gui2/dialogs/user_profiles.py
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import time, os
 from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
-        QVariant, QInputDialog
+        QVariant
 from calibre.web.feeds.recipes import compile_recipe
 from calibre.web.feeds.news import AutomaticNewsRecipe
@ -256,15 +256,52 @@ class %(classname)s(%(base_class)s):
    def add_builtin_recipe(self):
        from calibre.web.feeds.recipes.collection import \
-            get_builtin_recipe_by_title, get_builtin_recipe_titles
+            get_builtin_recipe_collection, get_builtin_recipe_by_id
-        items = sorted(get_builtin_recipe_titles(), key=sort_key)
+        from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
                QListWidget, QDialogButtonBox, QSize
        d = QDialog(self)
        d.l = QVBoxLayout()
        d.setLayout(d.l)
        d.list = QListWidget(d)
        d.list.doubleClicked.connect(lambda x: d.accept())
        d.l.addWidget(d.list)
        d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
                Qt.Horizontal, d)
        d.bb.accepted.connect(d.accept)
        d.bb.rejected.connect(d.reject)
        d.l.addWidget(d.bb)
        d.setWindowTitle(_('Choose builtin recipe'))
        items = []
        for r in get_builtin_recipe_collection():
            id_ = r.get('id', '')
            title = r.get('title', '')
            lang = r.get('language', '')
            if id_ and title:
                items.append((title + ' [%s]'%lang, id_))
        items.sort(key=lambda x:sort_key(x[0]))
        for title, id_ in items:
            item = QListWidgetItem(title)
            item.setData(Qt.UserRole, id_)
            d.list.addItem(item)
        d.resize(QSize(450, 400))
        ret = d.exec_()
        d.list.doubleClicked.disconnect()
        if ret != d.Accepted:
            return
        items = list(d.list.selectedItems())
        if not items:
            return
        item = items[-1]
        id_ = unicode(item.data(Qt.UserRole).toString())
        title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
        profile = get_builtin_recipe_by_id(id_)
        if profile is None:
            raise Exception('Something weird happened')
        title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'),
                                     items, 0, False)
        if ok:
            title = unicode(title)
            profile = get_builtin_recipe_by_title(title)
        if self._model.has_title(title):
            if question_dialog(self, _('Replace recipe?'),
                _('A custom recipe named %s already exists. Do you want to '
--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
 from functools import partial
 from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
-    pyqtSignal, QToolButton, QPushButton, \
+    pyqtSignal, QToolButton, QMenu, QCheckBox, \
-    QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \
+    QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
-    QMenu
+
 from calibre.constants import __appname__
 from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
        x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
        l.addWidget(x)
-        self.search_button = QPushButton(_('&Go!'))
+        self.search_button = QToolButton()
        self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
        self.search_button.setText(_('&Go!'))
        l.addWidget(self.search_button)
        self.search_button.setSizePolicy(QSizePolicy.Minimum,
                QSizePolicy.Minimum)
@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{
        l.addWidget(x)
        x.setToolTip(_("Reset Quick Search"))
        x = parent.search_highlight_only = QCheckBox()
        x.setText(_('&Highlight'))
        x.setToolTip(_('Highlight matched books in the book list, instead '
            'of restricting the book list to the matches.'))
        l.addWidget(x)
        x = parent.saved_search = SavedSearchBox(self)
        x.setMaximumSize(QSize(150, 16777215))
        x.setMinimumContentsLength(15)
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -10,7 +10,7 @@ from contextlib import closing
 from operator import attrgetter
 from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
-        QModelIndex, QVariant, QDate
+        QModelIndex, QVariant, QDate, QColor
 from calibre.gui2 import NONE, config, UNDEFINED_QDATE
 from calibre.utils.pyparsing import ParseException
@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{
        self.bool_no_icon = QIcon(I('list_remove.png'))
        self.bool_blank_icon = QIcon(I('blank.png'))
        self.device_connected = False
        self.rows_matching = set()
        self.lowest_row_matching = None
        self.highlight_only = False
        self.read_config()
    def change_alignment(self, colname, alignment):
@ -229,8 +232,26 @@ class BooksModel(QAbstractTableModel): # {{{
            self.endInsertRows()
            self.count_changed()
    def set_highlight_only(self, toWhat):
        '''
        Store the highlight-only flag and, when self.last_search is truthy,
        call self.research() so the new mode is applied.

        :param toWhat: The new value for self.highlight_only.
        '''
        self.highlight_only = toWhat
        if not self.last_search:
            # Nothing has been searched for, so there is nothing to redo
            return
        self.research()
    def search(self, text, reset=True):
        try:
            if self.highlight_only:
                self.db.search('')
                if not text:
                    self.rows_matching = set()
                    self.lowest_row_matching = None
                else:
                    self.rows_matching = self.db.search(text, return_matches=True)
                    if self.rows_matching:
                        self.lowest_row_matching = self.db.row(self.rows_matching[0])
                    self.rows_matching = set(self.rows_matching)
            else:
                self.rows_matching = set()
                self.lowest_row_matching = None
                self.db.search(text)
        except ParseException as e:
            self.searched.emit(e.msg)
@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
            name, val = mi.format_field(key)
            if mi.metadata_for_field(key)['datatype'] == 'comments':
                name += ':html'
-            if val:
+            if val and name not in data:
                data[name] = val
        return data
@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{
            return NONE
        if role in (Qt.DisplayRole, Qt.EditRole):
            return self.column_to_dc_map[col](index.row())
        elif role == Qt.BackgroundColorRole:
            if self.id(index) in self.rows_matching:
                return QColor('lightgreen')
        elif role == Qt.DecorationRole:
            if self.column_to_dc_decorator_map[col] is not None:
                return self.column_to_dc_decorator_map[index.column()](index.row())
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{
    def set_editable(self, editable, supports_backloading):
        self._model.set_editable(editable)
    def search_proxy(self, txt):
        '''
        Run the search on the model, select the row the model reports as
        its lowest matching row (if any) and give this view the focus.

        :param txt: The search text passed on to the model's search().
        '''
        model = self._model
        model.search(txt)
        first_match = model.lowest_row_matching
        if first_match is not None:
            # The model reports a row number, not a book id
            self.select_rows([first_match], using_ids=False)
        self.setFocus(Qt.OtherFocusReason)
    def connect_to_search_box(self, sb, search_done):
-        sb.search.connect(self._model.search)
+        sb.search.connect(self.search_proxy)
        self._search_done = search_done
        self._model.searched.connect(self.search_done)
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@ -15,7 +15,8 @@ from calibre.gui2.preferences.plugins_ui import Ui_Form
 from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin, \
                                 disable_plugin, plugin_customization, add_plugin, \
                                 remove_plugin
-from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files
+from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
        question_dialog
 class PluginModel(QAbstractItemModel): # {{{
@ -76,6 +77,16 @@ class PluginModel(QAbstractItemModel): # {{{
                    return self.index(j, 0, parent)
        return QModelIndex()
    def plugin_to_index_by_properties(self, plugin):
        '''
        Find the model index of the entry whose name, type, author and
        version all equal those of plugin.

        :param plugin: Object providing name, type, author and version.
        :return: The index of the first matching entry, created under the
                 index of its category, or an empty QModelIndex() when no
                 entry matches.
        '''
        identity = ('name', 'type', 'author', 'version')
        for row, category in enumerate(self.categories):
            category_index = self.index(row, 0, QModelIndex())
            for child_row, candidate in enumerate(self._data[category]):
                # Lazy comparison: stops at the first differing attribute
                same = all(getattr(plugin, attr) == getattr(candidate, attr)
                        for attr in identity)
                if same:
                    return self.index(child_row, 0, category_index)
        return QModelIndex()
    def refresh_plugin(self, plugin, rescan=False):
        if rescan:
            self.populate()
@ -132,7 +143,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.toggle_plugin_button.clicked.connect(self.toggle_plugin)
        self.customize_plugin_button.clicked.connect(self.customize_plugin)
        self.remove_plugin_button.clicked.connect(self.remove_plugin)
        self.button_plugin_browse.clicked.connect(self.find_plugin)
        self.button_plugin_add.clicked.connect(self.add_plugin)
    def toggle_plugin(self, *args):
@ -149,23 +159,39 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.modify_plugin(op='remove')
    def add_plugin(self):
-        path = unicode(self.plugin_path.text())
+        path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
                filters=[(_('Plugins'), ['zip'])], all_files=False,
                    select_only_single_file=True)
        if not path:
            return
        path = path[0]
        if path and  os.access(path, os.R_OK) and path.lower().endswith('.zip'):
-            add_plugin(path)
+            if not question_dialog(self, _('Are you sure?'), '<p>' + \
                    _('Installing plugins is a <b>security risk</b>. '
                    'Plugins can contain a virus/malware. '
                        'Only install it if you got it from a trusted source.'
                        ' Are you sure you want to proceed?'),
                    show_copy_button=False):
                return
            plugin = add_plugin(path)
            self._plugin_model.populate()
            self._plugin_model.reset()
            self.changed_signal.emit()
-            self.plugin_path.setText('')
+            info_dialog(self, _('Success'),
                    _('Plugin <b>{0}</b> successfully installed under <b>'
                        ' {1} plugins</b>. You may have to restart calibre '
                        'for the plugin to take effect.').format(plugin.name, plugin.type),
                    show=True, show_copy_button=False)
            idx = self._plugin_model.plugin_to_index_by_properties(plugin)
            if idx.isValid():
                self.plugin_view.scrollTo(idx,
                        self.plugin_view.PositionAtCenter)
                self.plugin_view.scrollTo(idx,
                        self.plugin_view.PositionAtCenter)
        else:
            error_dialog(self, _('No valid plugin path'),
                         _('%s is not a valid plugin path')%path).exec_()
    def find_plugin(self):
        '''
        Ask for a single plugin zip file via choose_files() and, if one
        was picked, put its path into the plugin_path field.
        '''
        chosen = choose_files(self, 'choose plugin dialog', _('Choose plugin'),
                            filters=[('Plugins', ['zip'])], all_files=False,
                            select_only_single_file=True)
        if not chosen:
            return
        self.plugin_path.setText(chosen[0])
    def modify_plugin(self, op=''):
        index = self.plugin_view.currentIndex()
@ -191,10 +217,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                if plugin.do_user_config():
                    self._plugin_model.refresh_plugin(plugin)
            elif op == 'remove':
                msg = _('Plugin {0} successfully removed').format(plugin.name)
                if remove_plugin(plugin):
                    self._plugin_model.populate()
                    self._plugin_model.reset()
                    self.changed_signal.emit()
                    info_dialog(self, _('Success'), msg, show=True,
                            show_copy_button=False)
                else:
                    error_dialog(self, _('Cannot remove builtin plugin'),
                         plugin.name + _(' cannot be removed. It is a '
--- a/src/calibre/gui2/preferences/plugins.ui
+++ b/src/calibre/gui2/preferences/plugins.ui
@ -72,67 +72,17 @@
    </layout>
   </item>
   <item>
-    <widget class="QGroupBox" name="groupBox_4">
+    <widget class="QPushButton" name="button_plugin_add">
     <property name="title">
      <string>Add new plugin</string>
     </property>
     <layout class="QVBoxLayout" name="verticalLayout_5">
      <item>
       <layout class="QHBoxLayout" name="horizontalLayout_5">
        <item>
         <widget class="QLabel" name="label_14">
     <property name="text">
-           <string>Plugin &amp;file:</string>
+      <string>&amp;Add a new plugin</string>
          </property>
          <property name="buddy">
           <cstring>plugin_path</cstring>
          </property>
         </widget>
        </item>
        <item>
         <widget class="QLineEdit" name="plugin_path"/>
        </item>
        <item>
         <widget class="QToolButton" name="button_plugin_browse">
          <property name="text">
           <string>...</string>
     </property>
     <property name="icon">
      <iconset resource="../../../../resources/images.qrc">
-            <normaloff>:/images/document_open.png</normaloff>:/images/document_open.png</iconset>
+       <normaloff>:/images/plugins.png</normaloff>:/images/plugins.png</iconset>
     </property>
    </widget>
   </item>
  </layout>
      </item>
      <item>
       <layout class="QHBoxLayout" name="horizontalLayout_4">
        <item>
         <spacer name="horizontalSpacer_2">
          <property name="orientation">
           <enum>Qt::Horizontal</enum>
          </property>
          <property name="sizeHint" stdset="0">
           <size>
            <width>40</width>
            <height>20</height>
           </size>
          </property>
         </spacer>
        </item>
        <item>
         <widget class="QPushButton" name="button_plugin_add">
          <property name="text">
           <string>&amp;Add</string>
          </property>
         </widget>
        </item>
       </layout>
      </item>
     </layout>
    </widget>
   </item>
  </layout>
 </widget>
 <resources>
  <include location="../../../../resources/images.qrc"/>
--- a/src/calibre/gui2/preferences/toolbar.py
+++ b/src/calibre/gui2/preferences/toolbar.py
@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
                    dont_remove_from=set(['toolbar-device']))
        if name is None:
            return FakeAction('--- '+_('Separator')+' ---', None)
        try:
            return gui.iactions[name]
        except:
            return None
    def rowCount(self, parent):
        '''Return the number of entries in self._data; parent is not used.'''
        return len(self._data)
@ -125,6 +128,7 @@ class CurrentModel(BaseModel):
        self.gprefs_name = 'action-layout-'+key
        current = gprefs[self.gprefs_name]
        self._data = [self.name_to_action(x, gui) for x in current]
        self._data = [x for x in self._data if x is not None]
        self.key = key
        self.gui = gui
--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@ -16,6 +16,7 @@ from calibre.gui2 import config
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
 from calibre.gui2.dialogs.search import SearchDialog
 from calibre.utils.config import dynamic
 from calibre.utils.search_query_parser import saved_searches
 from calibre.utils.icu import sort_key
@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{
            unicode(self.search.toolTip())))
        self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
        self.clear_button.setStatusTip(self.clear_button.toolTip())
        self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
        self.search_highlight_only.setChecked(
                            dynamic.get('search_highlight_only', False))
    def focus_search_box(self, *args):
        self.search.setFocus(Qt.OtherFocusReason)
@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{
    def focus_to_library(self):
        '''Give keyboard focus to the view returned by self.current_view().'''
        self.current_view().setFocus(Qt.OtherFocusReason)
    def highlight_only_changed(self, toWhat):
        '''
        Store the new highlight-only value under 'search_highlight_only'
        in dynamic, pass it to the current view's model and move the focus
        back to the library view.

        :param toWhat: The value received from the connected signal.
        '''
        dynamic.set('search_highlight_only', toWhat)
        model = self.current_view().model()
        model.set_highlight_only(toWhat)
        self.focus_to_library()
    # }}}
 class SavedSearchBoxMixin(object): # {{{
--- a/src/calibre/gui2/shortcuts.py
+++ b/src/calibre/gui2/shortcuts.py
@ -150,7 +150,7 @@ class Delegate(QStyledItemDelegate):
        custom = []
        if editor.custom.isChecked():
            for x in ('1', '2'):
-                sc = getattr(editor, 'shortcut'+x)
+                sc = getattr(editor, 'shortcut'+x, None)
                if sc is not None:
                    custom.append(sc)
@ -266,6 +266,11 @@ class ShortcutConfig(QWidget):
        self.view.scrollTo(index)
    @property
    def is_editing(self):
        '''True when the view's current state equals its EditingState.'''
        view = self.view
        current_state = view.state()
        return current_state == view.EditingState
 if __name__ == '__main__':
    from calibre.gui2 import is_ok_to_use_qt
    from calibre.gui2.viewer.keys import SHORTCUTS
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -19,7 +19,7 @@ from PyQt4.Qt import Qt, SIGNAL, QTimer, \
                     QMessageBox, QHelpEvent
 from calibre import  prints
-from calibre.constants import __appname__, isosx, DEBUG
+from calibre.constants import __appname__, isosx
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.config import prefs, dynamic
 from calibre.utils.ipc.server import Server
@ -103,7 +103,15 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
        self.gui_debug = gui_debug
        acmap = OrderedDict()
        for action in interface_actions():
            try:
                ac = action.load_actual_plugin(self)
            except:
                # Ignore errors in loading user supplied plugins
                import traceback
                traceback.print_exc()
                if ac.plugin_path is None:
                    raise
            ac.plugin_path = action.plugin_path
            ac.interface_action_base_plugin = action
            if ac.name in acmap:
@ -460,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
        try:
            if 'calibre.ebooks.DRMError' in job.details:
                if not minz:
-                    d = error_dialog(self, _('Conversion Error'),
+                    from calibre.gui2.dialogs.drm_error import DRMErrorMessage
-                        _('<p>Could not convert: %s<p>It is a '
+                    d = DRMErrorMessage(self, job.description.split(':')[-1])
                        '<a href="%s">DRM</a>ed book. You must first remove the '
                        'DRM using third party tools.')%\
                            (job.description.split(':')[-1],
                                'http://bugs.calibre-ebook.com/wiki/DRM'))
                    d.setModal(False)
                    d.show()
                    self._modeless_dialogs.append(d)
@ -582,9 +586,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
            # Goes here, because if cf is valid, db is valid.
            db.prefs['field_metadata'] = db.field_metadata.all_metadata()
            db.commit_dirty_cache()
            if DEBUG and db.gm_count > 0:
                print 'get_metadata cache: {0:d} calls, {1:4.2f}% misses'.format(
                        db.gm_count, (db.gm_missed*100.0)/db.gm_count)
        for action in self.iactions.values():
            if not action.shutting_down():
                return
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -120,6 +120,13 @@ class ConfigDialog(QDialog, Ui_Dialog):
    def accept(self, *args):
        if self.shortcut_config.is_editing:
            from calibre.gui2 import info_dialog
            info_dialog(self, _('Still editing'),
                    _('You are in the middle of editing a keyboard shortcut'
                        ' first complete that, by clicking outside the '
                        ' shortcut editing box.'), show=True)
            return
        c = config()
        c.set('serif_family', unicode(self.serif_family.currentFont().family()))
        c.set('sans_family', unicode(self.sans_family.currentFont().family()))
@ -279,7 +286,7 @@ class Document(QWebPage): # {{{
    @pyqtSignature("")
    def init_hyphenate(self):
-        if self.hyphenate:
+        if self.hyphenate and getattr(self, 'loaded_lang', ''):
            self.javascript('do_hyphenation("%s")'%self.loaded_lang)
    def after_load(self):
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
 from calibre.ebooks.metadata import MetaInformation
 from calibre.customize.ui import available_input_formats
 from calibre.gui2.viewer.dictionary import Lookup
 from calibre import as_unicode
 class TOCItem(QStandardItem):
@ -626,13 +627,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
            QApplication.processEvents()
        if worker.exception is not None:
            if isinstance(worker.exception, DRMError):
-                error_dialog(self, _('DRM Error'),
+                from calibre.gui2.dialogs.drm_error import DRMErrorMessage
-                        _('<p>This book is protected by <a href="%s">DRM</a>')
+                DRMErrorMessage(self).exec_()
                        %'http://wiki.mobileread.com/wiki/DRM').exec_()
            else:
                r = getattr(worker.exception, 'reason', worker.exception)
                error_dialog(self, _('Could not open ebook'),
-                        unicode(r), det_msg=worker.traceback, show=True)
+                        as_unicode(r), det_msg=worker.traceback, show=True)
            self.close_progress_indicator()
        else:
            self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
            if isinstance(location, list):
                if allow_recursion:
                    for loc in location:
-                        matches |= self.get_matches(loc, query, allow_recursion=False)
+                        matches |= self.get_matches(loc, query, candidates,
                                                    allow_recursion=False)
                    return matches
                raise ParseException(query, len(query), 'Recursive query group detected', self)
@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
                fm = self.field_metadata[location]
                # take care of dates special case
                if fm['datatype'] == 'datetime':
-                    return self.get_dates_matches(location, query.lower())
+                    return self.get_dates_matches(location, query.lower(), candidates)
                # take care of numbers special case
                if fm['datatype'] in ('rating', 'int', 'float'):
-                    return self.get_numeric_matches(location, query.lower())
+                    return self.get_numeric_matches(location, query.lower(), candidates)
                # take care of the 'count' operator for is_multiples
                if fm['is_multiple'] and \
@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
                        query[1:1] in '=<>!':
                    vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
                            len(item[loc].split(ms)) if item[loc] is not None else 0
-                    return self.get_numeric_matches(location, query[1:], val_func=vf)
+                    return self.get_numeric_matches(location, query[1:],
                                                    candidates, val_func=vf)
            # everything else, or 'all' matches
            matchkind = CONTAINS_MATCH
@ -598,7 +600,6 @@ class ResultCache(SearchQueryParser): # {{{
    def set(self, row, col, val, row_is_id=False):
        id = row if row_is_id else self._map_filtered[row]
        self._data[id][self.FIELD_MAP['all_metadata']] = None
        self._data[id][col] = val
    def get(self, row, col, row_is_id=False):
@ -629,7 +630,6 @@ class ResultCache(SearchQueryParser): # {{{
                self._data[id] = CacheRow(db, self.composites,
                        db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
                self._data[id].append(db.book_on_device_string(id))
                self._data[id].append(None)
            except IndexError:
                return None
        try:
@ -646,7 +646,6 @@ class ResultCache(SearchQueryParser): # {{{
            self._data[id] = CacheRow(db, self.composites,
                        db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
            self._data[id].append(db.book_on_device_string(id))
            self._data[id].append(None)
        self._map[0:0] = ids
        self._map_filtered[0:0] = ids
@ -671,7 +670,6 @@ class ResultCache(SearchQueryParser): # {{{
        for item in self._data:
            if item is not None:
                item.append(db.book_on_device_string(item[0]))
                item.append(None)
        self._map = [i[0] for i in self._data if i is not None]
        if field is not None:
            self.sort(field, ascending)
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1524,17 +1524,30 @@ class EPUB_MOBI(CatalogPlugin):
                    this_title['formats'] = formats
                # Add user notes to be displayed in header
-                # Special case handling for datetime fields
+                # Special case handling for datetime fields and lists
                if self.opts.header_note_source_field:
                    field_md = self.__db.metadata_for_field(self.opts.header_note_source_field)
                    notes = self.__db.get_field(record['id'],
                                        self.opts.header_note_source_field,
                                        index_is_id=True)
                    if notes and field_md['datatype'] == 'datetime':
                        # Reformat date fields to match UI presentation: dd MMM YYYY
                        notes = format_date(notes,'dd MMM yyyy')
                    if notes:
                        if field_md['datatype'] == 'text':
                            if isinstance(notes,list):
                                notes = ' &middot; '.join(notes)
                        elif field_md['datatype'] == 'datetime':
                            notes = format_date(notes,'dd MMM yyyy')
                        elif field_md['datatype'] == 'composite':
                            m = re.match(r'\[(.+)\]$', notes)
                            if m is not None:
                                # Sniff for special pseudo-list string "[<item, item>]"
                                bracketed_content = m.group(1)
                                if ',' in bracketed_content:
                                    # Recast the comma-separated items as a list
                                    items = bracketed_content.split(',')
                                    items = [i.strip() for i in items]
                                    notes = ' &middot; '.join(items)
                                else:
                                    notes = bracketed_content
                        this_title['notes'] = {'source':field_md['name'],
                                                   'content':notes}
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -298,10 +298,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                            base,
                            prefer_custom=True)
-        self.FIELD_MAP['ondevice'] = base+1
+        self.FIELD_MAP['ondevice'] = base = base+1
-        self.field_metadata.set_field_record_index('ondevice', base+1, prefer_custom=False)
+        self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
        self.FIELD_MAP['all_metadata'] = base+2
        self.field_metadata.set_field_record_index('all_metadata', base+2, prefer_custom=False)
        script = '''
        DROP VIEW IF EXISTS meta2;
@ -343,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self.has_id  = self.data.has_id
        self.count   = self.data.count
        # Count times get_metadata is called, and how many times in the cache
        self.gm_count  = 0
        self.gm_missed = 0
        for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
                     'publisher', 'rating', 'series', 'series_index', 'tags',
                     'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):
@ -690,19 +684,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        '''
        row = self.data._data[idx] if index_is_id else self.data[idx]
        fm = self.FIELD_MAP
        self.gm_count += 1
        mi = row[self.FIELD_MAP['all_metadata']]
        if mi is not None:
            if get_cover:
                # Always get the cover, because the value can be wrong if the
                # original mi was from the OPF
                mi.cover = self.cover(idx, index_is_id=index_is_id, as_path=True)
            return mi
        self.gm_missed += 1
        mi = Metadata(None)
        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
        aut_list = row[fm['au_map']]
        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
@ -724,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        formats = row[fm['formats']]
        if not formats:
            formats = None
        else:
            formats = formats.split(',')
        mi.formats = formats
        tags = row[fm['tags']]
        if tags:
@ -1387,7 +1371,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            if r is not None:
                if (now - r[self.FIELD_MAP['timestamp']]) > delta:
                    tags = r[self.FIELD_MAP['tags']]
-                    if tags and tag in tags.lower():
+                    if tags and tag in [x.strip() for x in
                            tags.lower().split(',')]:
                        yield r[self.FIELD_MAP['id']]
    def get_next_series_num_for(self, series):
--- a/src/calibre/library/field_metadata.py
+++ b/src/calibre/library/field_metadata.py
@ -162,15 +162,6 @@ class FieldMetadata(dict):
                           'search_terms':['tags', 'tag'],
                           'is_custom':False,
                           'is_category':True}),
            ('all_metadata',{'table':None,
                             'column':None,
                             'datatype':None,
                             'is_multiple':None,
                             'kind':'field',
                             'name':None,
                             'search_terms':[],
                             'is_custom':False,
                             'is_category':False}),
            ('author_sort',{'table':None,
                            'column':None,
                            'datatype':'text',
--- a/src/calibre/trac/bzr_commit_plugin.py
+++ b/src/calibre/trac/bzr_commit_plugin.py
@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
            suffix = 'The fix will be in the next release.'
        action = action+'ed'
        msg = '%s in branch %s. %s'%(action, nick, suffix)
        msg = msg.replace('Fixesed', 'Fixed')
        server = xmlrpclib.ServerProxy(url)
        server.ticket.update(int(bug), msg,
                             {'status':'closed', 'resolution':'fixed'},
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2010, sengian <sengian1@gmail.com>'
 __docformat__ = 'restructuredtext en'
-import re
+import re, htmlentitydefs
 _ascii_pat = None
@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None):
        pat = re.compile(u'|'.join(map(unichr, charlist)))
    return pat.sub('', txt)
 ##
 # Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
 # Removes HTML or XML character references and entities from a text string.
 #
 # @param text The HTML (or XML) source text.
 # @return The plain text, as a Unicode string, if necessary.
 def unescape(text, rm=False, rchar=u''):
    '''
    Replace HTML/XML character references and named entities in ``text``
    with the characters they stand for.

    :param text: The HTML (or XML) source text.
    :param rm: If True, references that cannot be resolved are replaced by
               ``rchar``; if False they are left in the text untouched.
    :param rchar: Replacement used for unresolvable references when ``rm``
                  is True.
    :return: The text with every resolvable reference substituted.
    '''
    def fixup(m, rm=rm, rchar=rchar):
        text = m.group(0)
        if text[:2] == "&#":
            # numeric character reference, decimal or hexadecimal
            try:
                if text[2:3] in ('x', 'X'):
                    return unichr(int(text[3:-1], 16))
                return unichr(int(text[2:-1]))
            except (ValueError, OverflowError):
                # not a number, or a code point unichr() cannot represent
                pass
        else:
            # named entity: return as soon as it resolves, so that rm only
            # ever applies to references that could not be decoded
            try:
                return unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        if rm:
            return rchar #replace by char
        return text # leave as is
    return re.sub(r"&#?\w+;", fixup, text)
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@ -18,6 +18,24 @@ class _Parser(object):
    LEX_NUM = 4
    LEX_EOF = 5
    def _python(self, func):
        '''
        Run ``func`` as Python source and return the value of the
        ``evaluate`` function it defines, called with the parent
        formatter's kwargs.

        Numbers and byte strings are converted to unicode, lists are joined
        with commas; any other result is returned unchanged. Problems are
        reported through self.error().
        '''
        # NOTE(review): exec runs arbitrary code taken from the template
        # text -- confirm templates only ever come from trusted sources.
        namespace = {}
        exec(func, namespace)
        if 'evaluate' not in namespace:
            self.error('no evaluate function in python')
        try:
            result = namespace['evaluate'](self.parent.kwargs)
            if isinstance(result, (float, int)):
                result = unicode(result)
            elif isinstance(result, list):
                result = ','.join(result)
            elif isinstance(result, str):
                result = unicode(result)
            return result
        except Exception as e:
            # Most exceptions have no .msg attribute; repr() always works
            # and keeps the exception type visible in the report.
            self.error('python function threw exception: ' + repr(e))
    def _strcmp(self, x, y, lt, eq, gt):
        v = strcmp(x, y)
        if v < 0:
@ -79,6 +97,7 @@ class _Parser(object):
            'field'    : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
            'multiply' : (2, partial(_math, op='*')),
            'print'    : (-1, _print),
            'python'   : (1, _python),
            'strcat'   : (-1, _concat),
            'strcmp'   : (5, _strcmp),
            'substr'   : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
                (r'\'.*?((?<!\\)\')',   lambda x,t: (3, t[1:-1])),
                (r'\n#.*?(?=\n)',       None),
                (r'\s',                 None)
-        ])
+        ], flags=re.DOTALL)
    def _eval_program(self, val, prog):
        # keep a cache of the lex'ed program under the theory that re-lexing
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@ -92,6 +92,9 @@ def identify_data(data):
    or raises an Exception if data is not an image.
    '''
    img = Image()
    if hasattr(img, 'identify'):
        img.identify(data)
    else:
        img.load(data)
    width, height = img.size
    fmt = img.format
--- a/src/calibre/utils/magick/magick.c
+++ b/src/calibre/utils/magick/magick.c
@ -456,6 +456,26 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
 // }}}
 // Image.identify {{{
 static PyObject *
 magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) {
    // Ping the image held in the byte buffer passed from Python into this
    // object's wand. Returns None on success; on failure the error is
    // reported through magick_set_exception().
    const char *blob;
    Py_ssize_t blob_len;
    NULL_CHECK(NULL)
    if (!PyArg_ParseTuple(args, "s#", &blob, &blob_len)) return NULL;
    if (MagickPingImageBlob(self->wand, blob, blob_len) == MagickFalse)
        return magick_set_exception(self->wand);
    Py_RETURN_NONE;
 }
 // }}}
 // Image.open {{{
 static PyObject *
 magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = {
    {"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS,
    "Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."},
    {"identify", (PyCFunction)magick_Image_identify, METH_VARARGS,
     "Identify an image from a byte buffer (string)"
    },
    {"load", (PyCFunction)magick_Image_load, METH_VARARGS,
     "Load an image from a byte buffer (string)"
    },
--- a/src/calibre/utils/wmf/init.py
+++ b/src/calibre/utils/wmf/init.py
@ -5,5 +5,52 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import glob
 from calibre.constants import plugins, iswindows, filesystem_encoding
 from calibre.ptempfile import TemporaryDirectory
 from calibre import CurrentDir
 from calibre.utils.magick import Image, PixelWand
 class Unavailable(Exception):
    '''
    Raised by extract_raster_image() when the wmf plugin is missing or
    reported an error while loading.
    '''
    pass
 class NoRaster(Exception):
    '''
    Raised by extract_raster_image() when rendering the WMF data produced
    no PNG files.
    '''
    pass
 def extract_raster_image(wmf_data):
    '''
    Render ``wmf_data`` with the wmf plugin and return the first resulting
    raster image as PNG data.

    Rendering happens with a temporary directory as the current directory;
    the first ``*.png`` file (in sorted order) found there afterwards is
    loaded, rotated by 180 degrees using a white PixelWand and re-exported
    as PNG.

    :param wmf_data: The WMF data, passed unchanged to the plugin's render().
    :return: The result of ``Image.export('png')``.
    :raises Unavailable: If the wmf plugin is missing or failed to load.
    :raises NoRaster: If rendering produced no PNG files.
    '''
    try:
        wmf, wmf_err = plugins['wmf']
    except KeyError:
        raise Unavailable('libwmf not available on this platform')
    if wmf_err:
        raise Unavailable(wmf_err)
    if iswindows:
        import sys, os
        appdir = sys.app_dir
        if isinstance(appdir, unicode):
            appdir = appdir.encode(filesystem_encoding)
        fdir = os.path.join(appdir, 'wmffonts')
        wmf.set_font_dir(fdir)
    data = ''
    with TemporaryDirectory('wmf2png') as tdir:
        with CurrentDir(tdir):
            wmf.render(wmf_data)
            images = sorted(glob.glob('*.png'))
            if not images:
                raise NoRaster('No raster images in WMF')
            # Close the file explicitly so no handle is left open when the
            # temporary directory is cleaned up.
            with open(images[0], 'rb') as f:
                data = f.read()
    im = Image()
    im.load(data)
    pw = PixelWand()
    pw.color = '#ffffff'
    im.rotate(pw, 180)
    return im.export('png')
--- a/src/calibre/utils/wmf/wmf.c
+++ b/src/calibre/utils/wmf/wmf.c
@ -4,6 +4,7 @@
 #include <libwmf/api.h>
 #include <libwmf/svg.h>
 //#include <libwmf/gd.h>
 typedef struct {
    char *data;
@ -13,7 +14,7 @@ typedef struct {
 //This code is taken mostly from the Abiword wmf plugin
-
+// Buffer read {{{
 // returns unsigned char cast to int, or EOF
 static int wmf_WMF_read(void * context) {
    char c;
@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
 	if (info->pos == info->len)
 		return EOF;
-	c = info->data[pos];
+	c = info->data[info->pos];
 	info->pos++;
-	return (int)c;
+	return (int)((unsigned char)c);
 }
 // returns (-1) on error, else 0
@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
 	return (long) info->pos;
 }
 // }}}
 char _png_name_buf[100];
 char *wmf_png_name(void *ctxt) {
    // ctxt points at an int counter. Each call increments it and formats
    // the new value as a zero padded "NNNN.png" name into the shared
    // buffer above, so the returned pointer is overwritten by the next call.
    int *counter = (int *)ctxt;
    ++(*counter);
    snprintf(_png_name_buf, 90, "%04d.png", *counter);
    return _png_name_buf;
 }
 #define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
 static PyObject *
@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
 	unsigned int max_width  = 1600;
 	unsigned int max_height = 1200;
 	unsigned long max_flags = 0;
 	static const char* Default_Description = "wmf2svg";
    int fname_counter = 0;
 	wmf_error_t err;
@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
 	ddata->Description = (char *)Default_Description;
 	ddata->bbox = bbox;
    ddata->image.context = (void *)&fname_counter;
    ddata->image.name = wmf_png_name;
 	wmf_display_size(API, &disp_width, &disp_height, 96, 96);
@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
 		ddata->height = (unsigned int) ceil ((double) wmf_height);
 	}
-	ddata->flags |= WMF_SVG_INLINE_IMAGES;
+    // Needs GD
-
+	//ddata->flags |= WMF_SVG_INLINE_IMAGES;
-	ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
+	//ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
    err = wmf_play(API, 0, &(bbox));
@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
    return ans;
 }
 #ifdef _WIN32
 void set_libwmf_fontdir(const char *);
 static PyObject *
 wmf_setfontdir(PyObject *self, PyObject *args) {
    // Takes a single string argument from Python and hands it to
    // set_libwmf_fontdir(). Returns None, or NULL if argument parsing fails.
    char *font_dir = NULL;
    if (PyArg_ParseTuple(args, "s", &font_dir)) {
        set_libwmf_fontdir(font_dir);
        Py_RETURN_NONE;
    }
    return NULL;
 }
 #endif
 static PyMethodDef wmf_methods[] = {
    {"render", wmf_render, METH_VARARGS,
-        "render(path) -> Render wmf as svg."
+        "render(data) -> Render wmf as svg."
    },
 #ifdef _WIN32
    {"set_font_dir", wmf_setfontdir, METH_VARARGS,
        "set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
    },
 #endif
    {NULL}  /* Sentinel */
 };
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@ -982,9 +982,12 @@ class ZipFile:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
        if fname != zinfo.orig_filename:
-            raise BadZipfile, \
+            print ('WARNING: Header (%r) and directory (%r) filenames do not'
-                      'File name in directory "%s" and header "%s" differ.' % (
+                    ' match inside ZipFile')%(fname, zinfo.orig_filename)
-                          zinfo.orig_filename, fname)
+            print 'Using directory filename %r'%zinfo.orig_filename
            #raise BadZipfile, \
            #          'File name in directory "%r" and header "%r" differ.' % (
            #              zinfo.orig_filename, fname)
        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
    br = browser()
    return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
 def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
    for x in get_builtin_recipe_collection():
        if x.get('title') == title:
@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
                        'Failed to download recipe, using builtin version')
            return P('recipes/%s.recipe'%urn, data=True)
 def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
    '''
    Return the source of the builtin recipe whose ``id`` equals ``id_``.

    :param id_: The recipe id to look for in the builtin recipe collection.
    :param log: Optional log used to report progress and download failures;
                when None, a failed download prints its traceback instead.
    :param download_recipe: If True, first try to download the latest
                version of the recipe, falling back to the bundled copy if
                the download fails.
    :return: The recipe data, or None if no builtin recipe has that id.
    '''
    for x in get_builtin_recipe_collection():
        if x.get('id') == id_:
            # The first 8 characters of the id are dropped to get the urn
            # (presumably a 'builtin:' prefix -- TODO confirm)
            urn = x.get('id')[8:]
            if download_recipe:
                try:
                    if log is not None:
                        log('Trying to get latest version of recipe:', urn)
                    return download_builtin_recipe(urn)
                except Exception:
                    # Best effort only: any download error falls back to
                    # the bundled recipe, but KeyboardInterrupt/SystemExit
                    # are no longer swallowed.
                    if log is None:
                        import traceback
                        traceback.print_exc()
                    else:
                        log.exception(
                        'Failed to download recipe, using builtin version')
            return P('recipes/%s.recipe'%urn, data=True)
 class SchedulerConfig(object):
    def __init__(self):