Merge + remove translation in rtf2xml + modify debug dir integration

Sengian 2011-01-13 08:32:03 +01:00
commit 8da8eca1d3
59 changed files with 2768 additions and 1203 deletions

Binary file not shown (image, 8.8 KiB).

Binary file not shown (image, 3.0 KiB).

View File

@ -1,59 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.expansion.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Expansion(BasicNewsRecipe):
title = 'Diario Expansion'
__author__ = 'Darko Miletic'
description = 'Lider de informacion de mercados, economica y politica'
publisher = 'expansion.com'
category = 'news, politics, Spain'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
delay = 1
encoding = 'iso-8859-15'
language = 'es'
direction = 'ltr'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
]
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
remove_tags = [
dict(name=['object','link','script'])
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
]
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
def preprocess_html(self, soup):
soup.html['dir' ] = self.direction
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
'''
expansion.es
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
title =u'Expansion.com'
__author__ ='Gerardo Diez'
publisher =u'Unidad Editorial Información Económica, S.L.'
category ='finances, catalunya'
oldest_article =1
max_articles_per_feed =100
simultaneous_downloads =10
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
timefmt ='[%A, %d %B, %Y]'
encoding ='latin'
language ='es'
remove_javascript =True
no_stylesheets =True
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
remove_tags =[
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
dict(name='span', attrs={'class':['comentarios']}),
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
dict(name='div', attrs={'id':['comentarios_lectores_listado']})
]
feeds =[
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
(u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
(u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
(u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
(u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
(u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
(u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
(u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
(u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
(u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
(u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
(u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
(u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
(u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
(u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
(u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
(u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
(u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
(u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
(u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
(u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
(u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
(u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
(u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
(u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
]

View File

@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en'
globeandmail.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1287083651(BasicNewsRecipe):
title = u'Globe & Mail'
__license__ = 'GPL v3'
__author__ = 'Szing'
__author__ = 'Kovid Goyal'
oldest_article = 2
no_stylesheets = True
max_articles_per_feed = 100
@ -38,24 +39,19 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
(u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
]
keep_only_tags = [
dict(name='h1'),
dict(name='h2', attrs={'id':'articletitle'}),
dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
dict(name='id', attrs={'class':'article'}),
dict(name='table', attrs={'class':'todays-market'}),
dict(name='header', attrs={'id':'leadheader'})
]
preprocess_regexps = [
(re.compile(r'<head.*?</head>', re.DOTALL), lambda m: '<head></head>'),
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
]
remove_tags_before = dict(name='h1')
remove_tags = [
dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
]
#this has to be here or the text in the article appears twice.
remove_tags_after = [dict(id='article')]
dict(name='div', attrs={'id':['ShareArticles', 'topStories']}),
dict(href=lambda x: x and 'tracking=' in x),
{'class':['articleTools', 'pagination', 'Ads', 'topad',
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
#Use the mobile version rather than the web version
def print_version(self, url):
return url + '&service=mobile'
return url.rpartition('?')[0] + '?service=mobile'
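A quick illustration of what the new print_version() above does (the article URL is made up for the example):

url = 'http://www.theglobeandmail.com/news/world/some-article/article123456/?cmpid=rss1'
print url.rpartition('?')[0] + '?service=mobile'
# -> http://www.theglobeandmail.com/news/world/some-article/article123456/?service=mobile
# Any existing query string is dropped first, instead of appending '&service=mobile' to it as before.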

View File

@ -1,10 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
msnbc.msn.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class MsNBC(BasicNewsRecipe):
@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe):
publisher = 'msnbc.com'
category = 'news, USA, world'
language = 'en'
extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} '
extra_css = """
body{ font-family: Georgia,Times,serif }
.hide{display: none}
.caption{font-family: Arial,sans-serif; font-size: x-small}
.entry-summary{font-family: Arial,sans-serif}
.copyright{font-size: 0.95em; font-style: italic}
.source-org{font-size: small; font-family: Arial,sans-serif}
img{display: block; margin-bottom: 0.5em}
span.byline{display: none}
"""
conversion_options = {
'comments' : description
@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe):
,'publisher': publisher
}
preprocess_regexps = [
(re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>')
,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'),
]
remove_tags_before = dict(name='h1', attrs={'id':'headline'})
remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
keep_only_tags=[
dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
]
remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
remove_tags = [
dict(name=['iframe','object','link','embed','meta','table'])
,dict(name='span', attrs={'class':['copyright','Linear copyright']})
,dict(name='div', attrs={'class':'social'})
]
remove_tags_before = dict(name='div', attrs={'class':'head'})
remove_tags_after = dict(name='div', attrs={'class':'copyright'})
remove_tags = [dict(name=['iframe','object','link','script','form'])]
feeds = [
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' )
@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe):
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' )
]
def print_version(self, url):
return url + 'print/1/displaymode/1098/'
def preprocess_html(self, soup):
for item in soup.head.findAll('div'):
item.extract()
for item in soup.body.findAll('html'):
item.name='div'
for item in soup.body.findAll('div'):
if item.has_key('id') and item['id'].startswith('vine-'):
item.extract()
if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
item.extract()
for item in soup.body.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.body.findAll('ol'):
if item.has_key('class') and item['class'].startswith('grid'):
item.extract()
for item in soup.body.findAll('span'):
if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
item.extract()
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
def get_article_url(self, article):
return article.get('guid', article.get('id', None))
def print_version(self, url):
baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
split1 = string.split(url,"/")
@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
split2= string.split(xxx,"/")
s = baseurl + split2[0]
return s
def postprocess_html(self,soup, True):
#remove picture
headerhtml = soup.find(True, {'class':'header'})
headerhtml.replaceWith("")
#remove close button
closehtml = soup.find(True, {'class':'close'})
closehtml.replaceWith("")
#remove banner advertisement
bannerhtml = soup.find(True, {'class':'bannerad'})
bannerhtml.replaceWith("")
#thanks kiklop74! This code removes all links from the text
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
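The link-flattening loop above (also used in the msnbc recipe) can be seen in isolation with this small sketch; the HTML snippet is invented, BeautifulSoup comes from calibre's bundled copy:

from calibre.ebooks.BeautifulSoup import BeautifulSoup
soup = BeautifulSoup('<p>Read <a href="http://example.com/x">this story</a> now.</p>')
for alink in soup.findAll('a'):
    if alink.string is not None:
        alink.replaceWith(alink.string)
print soup   # -> <p>Read this story now.</p>  (anchor tags replaced by their text)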

View File

@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
if (weeknum > 1):
weeknum -= 1
title = u'.tyzden ' + str(weeknum) + '/' + str(year)
title = u'tyzden'
base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
base_url = base_url_path + '.html'

View File

@ -2,8 +2,10 @@
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode
class Wired_Daily(BasicNewsRecipe):
@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
no_stylesheets = True
preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
'<head></head>')]
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
'outerWrapper', 'inf_widget']),
{'class':['entryActions', 'advertisement', 'entryTags']},
dict(name=['noscript', 'script']),
dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
{'class':lambda x: x and x.startswith('contentjump')},
dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
('Product Reviews',
'http://www.wired.com/reviews/feeds/latestProductsRss'),
('Autopia', 'http://www.wired.com/autopia/feed/'),
('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
('Epicenter', 'http://www.wired.com/epicenter/feed/'),
('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
('Playbook', 'http://www.wired.com/playbook/feed/'),
('Rawfile', 'http://www.wired.com/rawfile/feed/'),
('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
('Underwire', 'http://www.wired.com/underwire/feed/'),
('Web Monkey', 'http://www.webmonkey.com/feed/'),
('Science', 'http://www.wired.com/wiredscience/feed/'),
]
def populate_article_metadata(self, article, soup, first):
if article.text_summary:
article.text_summary = xml_to_unicode(article.text_summary,
resolve_entities=True)[0]
def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
return url + '/all/1'

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.zerohedge.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class ZeroHedge(BasicNewsRecipe):
title = 'Zero Hedge'
__author__ = 'Darko Miletic'
description = 'On a long enough timeline the survival rate for everyone drops to zero'
oldest_article = 10
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = True
encoding = 'utf8'
publisher = 'zero hedge'
category = 'news, USA, world, economy, politics'
language = 'en'
masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png'
publication_type = 'blog'
extra_css = 'body{ font-family: sans-serif }'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher': publisher
}
feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')]

View File

@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
obj = obj.decode('utf-8')
return obj
def as_unicode(obj, enc=preferred_encoding):
if not isbytestring(obj):
try:
obj = unicode(obj)
except:
try:
obj = str(obj)
except:
obj = repr(obj)
return force_unicode(obj, enc=enc)
def human_readable(size):
""" Convert a size in bytes into a human readable form """

View File

@ -88,6 +88,7 @@ class Plumber(object):
self.ui_reporter = report_progress
self.abort_after_input_dump = abort_after_input_dump
# Pipeline options {{{
# Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these
# options via recommendations.
@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp',
help=_('Set the book timestamp (used by the date column in calibre).')),
]
# }}}
input_fmt = os.path.splitext(self.input)[1]
if not input_fmt:

View File

@ -16,7 +16,6 @@ import uuid
from lxml import etree
from calibre import guess_type
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@ -41,7 +40,7 @@ class FB2MLizer(object):
# in different directories. FB2 images are all in a flat layout so we rename all images
# into a sequential numbering system to ensure there are no collisions between image names.
self.image_hrefs = {}
# Mapping of toc items and their
# Mapping of toc items and their
self.toc = {}
# Used to see whether a new <section> needs to be opened
self.section_level = 0
@ -51,7 +50,7 @@ class FB2MLizer(object):
self.oeb_book = oeb_book
self.opts = opts
self.reset_state()
# Used for adding <section>s and <title>s to allow readers
# to generate toc from the document.
if self.opts.sectionize == 'toc':
@ -75,20 +74,20 @@ class FB2MLizer(object):
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
text = re.sub(r'(?miu)\s*</p>', '</p>', text)
text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text)
text = re.sub(r'(?miu)<title>\s*</title>', '', text)
text = re.sub(r'(?miu)\s+</title>', '</title>', text)
text = re.sub(r'(?miu)<section>\s*</section>', '', text)
text = re.sub(r'(?miu)\s*</section>', '\n</section>', text)
text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text)
text = re.sub(r'(?miu)\s*<section>', '\n<section>', text)
text = re.sub(r'(?miu)<section>\s*', '<section>\n', text)
text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
if self.opts.insert_blank_line:
text = re.sub(r'(?miu)</p>', '</p><empty-line />', text)
return text
def fb2_header(self):
@ -102,6 +101,7 @@ class FB2MLizer(object):
metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
metadata['id'] = None
metadata['cover'] = self.get_cover()
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
if len(author_parts) == 1:
@ -121,10 +121,11 @@ class FB2MLizer(object):
break
if metadata['id'] is None:
self.log.warn('No UUID identifier found')
metadata['id'] = str(uuid.uuid4())
metadata['id'] = str(uuid.uuid4())
for key, value in metadata.items():
metadata[key] = prepare_string_for_xml(value)
if not key == 'cover':
metadata[key] = prepare_string_for_xml(value)
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
'<description>' \
@ -136,6 +137,7 @@ class FB2MLizer(object):
'<last-name>%(author_last)s</last-name>' \
'</author>' \
'<book-title>%(title)s</book-title>' \
'%(cover)s' \
'<lang>%(lang)s</lang>' \
'</title-info>' \
'<document-info>' \
@ -154,48 +156,64 @@ class FB2MLizer(object):
def fb2_footer(self):
return u'</FictionBook>'
def get_cover(self):
cover_href = None
# Get the raster cover if it's available.
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES:
cover_href = cover_item.href
else:
# Figure out if we have a title page or a cover page
page_name = ''
if 'titlepage' in self.oeb_book.guide:
page_name = 'titlepage'
elif 'cover' in self.oeb_book.guide:
page_name = 'cover'
if page_name:
cover_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[page_name].href]
# Get the first image in the page
for img in cover_item.xpath('//img'):
cover_href = cover_item.abshref(img.get('src'))
break
if cover_href:
# Only write the image tag if it is in the manifest.
if cover_href in self.oeb_book.manifest.hrefs.keys():
if cover_href not in self.image_hrefs.keys():
self.image_hrefs[cover_href] = '_%s.jpg' % len(self.image_hrefs.keys())
return u'<coverpage><image xlink:href="#%s" /></coverpage>' % self.image_hrefs[cover_href]
return u''
def get_text(self):
text = ['<body>']
# Create main section if there are no others to create
if self.opts.sectionize == 'nothing':
text.append('<section>')
self.section_level += 1
# Insert the title page / cover into the spine if it is not already referenced.
title_name = u''
if 'titlepage' in self.oeb_book.guide:
title_name = 'titlepage'
elif 'cover' in self.oeb_book.guide:
title_name = 'cover'
if title_name:
title_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[title_name].href]
if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
self.oeb_book.spine.insert(0, title_item, True)
# Create xhtml page to reference cover image so it can be used.
if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES:
self.insert_image_cover(cover_item.href)
for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
# Start a <section> if we must sectionize each file or if the TOC references this page
page_section_open = False
if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page':
text.append('<section>')
page_section_open = True
self.section_level += 1
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
if page_section_open:
text.append('</section>')
self.section_level -= 1
# Close any open sections
while self.section_level > 0:
text.append('</section>')
@ -203,17 +221,6 @@ class FB2MLizer(object):
return ''.join(text) + '</body>'
def insert_image_cover(self, image_href):
from calibre.ebooks.oeb.base import RECOVER_PARSER
try:
root = etree.fromstring(u'<html xmlns="%s"><body><img src="%s" /></body></html>' % (XHTML_NS, image_href), parser=RECOVER_PARSER)
except:
root = etree.fromstring(u'', parser=RECOVER_PARSER)
id, href = self.oeb_book.manifest.generate('fb2_cover', 'fb2_cover.xhtml')
item = self.oeb_book.manifest.add(id, href, guess_type(href)[0], data=root)
self.oeb_book.spine.insert(0, item, True)
def fb2mlize_images(self):
'''
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
@ -345,7 +352,7 @@ class FB2MLizer(object):
self.toc[page.href] = None
elif toc_entry and elem_tree.attrib.get('id', None):
newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None)
# Start a new section if necessary
if newlevel:
if not (newlevel > self.section_level):
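To make the cover handling above easier to follow, here is a standalone sketch (not calibre code) of the flat image-numbering scheme that get_cover() and the image dumper share; the hrefs are invented, the '_%s.jpg' pattern is the one used in the diff:

image_hrefs = {}
def register(href):
    # first registration wins; later references reuse the same flat name
    if href not in image_hrefs:
        image_hrefs[href] = '_%s.jpg' % len(image_hrefs)
    return image_hrefs[href]

register('images/cover.jpg')   # -> '_0.jpg', emitted as <coverpage><image xlink:href="#_0.jpg" /></coverpage>
register('ch1/figure.png')     # -> '_1.jpg'
register('images/cover.jpg')   # -> '_0.jpg' again, so the cover image is not written twice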

View File

@ -85,42 +85,42 @@ def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
stream.seek(0)
if stream.read(5) != r'{\rtf':
return MetaInformation(_('Unknown'), None)
return MetaInformation(_('Unknown'))
block = get_document_info(stream)[0]
if not block:
return MetaInformation(_('Unknown'), None)
return MetaInformation(_('Unknown'))
stream.seek(0)
cpg = detect_codepage(stream)
stream.seek(0)
title_match = title_pat.search(block)
if title_match:
if title_match is not None:
title = decode(title_match.group(1).strip(), cpg)
else:
title = _('Unknown')
author_match = author_pat.search(block)
if author_match:
if author_match is not None:
author = decode(author_match.group(1).strip(), cpg)
else:
author = None
mi = MetaInformation(title, author)
mi = MetaInformation(title)
if author:
mi.authors = string_to_authors(author)
comment_match = comment_pat.search(block)
if comment_match:
if comment_match is not None:
comment = decode(comment_match.group(1).strip(), cpg)
mi.comments = comment
tags_match = tags_pat.search(block)
if tags_match:
if tags_match is not None:
tags = decode(tags_match.group(1).strip(), cpg)
mi.tags = tags
publisher_match = publisher_pat.search(block)
if publisher_match:
if publisher_match is not None:
publisher = decode(publisher_match.group(1).strip(), cpg)
mi.publisher = publisher
return mi
def create_metadata(stream, options):
@ -149,7 +149,7 @@ def create_metadata(stream, options):
md.append('}')
stream.seek(0)
src = stream.read()
ans = src[:6] + ''.join(md) + src[6:]
ans = src[:6] + u''.join(md) + src[6:]
stream.seek(0)
stream.write(ans)
@ -197,7 +197,7 @@ def set_metadata(stream, options):
tags = options.tags
if tags is not None:
tags = ', '.join(tags)
tags = tags.encode('ascii', 'ignore')
tags = tags.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\category ' + tags + r'}', src)
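For orientation, a rough, untested sketch of calling get_metadata() on an in-memory RTF document; the module path matches where this code lives in calibre, but the sample document and its field values are invented:

from cStringIO import StringIO
from calibre.ebooks.metadata.rtf import get_metadata

rtf = r'{\rtf1\ansi{\info{\title A Sample Book}{\author Jane Doe}{\category fiction, sample}}Some text.}'
mi = get_metadata(StringIO(rtf))
print mi.title, mi.authors, mi.tags   # title/author come from the regexes above; \category feeds the tags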

View File

@ -77,19 +77,16 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml'
run_lev = 1
if hasattr(self.opts, 'debug_pipeline'):
ofile = 'dataxml.xml'
run_lev, debug_dir = 1, None
#just to check if the debug process is lauched, no need of this directory in fact
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
os.mkdir('rtfdebug')
debug_dir = 'rtfdebug'
os.mkdir(debug_dir)
run_lev = 4
except OSError, ( errno, strerror ):
print strerror
print errno
debug_dir = None
else:
debug_dir = None
except:
pass
parser = ParseRtf(
in_file = stream,
out_file = ofile,
@ -127,32 +124,38 @@ class RTFInput(InputFormatPlugin):
# Write or do not write paragraphs. Default is 0.
empty_paragraphs = 1,
#debug
deb_dir = debug_dir,
run_level = run_lev,
)
parser.parse_rtf()
ans = open('dataxml.xml').read()
return ans
with open(ofile, 'rb') as f:
return f.read()
def extract_images(self, picts):
import imghdr
self.log('Extracting images...')
raw = open(picts, 'rb').read()
with open(picts, 'rb') as f:
raw = f.read()
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
hex = re.compile(r'[^a-zA-Z0-9]')
hex = re.compile(r'[^a-fA-F0-9]')
encs = [hex.sub('', pict) for pict in picts]
count = 0
imap = {}
for enc in encs:
if len(enc) % 2 == 1:
enc = enc[:-1]
data = enc.decode('hex')
fmt = imghdr.what(None, data)
if fmt is None:
fmt = 'wmf'
count += 1
name = '%04d.wmf' % count
open(name, 'wb').write(data)
name = '%04d.%s' % (count, fmt)
with open(name, 'wb') as f:
f.write(data)
imap[count] = name
#open(name+'.hex', 'wb').write(enc)
return self.convert_images(imap)
@ -183,6 +186,7 @@ class RTFInput(InputFormatPlugin):
# return self.convert_images(imap)
def convert_images(self, imap):
self.default_img = None
for count, val in imap.iteritems():
try:
imap[count] = self.convert_image(val)
@ -191,11 +195,35 @@ class RTFInput(InputFormatPlugin):
return imap
def convert_image(self, name):
from calibre.utils.magick import Image
img = Image()
img.open(name)
if not name.endswith('.wmf'):
return name
try:
return self.rasterize_wmf(name)
except:
self.log.exception('Failed to convert WMF image %r'%name)
return self.replace_wmf(name)
def replace_wmf(self, name):
from calibre.ebooks import calibre_cover
if self.default_img is None:
self.default_img = calibre_cover('Conversion of WMF images is not supported',
'Use Microsoft Word or OpenOffice to save this RTF file'
' as HTML and convert that in calibre.', title_size=36,
author_size=20)
name = name.replace('.wmf', '.jpg')
img.save(name)
with open(name, 'wb') as f:
f.write(self.default_img)
return name
def rasterize_wmf(self, name):
raise ValueError('Conversion of WMF images not supported')
from calibre.utils.wmf import extract_raster_image
with open(name, 'rb') as f:
data = f.read()
data = extract_raster_image(data)
name = name.replace('.wmf', '.jpg')
with open(name, 'wb') as f:
f.write(data)
return name
@ -285,6 +313,7 @@ class RTFInput(InputFormatPlugin):
try:
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
raise
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
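As a side note on the image extraction changes above, a small self-contained example of the stricter hex filter plus imghdr-based type sniffing; the payload is just the PNG signature, standing in for a real \pict blob:

import imghdr, re

hex_pat = re.compile(r'[^a-fA-F0-9]')   # new pattern: keep hex digits only
raw = '89 50 4e 47 0d 0a 1a 0a'         # hypothetical \pict payload
enc = hex_pat.sub('', raw)
if len(enc) % 2 == 1:                   # odd-length guard added in this commit
    enc = enc[:-1]
data = enc.decode('hex')
print imghdr.what(None, data)           # -> 'png'; a None result falls back to 'wmf'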

View File

@ -107,7 +107,7 @@ class ParseRtf:
no_dtd = 0,
char_data = '',
):
"""
Requires:
'file' --file to parse
@ -124,7 +124,7 @@ class ParseRtf:
through a file. Only for debugging.
Returns: Nothing
"""
self.__file = in_file
self.__out_file = out_file
self.__out_dir = out_dir
@ -155,12 +155,12 @@ class ParseRtf:
if hasattr(the_file, 'read'): return
if the_file == None:
if type == "file_to_parse":
msg = _("\nYou must provide a file for the script to work")
msg = "\nYou must provide a file for the script to work"
raise RtfInvalidCodeException, msg
elif os.path.exists(the_file):
pass # do nothing
else:
msg = _("\nThe file '%s' cannot be found") % the_file
msg = "\nThe file '%s' cannot be found" % the_file
raise RtfInvalidCodeException, msg
def __check_dir(self, the_dir):
@ -169,7 +169,7 @@ class ParseRtf:
return
dir_exists = os.path.isdir(the_dir)
if not dir_exists:
msg = _("\n%s is not a directory") % the_dir
msg = "\n%s is not a directory" % the_dir
raise RtfInvalidCodeException, msg
return 1
@ -247,7 +247,7 @@ class ParseRtf:
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
msg = _('File %s does not appear to be correctly encoded.\n') % file_name
msg = 'File %s does not appear to be correctly encoded.\n' % file_name
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
@ -542,7 +542,7 @@ class ParseRtf:
pass
#sys.stderr.write( msg + ' in ' + file_name + "\n")
else:
msg = _('%s in file %s\n') % (msg, file_name)
msg = '%s in file %s\n' % (msg, file_name)
raise RtfInvalidCodeException, msg
def __return_code(self, num):
@ -558,4 +558,4 @@ class ParseRtf:
with open(write_file, 'wb') as write_obj:
for line in read_obj:
write_obj.write(line)
return write_file
return write_file

View File

@ -54,7 +54,7 @@ class CheckBrackets:
return (False, "closed bracket doesn't match, line %s" % line_count)
if self.__bracket_count != 0:
msg = _('At end of file open and closed brackets don\'t match\n' \
msg = ('At end of file open and closed brackets don\'t match\n' \
'total number of brackets is %s') % self.__bracket_count
return (False, msg)
return (True, _("Brackets match!"))
return (True, "Brackets match!")

View File

@ -13,10 +13,10 @@ class CheckEncoding:
try:
char.decode(encoding)
except UnicodeError, msg:
sys.stderr.write(_('line: %s char: %s\n') % (line_num, char_position))
sys.stderr.write('line: %s char: %s\n' % (line_num, char_position))
sys.stderr.write(str(msg) + '\n')
def check_encoding(self, path, encoding='us-ascii', verbose = True):
def check_encoding(self, path, encoding='us-ascii', verbose=True):
line_num = 0
with open(path, 'r') as read_obj:
for line in read_obj:
@ -28,7 +28,7 @@ class CheckEncoding:
if len(line) < 1000:
self.__get_position_error(line, encoding, line_num)
else:
sys.stderr.write(_('line: %d has bad encoding\n') % line_num)
sys.stderr.write('line: %d has bad encoding\n' % line_num)
return True
return False

View File

@ -78,14 +78,14 @@ class CombineBorders:
self.add_to_border_desc(line)
def combine_borders(self):
with open(self.__file, 'r') as read_obj, \
open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__first_five = line[0:5]
if self.__state == 'border':
self.__border_func(line, write_obj)
else:
write_obj.write(self.__default_func(line))
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__first_five = line[0:5]
if self.__state == 'border':
self.__border_func(line, write_obj)
else:
write_obj.write(self.__default_func(line))
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "combine_borders.data")

View File

@ -1,4 +1,4 @@
import os, tempfile
import os, tempfile, sys
from calibre.ebooks.rtf2xml import copy, check_encoding
@ -208,15 +208,16 @@ class ConvertToTags:
"""
#keep maximum compatibility with previous version
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = self.__bug_handler,
)
if not check_encoding_obj.check_encoding(self.__file, verbose = False):
bug_handler=self.__bug_handler)
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write(_('Bad RTF encoding, revert to US-ASCII chars and hope for the best'))
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best')
self.__new_line = 0
self.__write_new_line()
if self.__no_dtd:

View File

@ -3,7 +3,6 @@
# copyright 2002 Paul Henry Tremblay #
# #
#########################################################################
'''
Codepages as to RTF 1.9.1:
437 United States IBM
@ -79,7 +78,7 @@ class DefaultEncoding:
else:
code_page = 'ansicpg' + self.__code_page
return self.__platform, code_page, self.__default_num
def get_codepage(self):
if not self.__datafetched:
self._encoding()
@ -91,7 +90,7 @@ class DefaultEncoding:
self._encoding()
self.__datafetched = True
return self.__platform
def _encoding(self):
with open(self.__file, 'r') as read_obj:
if not self.__fetchraw:

View File

@ -128,7 +128,7 @@ class DeleteInfo:
# not sure what happens here!
# believe I have a '{\*}
if self.__run_level > 3:
msg = _('flag problem\n')
msg = 'flag problem\n'
raise self.__bug_handler, msg
return True
elif self.__token_info in self.__allowable :
@ -144,14 +144,14 @@ class DeleteInfo:
self.__found_list_func(line)
elif self.__token_info in self.__not_allowable:
if not self.__ob:
self.__write_cb = False
self.__write_cb = True
self.__ob = 0
self.__state = 'delete'
self.__cb_count = 0
return False
else:
if self.__run_level > 5:
msg = _('After an asterisk, and found neither an allowable or non-allowable token\n\
msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\
token is "%s"\n') % self.__token_info
raise self.__bug_handler, msg
if not self.__ob:
@ -187,32 +187,31 @@ class DeleteInfo:
def delete_info(self):
"""Main method for handling other methods. Read one line in at
a time, and determine wheter to print the line based on the state."""
with open(self.__file, 'r') as read_obj, \
open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
#ob<nu<open-brack<0001
to_print = True
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if not action:
sys.stderr.write(_('No action in dictionary state is "%s" \n')
% self.__state)
to_print = action(line)
# if self.__after_asterisk:
# to_print = self.__asterisk_func(line)
# elif self.__list:
# self.__in_list_func(line)
# elif self.__delete:
# to_print = self.__delete_func(line)
# else:
# to_print = self.__default_func(line)
if to_print:
self.__write_obj.write(line)
a time, and determine whether to print the line based on the state."""
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
#ob<nu<open-brack<0001
to_print = True
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if not action:
sys.stderr.write('No action in dictionary state is "%s" \n' % self.__state)
to_print = action(line)
# if self.__after_asterisk:
# to_print = self.__asterisk_func(line)
# elif self.__list:
# self.__in_list_func(line)
# elif self.__delete:
# to_print = self.__delete_func(line)
# else:
# to_print = self.__default_func(line)
if to_print:
self.__write_obj.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "delete_info.data")

View File

@ -120,35 +120,35 @@ class Footnote:
"""
self.__initiate_sep_values()
self.__footnote_holder = tempfile.mktemp()
with open(self.__file) as read_obj, \
open(self.__write_to, 'w') as self.__write_obj, \
open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj:
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
# In the middle of footnote text
if self.__in_footnote:
self.__in_footnote_func(line)
# not in the middle of footnote text
else:
self.__default_sep(line)
with open(self.__footnote_holder, 'r') as read_obj, \
open(self.__write_to, 'a') as write_obj:
write_obj.write(
'mi<mk<sect-close\n'
'mi<mk<body-close\n'
'mi<tg<close_____<section\n'
'mi<tg<close_____<body\n'
'mi<tg<close_____<doc\n'
'mi<mk<footnt-beg\n')
for line in read_obj:
write_obj.write(line)
write_obj.write(
'mi<mk<footnt-end\n')
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj:
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
# In the middle of footnote text
if self.__in_footnote:
self.__in_footnote_func(line)
# not in the middle of footnote text
else:
self.__default_sep(line)
with open(self.__footnote_holder, 'r') as read_obj:
with open(self.__write_to, 'a') as write_obj:
write_obj.write(
'mi<mk<sect-close\n'
'mi<mk<body-close\n'
'mi<tg<close_____<section\n'
'mi<tg<close_____<body\n'
'mi<tg<close_____<doc\n'
'mi<mk<footnt-beg\n')
for line in read_obj:
write_obj.write(line)
write_obj.write(
'mi<mk<footnt-end\n')
os.remove(self.__footnote_holder)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
@ -190,15 +190,15 @@ class Footnote:
These two functions do the work of separating the footnotes form the
body.
"""
with open(self.__file) as read_obj, \
open(self.__write_to, 'w') as self.__write_obj, \
open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__state == 'body':
self.__get_foot_body_func(line)
elif self.__state == 'foot':
self.__get_foot_foot_func(line)
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__state == 'body':
self.__get_foot_body_func(line)
elif self.__state == 'foot':
self.__get_foot_foot_func(line)
def __get_foot_from_temp(self, num):
"""
@ -228,13 +228,13 @@ class Footnote:
print out to the third file.
If no footnote marker is found, simply print out the token (line).
"""
with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj, \
open(self.__write_to, 'r') as read_obj, \
open(self.__write_to2, 'w') as self.__write_obj:
for line in read_obj:
if line[:16] == 'mi<mk<footnt-ind':
line = self.__get_foot_from_temp(line[17:-1])
self.__write_obj.write(line)
with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj:
with open(self.__write_to, 'r') as read_obj:
with open(self.__write_to2, 'w') as self.__write_obj:
for line in read_obj:
if line[:16] == 'mi<mk<footnt-ind':
line = self.__get_foot_from_temp(line[17:-1])
self.__write_obj.write(line)
def join_footnotes(self):
"""

View File

@ -43,6 +43,8 @@ class GetCharMap:
def get_char_map(self, map):
if map == 'ansicpg0':
map = 'ansicpg1250'
if map in ('ansicpg10000', '10000'):
map = 'mac_roman'
found_map = False
map_dict = {}
self.__char_file.seek(0)
@ -59,10 +61,10 @@ class GetCharMap:
fields = line.split(':')
fields[1].replace('\\colon', ':')
map_dict[fields[1]] = fields[3]
if not found_map:
msg = _('no map found\nmap is "%s"\n') %(map,)
msg = 'no map found\nmap is "%s"\n'%(map,)
raise self.__bug_handler, msg
return map_dict

View File

@ -16,8 +16,10 @@
# #
#########################################################################
import sys, os, tempfile, cStringIO
from calibre.ebooks.rtf2xml import get_char_map, copy
from calibre.ebooks.rtf2xml.char_set import char_set
class Hex2Utf8:
"""
Convert Microsoft hexidecimal numbers to utf-8
@ -265,7 +267,7 @@ class Hex2Utf8:
# msg = 'no dictionary entry for %s\n'
# msg += 'the hexidecimal num is "%s"\n' % (hex_num)
# msg += 'dictionary is %s\n' % self.__current_dict_name
msg = _('Character "&#x%s;" does not appear to be valid (or is a control character)\n') % token
msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
raise self.__bug_handler, msg
def __found_body_func(self, line):
@ -293,7 +295,7 @@ class Hex2Utf8:
self.__token_info = line[:16]
action = self.__preamble_state_dict.get(self.__state)
if action is None:
sys.stderr.write(_('error no state found in hex_2_utf8'),
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)
@ -553,7 +555,7 @@ class Hex2Utf8:
self.__token_info = line[:16]
action = self.__body_state_dict.get(self.__state)
if action is None:
sys.stderr.write(_('error no state found in hex_2_utf8'),
sys.stderr.write('error no state found in hex_2_utf8',
self.__state
)
action(line)

View File

@ -297,7 +297,7 @@ class Inline:
inline_list = self.__inline_list[last_index:]
if len(inline_list) <= 0:
if self.__run_level > 3:
msg = _('self.__inline_list is %s\n') % self.__inline_list
msg = 'self.__inline_list is %s\n' % self.__inline_list
raise self.__bug_handler, msg
self.__write_obj.write('error\n')
self.__groups_in_waiting[0] = 0
@ -393,27 +393,27 @@ class Inline:
the state.
"""
self.__initiate_values()
with open(self.__file, 'r') as read_obj, \
open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
token = line[0:-1]
self.__token_info = ''
if token == 'tx<mc<__________<rdblquote'\
or token == 'tx<mc<__________<ldblquote'\
or token == 'tx<mc<__________<lquote'\
or token == 'tx<mc<__________<rquote'\
or token == 'tx<mc<__________<emdash'\
or token == 'tx<mc<__________<endash'\
or token == 'tx<mc<__________<bullet':
self.__token_info = 'text'
else:
self.__token_info = line[:16]
self.__set_list_func(line)
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write(_('No matching state in module inline_for_lists.py\n'))
sys.stderr.write(self.__state + '\n')
action(line)
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
token = line[0:-1]
self.__token_info = ''
if token == 'tx<mc<__________<rdblquote'\
or token == 'tx<mc<__________<ldblquote'\
or token == 'tx<mc<__________<lquote'\
or token == 'tx<mc<__________<rquote'\
or token == 'tx<mc<__________<emdash'\
or token == 'tx<mc<__________<endash'\
or token == 'tx<mc<__________<bullet':
self.__token_info = 'text'
else:
self.__token_info = line[:16]
self.__set_list_func(line)
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('No matching state in module inline_for_lists.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "inline.data")

View File

@ -15,7 +15,7 @@
# #
# #
#########################################################################
import os, tempfile, re
import os, tempfile
from calibre.ebooks.rtf2xml import copy
from calibre.utils.cleantext import clean_ascii_chars

View File

@ -77,11 +77,11 @@ class Pict:
try:
os.mkdir(self.__dir_name)
except OSError, msg:
msg = _("%sCouldn't make directory '%s':\n") % (str(msg), self.__dir_name)
msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
raise self.__bug_handler
else:
if self.__run_level > 1:
sys.stderr.write(_('Removing files from old pict directory...\n'))
sys.stderr.write('Removing files from old pict directory...\n')
all_files = os.listdir(self.__dir_name)
for the_file in all_files:
the_file = os.path.join(self.__dir_name, the_file)
@ -90,7 +90,7 @@ class Pict:
except OSError:
pass
if self.__run_level > 1:
sys.stderr.write(_('Files removed.\n'))
sys.stderr.write('Files removed.\n')
def __create_pict_file(self):
"""Create a file for all the pict data to be written to.
@ -146,25 +146,25 @@ class Pict:
def process_pict(self):
self.__make_dir()
with open(self.__file) as read_obj, \
open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
if not self.__in_pict:
to_print = self.__default(line, write_obj)
if to_print :
write_obj.write(line)
else:
to_print = self.__in_pict_func(line)
if to_print :
write_obj.write(line)
if self.__already_found_pict:
self.__write_pic_obj.write("}\n")
self.__write_pic_obj.close()
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as write_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
if not self.__in_pict:
to_print = self.__default(line, write_obj)
if to_print :
write_obj.write(line)
else:
to_print = self.__in_pict_func(line)
if to_print :
write_obj.write(line)
if self.__already_found_pict:
self.__write_pic_obj.write("}\n")
self.__write_pic_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "pict.data")

View File

@ -622,12 +622,12 @@ class ProcessTokens:
num = int(num)
except ValueError:
if self.__run_level > 3:
msg = _('Number "%s" cannot be converted to integer\n') % num
msg = 'Number "%s" cannot be converted to integer\n' % num
raise self.__bug_handler, msg
type = self.__number_type_dict.get(num)
if type is None:
if self.__run_level > 3:
msg = _('No type for "%s" in self.__number_type_dict\n')
msg = 'No type for "%s" in self.__number_type_dict\n'
raise self.__bug_handler
type = 'Arabic'
return 'cw<%s<%s<nu<%s\n' % (pre, token, type)
@ -637,7 +637,7 @@ class ProcessTokens:
if not lang_name:
lang_name = "not defined"
if self.__run_level > 3:
msg = _('No entry for number "%s"') % num
msg = 'No entry for number "%s"' % num
raise self.__bug_handler, msg
return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
@ -689,7 +689,7 @@ class ProcessTokens:
return 'cw<%s<%s<nu<false\n' % (pre, token)
##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
else:
msg = _("boolean should have some value module process tokens\ntoken is %s\n'%s'\n") % (token, num)
msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num)
raise self.__bug_handler, msg
def __no_sup_sub_func(self, pre, token, num):
@ -703,7 +703,7 @@ class ProcessTokens:
numerator = float(re.search('[0-9.\-]+', numerator).group())
except TypeError, msg:
if self.__run_level > 3:
msg = _('No number to process?\nthis indicates that the token \(\\li\) \
msg = ('No number to process?\nthis indicates that the token \(\\li\) \
should have a number and does not\nnumerator is \
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
raise self.__bug_handler, msg
@ -724,12 +724,12 @@ class ProcessTokens:
second = match_obj.group(2)
if not second:
if self.__run_level > 3:
msg = _("token is '%s' \n") % token
msg = "token is '%s' \n" % token
raise self.__bug_handler, msg
return first, 0
else:
if self.__run_level > 3:
msg = _("token is '%s' \n") % token
msg = "token is '%s' \n" % token
raise self.__bug_handler
return token, 0
return first, second
@ -758,7 +758,7 @@ class ProcessTokens:
pre, token, action = self.dict_token.get(token, (None, None, None))
if action:
return action(pre, token, num)
def __check_brackets(self, in_file):
self.__check_brack_obj = check_brackets.CheckBrackets\
(file = in_file)
@ -769,53 +769,54 @@ class ProcessTokens:
def process_tokens(self):
"""Main method for handling other methods. """
line_count = 0
with open(self.__file, 'r') as read_obj, open(self.__write_to, 'wb') as write_obj:
for line in read_obj:
token = line.replace("\n","")
line_count += 1
if line_count == 1 and token != '\\{':
msg = _('Invalid RTF: document doesn\'t start with {\n')
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'wb') as write_obj:
for line in read_obj:
token = line.replace("\n","")
line_count += 1
if line_count == 1 and token != '\\{':
msg = 'Invalid RTF: document doesn\'t start with {\n'
raise self.__exception_handler, msg
elif line_count == 2 and token[0:4] != '\\rtf':
msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
raise self.__exception_handler, msg
the_index = token.find('\\ ')
if token is not None and the_index > -1:
msg = 'Invalid RTF: token "\\ " not valid.\n'
raise self.__exception_handler, msg
elif line_count == 2 and token[0:4] != '\\rtf':
msg =_('Invalid RTF: document doesn\'t start with \\rtf \n')
raise self.__exception_handler, msg
the_index = token.find('\\ ')
if token is not None and the_index > -1:
msg =_('Invalid RTF: token "\\ " not valid.\n')
raise self.__exception_handler, msg
elif token[:1] == "\\":
try:
token.decode('us-ascii')
except UnicodeError, msg:
msg = _('Invalid RTF: Tokens not ascii encoded.\n%s') % str(msg)
raise self.__exception_handler, msg
line = self.process_cw(token)
if line is not None:
write_obj.write(line)
else:
fields = re.split(self.__utf_exp, token)
for field in fields:
if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
else:
write_obj.write('tx<nu<__________<%s\n' % field)
elif token[:1] == "\\":
try:
token.decode('us-ascii')
except UnicodeError, msg:
msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
raise self.__exception_handler, msg
line = self.process_cw(token)
if line is not None:
write_obj.write(line)
else:
fields = re.split(self.__utf_exp, token)
for field in fields:
if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
else:
write_obj.write('tx<nu<__________<%s\n' % field)
if not line_count:
msg =_('Invalid RTF: file appears to be empty.\n')
msg = 'Invalid RTF: file appears to be empty.\n'
raise self.__exception_handler, msg
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "processed_tokens.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
bad_brackets = self.__check_brackets(self.__file)
if bad_brackets:
msg = _('Invalid RTF: document does not have matching brackets.\n')
msg = 'Invalid RTF: document does not have matching brackets.\n'
raise self.__exception_handler, msg
else:
return self.__return_code
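For readers unfamiliar with rtf2xml's internals, the lines written and re-read by these modules follow a fixed-width token format; a small illustration with invented values, matching the line[:16] and line[-5:-1] slices used above:

line = 'tx<nu<__________<Hello world\n'   # a plain text token
print line[:16]                           # -> 'tx<nu<__________'  (the token_info key)
bracket = 'ob<nu<open-brack<0001\n'       # bracket tokens carry a four-digit counter
print bracket[-5:-1]                      # -> '0001'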

View File

@ -37,10 +37,10 @@ class ReplaceIllegals:
def replace_illegals(self):
"""
"""
with open(self.__file, 'r') as read_obj, \
open(self.__write_to, 'w') as write_obj:
for line in read_obj:
write_obj.write(clean_ascii_chars(line))
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
for line in read_obj:
write_obj.write(clean_ascii_chars(line))
copy_obj = copy.Copy()
if self.__copy:
copy_obj.copy_file(self.__write_to, "replace_illegals.data")

View File

@ -0,0 +1,6 @@
from functions import textile, textile_restricted, Textile
if False:
textile, textile_restricted, Textile
__all__ = ['textile', 'textile_restricted']

View File

@ -0,0 +1,981 @@
#!/usr/bin/env python
"""
PyTextile
A Humane Web Text Generator
"""
__version__ = '2.1.4'
__date__ = '2009/12/04'
__copyright__ = """
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
Original PHP Version:
Copyright (c) 2003-2004, Dean Allen <dean@textism.com>
All rights reserved.
Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
Textile's procedural code into a class framework
Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
"""
__license__ = """
L I C E N S E
=============
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name Textile nor the names of its contributors may be used to
endorse or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import re
import uuid
from urlparse import urlparse
def _normalize_newlines(string):
out = re.sub(r'\r\n', '\n', string)
out = re.sub(r'\n{3,}', '\n\n', out)
out = re.sub(r'\n\s*\n', '\n\n', out)
out = re.sub(r'"$', '" ', out)
return out
def getimagesize(url):
"""
Attempts to determine an image's width and height, and returns a string
suitable for use in an <img> tag, or None in case of failure.
Requires that PIL is installed.
>>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif")
... #doctest: +ELLIPSIS, +SKIP
'width="..." height="..."'
"""
try:
import ImageFile
import urllib2
except ImportError:
return None
try:
p = ImageFile.Parser()
f = urllib2.urlopen(url)
while True:
s = f.read(1024)
if not s:
break
p.feed(s)
if p.image:
return 'width="%i" height="%i"' % p.image.size
except (IOError, ValueError):
return None
class Textile(object):
hlgn = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
vlgn = r'[\-^~]'
clas = r'(?:\([^)]+\))'
lnge = r'(?:\[[^\]]+\])'
styl = r'(?:\{[^}]+\})'
cspn = r'(?:\\\d+)'
rspn = r'(?:\/\d+)'
a = r'(?:%s|%s)*' % (hlgn, vlgn)
s = r'(?:%s|%s)*' % (cspn, rspn)
c = r'(?:%s)*' % '|'.join([clas, styl, lnge, hlgn])
pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
# urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
url_schemes = ('http', 'https', 'ftp', 'mailto')
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
btag_lite = ('bq', 'bc', 'p')
glyph_defaults = (
('txt_quote_single_open', '&#8216;'),
('txt_quote_single_close', '&#8217;'),
('txt_quote_double_open', '&#8220;'),
('txt_quote_double_close', '&#8221;'),
('txt_apostrophe', '&#8217;'),
('txt_prime', '&#8242;'),
('txt_prime_double', '&#8243;'),
('txt_ellipsis', '&#8230;'),
('txt_emdash', '&#8212;'),
('txt_endash', '&#8211;'),
('txt_dimension', '&#215;'),
('txt_trademark', '&#8482;'),
('txt_registered', '&#174;'),
('txt_copyright', '&#169;'),
)
def __init__(self, restricted=False, lite=False, noimage=False):
"""docstring for __init__"""
self.restricted = restricted
self.lite = lite
self.noimage = noimage
self.get_sizes = False
self.fn = {}
self.urlrefs = {}
self.shelf = {}
self.rel = ''
self.html_type = 'xhtml'
def textile(self, text, rel=None, head_offset=0, html_type='xhtml'):
"""
>>> import textile
>>> textile.textile('some textile')
u'\\t<p>some textile</p>'
"""
self.html_type = html_type
# text = unicode(text)
text = _normalize_newlines(text)
if self.restricted:
text = self.encode_html(text, quotes=False)
if rel:
self.rel = ' rel="%s"' % rel
text = self.getRefs(text)
text = self.block(text, int(head_offset))
text = self.retrieve(text)
return text
def pba(self, input, element=None):
"""
Parse block attributes.
>>> t = Textile()
>>> t.pba(r'\3')
''
>>> t.pba(r'\\3', element='td')
' colspan="3"'
>>> t.pba(r'/4', element='td')
' rowspan="4"'
>>> t.pba(r'\\3/4', element='td')
' colspan="3" rowspan="4"'
>>> t.vAlign('^')
'top'
>>> t.pba('^', element='td')
' style="vertical-align:top;"'
>>> t.pba('{line-height:18px}')
' style="line-height:18px;"'
>>> t.pba('(foo-bar)')
' class="foo-bar"'
>>> t.pba('(#myid)')
' id="myid"'
>>> t.pba('(foo-bar#myid)')
' class="foo-bar" id="myid"'
>>> t.pba('((((')
' style="padding-left:4em;"'
>>> t.pba(')))')
' style="padding-right:3em;"'
>>> t.pba('[fr]')
' lang="fr"'
"""
style = []
aclass = ''
lang = ''
colspan = ''
rowspan = ''
id = ''
if not input:
return ''
matched = input
if element == 'td':
m = re.search(r'\\(\d+)', matched)
if m:
colspan = m.group(1)
m = re.search(r'/(\d+)', matched)
if m:
rowspan = m.group(1)
if element == 'td' or element == 'tr':
m = re.search(r'(%s)' % self.vlgn, matched)
if m:
style.append("vertical-align:%s;" % self.vAlign(m.group(1)))
m = re.search(r'\{([^}]*)\}', matched)
if m:
style.append(m.group(1).rstrip(';') + ';')
matched = matched.replace(m.group(0), '')
m = re.search(r'\[([^\]]+)\]', matched, re.U)
if m:
lang = m.group(1)
matched = matched.replace(m.group(0), '')
m = re.search(r'\(([^()]+)\)', matched, re.U)
if m:
aclass = m.group(1)
matched = matched.replace(m.group(0), '')
m = re.search(r'([(]+)', matched)
if m:
style.append("padding-left:%sem;" % len(m.group(1)))
matched = matched.replace(m.group(0), '')
m = re.search(r'([)]+)', matched)
if m:
style.append("padding-right:%sem;" % len(m.group(1)))
matched = matched.replace(m.group(0), '')
m = re.search(r'(%s)' % self.hlgn, matched)
if m:
style.append("text-align:%s;" % self.hAlign(m.group(1)))
m = re.search(r'^(.*)#(.*)$', aclass)
if m:
id = m.group(2)
aclass = m.group(1)
if self.restricted:
if lang:
return ' lang="%s"' % lang
else:
return ''
result = []
if style:
result.append(' style="%s"' % "".join(style))
if aclass:
result.append(' class="%s"' % aclass)
if lang:
result.append(' lang="%s"' % lang)
if id:
result.append(' id="%s"' % id)
if colspan:
result.append(' colspan="%s"' % colspan)
if rowspan:
result.append(' rowspan="%s"' % rowspan)
return ''.join(result)
def hasRawText(self, text):
"""
checks whether the text contains content not already enclosed by a block-level tag
>>> t = Textile()
>>> t.hasRawText('<p>foo bar biz baz</p>')
False
>>> t.hasRawText(' why yes, yes it does')
True
"""
r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*</\1>', re.S).sub('', text.strip()).strip()
r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
return '' != r
def table(self, text):
r"""
>>> t = Textile()
>>> t.table('|one|two|three|\n|a|b|c|')
'\t<table>\n\t\t<tr>\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>\n\n'
"""
text = text + "\n\n"
pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n' % {'s':self.s, 'a':self.a, 'c':self.c}, re.S|re.M|re.U)
return pattern.sub(self.fTable, text)
def fTable(self, match):
tatts = self.pba(match.group(1), 'table')
rows = []
for row in [ x for x in match.group(2).split('\n') if x]:
rmtch = re.search(r'^(%s%s\. )(.*)' % (self.a, self.c), row.lstrip())
if rmtch:
ratts = self.pba(rmtch.group(1), 'tr')
row = rmtch.group(2)
else:
ratts = ''
cells = []
for cell in row.split('|')[1:-1]:
ctyp = 'd'
if re.search(r'^_', cell):
ctyp = "h"
cmtch = re.search(r'^(_?%s%s%s\. )(.*)' % (self.s, self.a, self.c), cell)
if cmtch:
catts = self.pba(cmtch.group(1), 'td')
cell = cmtch.group(2)
else:
catts = ''
cell = self.graf(self.span(cell))
cells.append('\t\t\t<t%s%s>%s</t%s>' % (ctyp, catts, cell, ctyp))
rows.append("\t\t<tr%s>\n%s\n\t\t</tr>" % (ratts, '\n'.join(cells)))
cells = []
catts = None
return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, '\n'.join(rows))
def lists(self, text):
"""
>>> t = Textile()
>>> t.lists("* one\\n* two\\n* three")
'\\t<ul>\\n\\t\\t<li>one</li>\\n\\t\\t<li>two</li>\\n\\t\\t<li>three</li>\\n\\t</ul>'
"""
pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])' % self.c, re.U|re.M|re.S)
return pattern.sub(self.fList, text)
def fList(self, match):
text = match.group(0).split("\n")
result = []
lists = []
for i, line in enumerate(text):
try:
nextline = text[i+1]
except IndexError:
nextline = ''
m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S)
if m:
tl, atts, content = m.groups()
nl = ''
nm = re.search(r'^([#*]+)\s.*', nextline)
if nm:
nl = nm.group(1)
if tl not in lists:
lists.append(tl)
atts = self.pba(atts)
line = "\t<%sl%s>\n\t\t<li>%s" % (self.lT(tl), atts, self.graf(content))
else:
line = "\t\t<li>" + self.graf(content)
if len(nl) <= len(tl):
line = line + "</li>"
for k in reversed(lists):
if len(k) > len(nl):
line = line + "\n\t</%sl>" % self.lT(k)
if len(k) > 1:
line = line + "</li>"
lists.remove(k)
result.append(line)
return "\n".join(result)
def lT(self, input):
if re.search(r'^#+', input):
return 'o'
else:
return 'u'
def doPBr(self, in_):
return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr, in_)
def doBr(self, match):
if self.html_type == 'html':
content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br>', match.group(3))
else:
content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br />', match.group(3))
return '<%s%s>%s%s' % (match.group(1), match.group(2), content, match.group(4))
def block(self, text, head_offset = 0):
"""
>>> t = Textile()
>>> t.block('h1. foobar baby')
'\\t<h1>foobar baby</h1>'
"""
if not self.lite:
tre = '|'.join(self.btag)
else:
tre = '|'.join(self.btag_lite)
text = text.split('\n\n')
tag = 'p'
atts = cite = graf = ext = c1 = ''
out = []
anon = False
for line in text:
pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre, self.a, self.c)
match = re.search(pattern, line, re.S)
if match:
if ext:
out.append(out.pop() + c1)
tag, atts, ext, cite, graf = match.groups()
h_match = re.search(r'h([1-6])', tag)
if h_match:
head_level, = h_match.groups()
tag = 'h%i' % max(1,
min(int(head_level) + head_offset,
6))
o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
cite, graf)
# leave off c1 if this block is extended,
# we'll close it at the start of the next block
if ext:
line = "%s%s%s%s" % (o1, o2, content, c2)
else:
line = "%s%s%s%s%s" % (o1, o2, content, c2, c1)
else:
anon = True
if ext or not re.search(r'^\s', line):
o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
cite, line)
# skip $o1/$c1 because this is part of a continuing
# extended block
if tag == 'p' and not self.hasRawText(content):
line = content
else:
line = "%s%s%s" % (o2, content, c2)
else:
line = self.graf(line)
line = self.doPBr(line)
if self.html_type == 'xhtml':
line = re.sub(r'<br>', '<br />', line)
if ext and anon:
out.append(out.pop() + "\n" + line)
else:
out.append(line)
if not ext:
tag = 'p'
atts = ''
cite = ''
graf = ''
if ext:
out.append(out.pop() + c1)
return '\n\n'.join(out)
def fBlock(self, tag, atts, ext, cite, content):
"""
>>> t = Textile()
>>> t.fBlock("bq", "", None, "", "Hello BlockQuote")
('\\t<blockquote>\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
>>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote")
('\\t<blockquote cite="http://google.com">\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
>>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS
('<pre>', '<code>', ..., '</code>', '</pre>')
>>> t.fBlock("h1", "", None, "", "foobar")
('', '\\t<h1>', 'foobar', '</h1>', '')
"""
atts = self.pba(atts)
o1 = o2 = c2 = c1 = ''
m = re.search(r'fn(\d+)', tag)
if m:
tag = 'p'
if m.group(1) in self.fn:
fnid = self.fn[m.group(1)]
else:
fnid = m.group(1)
atts = atts + ' id="fn%s"' % fnid
if atts.find('class=') < 0:
atts = atts + ' class="footnote"'
content = ('<sup>%s</sup>' % m.group(1)) + content
if tag == 'bq':
cite = self.checkRefs(cite)
if cite:
cite = ' cite="%s"' % cite
else:
cite = ''
o1 = "\t<blockquote%s%s>\n" % (cite, atts)
o2 = "\t\t<p%s>" % atts
c2 = "</p>"
c1 = "\n\t</blockquote>"
elif tag == 'bc':
o1 = "<pre%s>" % atts
o2 = "<code%s>" % atts
c2 = "</code>"
c1 = "</pre>"
content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
elif tag == 'notextile':
content = self.shelve(content)
o1 = o2 = ''
c1 = c2 = ''
elif tag == 'pre':
content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
o1 = "<pre%s>" % atts
o2 = c2 = ''
c1 = '</pre>'
else:
o2 = "\t<%s%s>" % (tag, atts)
c2 = "</%s>" % tag
content = self.graf(content)
return o1, o2, content, c2, c1
def footnoteRef(self, text):
"""
>>> t = Textile()
>>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS
'foo<sup class="footnote"><a href="#fn...">1</a></sup> '
"""
return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text)
def footnoteID(self, match):
id, t = match.groups()
if id not in self.fn:
self.fn[id] = str(uuid.uuid4())
fnid = self.fn[id]
if not t:
t = ''
return '<sup class="footnote"><a href="#fn%s">%s</a></sup>%s' % (fnid, id, t)
def glyphs(self, text):
"""
>>> t = Textile()
>>> t.glyphs("apostrophe's")
'apostrophe&#8217;s'
>>> t.glyphs("back in '88")
'back in &#8217;88'
>>> t.glyphs('foo ...')
'foo &#8230;'
>>> t.glyphs('--')
'&#8212;'
>>> t.glyphs('FooBar[tm]')
'FooBar&#8482;'
>>> t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
'<p><cite>Cat&#8217;s Cradle</cite> by Vonnegut</p>'
"""
# fix: hackish
text = re.sub(r'"\Z', '\" ', text)
glyph_search = (
re.compile(r"(\w)\'(\w)"), # apostrophe's
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
re.compile(r'\'/'), # single opening
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
re.compile(r'"'), # double opening
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
re.compile(r'(\s?)--(\s?)'), # em dash
re.compile(r'\s-(?:\s|$)'), # en dash
re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign
re.compile(r'\b ?[([]TM[])]', re.I), # trademark
re.compile(r'\b ?[([]R[])]', re.I), # registered
re.compile(r'\b ?[([]C[])]', re.I), # copyright
)
glyph_replace = [x % dict(self.glyph_defaults) for x in (
r'\1%(txt_apostrophe)s\2', # apostrophe's
r'\1%(txt_apostrophe)s\2', # back in '88
r'\1%(txt_quote_single_close)s', # single closing
r'%(txt_quote_single_open)s', # single opening
r'\1%(txt_quote_double_close)s', # double closing
r'%(txt_quote_double_open)s', # double opening
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
r'<span class="caps">\1</span>', # 3+ uppercase
r'\1%(txt_ellipsis)s', # ellipsis
r'\1%(txt_emdash)s\2', # em dash
r' %(txt_endash)s ', # en dash
r'\1\2%(txt_dimension)s\3', # dimension sign
r'%(txt_trademark)s', # trademark
r'%(txt_registered)s', # registered
r'%(txt_copyright)s', # copyright
)]
result = []
for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line):
for s, r in zip(glyph_search, glyph_replace):
line = s.sub(r, line)
result.append(line)
return ''.join(result)
def vAlign(self, input):
d = {'^':'top', '-':'middle', '~':'bottom'}
return d.get(input, '')
def hAlign(self, input):
d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'}
return d.get(input, '')
def getRefs(self, text):
"""
Capture link aliases of the form [name]url, store them in self.urlrefs and strip them from the text.
"""
pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U)
text = pattern.sub(self.refs, text)
return text
def refs(self, match):
flag, url = match.groups()
self.urlrefs[flag] = url
return ''
def checkRefs(self, url):
return self.urlrefs.get(url, url)
def isRelURL(self, url):
"""
Identify relative urls.
>>> t = Textile()
>>> t.isRelURL("http://www.google.com/")
False
>>> t.isRelURL("/foo")
True
"""
(scheme, netloc) = urlparse(url)[0:2]
return not scheme and not netloc
def relURL(self, url):
scheme = urlparse(url)[0]
if self.restricted and scheme and scheme not in self.url_schemes:
return '#'
return url
def shelve(self, text):
id = str(uuid.uuid4())
self.shelf[id] = text
return id
def retrieve(self, text):
"""
>>> t = Textile()
>>> id = t.shelve("foobar")
>>> t.retrieve(id)
'foobar'
"""
while True:
old = text
for k, v in self.shelf.items():
text = text.replace(k, v)
if text == old:
break
return text
def encode_html(self, text, quotes=True):
a = (
('&', '&#38;'),
('<', '&#60;'),
('>', '&#62;')
)
if quotes:
a = a + (
("'", '&#39;'),
('"', '&#34;')
)
for k, v in a:
text = text.replace(k, v)
return text
def graf(self, text):
if not self.lite:
text = self.noTextile(text)
text = self.code(text)
text = self.links(text)
if not self.noimage:
text = self.image(text)
if not self.lite:
text = self.lists(text)
text = self.table(text)
text = self.span(text)
text = self.footnoteRef(text)
text = self.glyphs(text)
return text.rstrip('\n')
def links(self, text):
"""
>>> t = Textile()
>>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS
'fooobar ... and hello world ...'
"""
punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
pattern = r'''
(?P<pre> [\s\[{(]|[%s] )?
" # start
(?P<atts> %s )
(?P<text> [^"]+? )
\s?
(?: \(([^)]+?)\)(?=") )? # $title
":
(?P<url> (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|] )
(?P<post> [^\w\/;]*? )
(?=<|\s|$)
''' % (re.escape(punct), self.c)
text = re.compile(pattern, re.X).sub(self.fLink, text)
return text
def fLink(self, match):
pre, atts, text, title, url, post = match.groups()
if pre == None:
pre = ''
# assume ) at the end of the url is not actually part of the url
# unless the url also contains a (
if url.endswith(')') and not url.find('(') > -1:
post = url[-1] + post
url = url[:-1]
url = self.checkRefs(url)
atts = self.pba(atts)
if title:
atts = atts + ' title="%s"' % self.encode_html(title)
if not self.noimage:
text = self.image(text)
text = self.span(text)
text = self.glyphs(text)
url = self.relURL(url)
out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts, self.rel, text)
out = self.shelve(out)
return ''.join([pre, out, post])
def span(self, text):
"""
>>> t = Textile()
>>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
"""
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
pnct = ".,\"'?!;:"
for qtag in qtags:
pattern = re.compile(r"""
(?:^|(?<=[\s>%(pnct)s])|([\]}]))
(%(qtag)s)(?!%(qtag)s)
(%(c)s)
(?::(\S+))?
([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
([%(pnct)s]*)
%(qtag)s
(?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
""" % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
'selfpnct':self.pnct}, re.X)
text = pattern.sub(self.fSpan, text)
return text
def fSpan(self, match):
_, tag, atts, cite, content, end, _ = match.groups()
qtags = {
'*': 'strong',
'**': 'b',
'??': 'cite',
'_' : 'em',
'__': 'i',
'-' : 'del',
'%' : 'span',
'+' : 'ins',
'~' : 'sub',
'^' : 'sup'
}
tag = qtags[tag]
atts = self.pba(atts)
if cite:
atts = atts + ' cite="%s"' % cite
content = self.span(content)
out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
return out
def image(self, text):
"""
>>> t = Textile()
>>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
'<a href="http://jsamsa.com"><img src="/imgs/myphoto.jpg" alt="" /></a>'
"""
pattern = re.compile(r"""
(?:[\[{])? # pre
\! # opening !
(%s) # optional style,class atts
(?:\. )? # optional dot-space
([^\s(!]+) # presume this is the src
\s? # optional space
(?:\(([^\)]+)\))? # optional title
\! # closing
(?::(\S+))? # optional href
(?:[\]}]|(?=\s|$)) # lookahead: space or end of string
""" % self.c, re.U|re.X)
return pattern.sub(self.fImage, text)
def fImage(self, match):
# (None, '', '/imgs/myphoto.jpg', None, None)
atts, url, title, href = match.groups()
atts = self.pba(atts)
if title:
atts = atts + ' title="%s" alt="%s"' % (title, title)
else:
atts = atts + ' alt=""'
if not self.isRelURL(url) and self.get_sizes:
size = getimagesize(url)
if (size):
atts += " %s" % size
if href:
href = self.checkRefs(href)
url = self.checkRefs(url)
url = self.relURL(url)
out = []
if href:
out.append('<a href="%s" class="img">' % href)
if self.html_type == 'html':
out.append('<img src="%s"%s>' % (url, atts))
else:
out.append('<img src="%s"%s />' % (url, atts))
if href:
out.append('</a>')
return ''.join(out)
def code(self, text):
text = self.doSpecial(text, '<code>', '</code>', self.fCode)
text = self.doSpecial(text, '@', '@', self.fCode)
text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
return text
def fCode(self, match):
before, text, after = match.groups()
if after == None:
after = ''
# text needs to be escaped
if not self.restricted:
text = self.encode_html(text)
return ''.join([before, self.shelve('<code>%s</code>' % text), after])
def fPre(self, match):
before, text, after = match.groups()
if after == None:
after = ''
# text needs to be escaped
if not self.restricted:
text = self.encode_html(text)
return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
def doSpecial(self, text, start, end, method=None):
if method == None:
method = self.fSpecial
pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S)
return pattern.sub(method, text)
def fSpecial(self, match):
"""
special blocks like notextile or code
"""
before, text, after = match.groups()
if after == None:
after = ''
return ''.join([before, self.shelve(self.encode_html(text)), after])
def noTextile(self, text):
text = self.doSpecial(text, '<notextile>', '</notextile>', self.fTextile)
return self.doSpecial(text, '==', '==', self.fTextile)
def fTextile(self, match):
before, notextile, after = match.groups()
if after == None:
after = ''
return ''.join([before, self.shelve(notextile), after])
def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None):
"""
This function takes additional parameters (encoding and output are accepted for compatibility but currently ignored):
head_offset - offset to apply to heading levels (default: 0)
html_type - 'xhtml' or 'html' style tags (default: 'xhtml')
"""
return Textile().textile(text, head_offset=head_offset,
html_type=html_type)
def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
"""
Restricted version of Textile designed for weblog comments and other
untrusted input.
Raw HTML is escaped.
Style attributes are disabled.
rel='nofollow' is added to external links.
When lite=True is set (the default):
Block tags are restricted to p, bq, and bc.
Lists and tables are disabled.
When noimage=True is set (the default):
Image tags are disabled.
"""
return Textile(restricted=True, lite=lite,
noimage=noimage).textile(text, rel='nofollow',
html_type=html_type)
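
As a quick orientation for the module added above, here is a hedged usage sketch of the two public helpers; the sample markup is invented and the expected behaviour follows the docstrings.

# Illustrative only: exercising the module-level helpers defined above.
if __name__ == '__main__':
    sample = 'h1. Title\n\nSome *strong* textile with a "link":http://example.com/'
    print(textile(sample, html_type='xhtml'))
    # For untrusted input, textile_restricted() escapes raw HTML and adds
    # rel="nofollow" to links.
    print(textile_restricted('<script>alert(1)</script> and *bold*'))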

View File

@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
convert_heuristic, normalize_line_endings
convert_heuristic, normalize_line_endings, convert_textile
from calibre import _ent_pat, xml_entity_to_unicode
class TXTInput(InputFormatPlugin):
@ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin):
'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n'
'* textile: Processing using textile formatting.\n'
'* markdown: Processing using markdown formatting. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
@ -91,6 +92,9 @@ class TXTInput(InputFormatPlugin):
except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
elif options.formatting_type == 'textile':
log.debug('Running text through textile conversion...')
html = convert_textile(txt)
else:
# Determine the paragraph type of the document.
if options.paragraph_type == 'auto':
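
The hunk above routes the new 'textile' value of formatting_type through convert_textile(). A minimal sketch of that dispatch, reduced to a plain function outside the plugin machinery (the function name is invented and 'log' is assumed to be any object with a debug() method):

from calibre.ebooks.txt.processor import (convert_basic, convert_markdown,
    convert_textile)

def txt_to_html(txt, formatting_type, log):
    # Mirrors the branch added above; falls back to basic conversion.
    if formatting_type == 'markdown':
        return convert_markdown(txt)
    elif formatting_type == 'textile':
        log.debug('Running text through textile conversion...')
        return convert_textile(txt)
    return convert_basic(txt)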

View File

@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Read content from txt file.
@ -7,15 +11,11 @@ Read content from txt file.
import os, re
from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.utils.cleantext import clean_ascii_chars
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -34,9 +34,9 @@ def clean_txt(txt):
txt = re.sub('(?<=.)\s+$', '', txt)
# Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt)
#remove ASCII invalid chars
#remove invalid ASCII chars: 0 to 8 and 11-14 to 24
txt = clean_ascii_chars(txt)
return txt
def split_txt(txt, epub_split_size_kb=0):
@ -73,12 +73,18 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0):
return tp.convert(txt, title, epub_split_size_kb)
def convert_markdown(txt, title='', disable_toc=False):
from calibre.ebooks.markdown import markdown
md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'],
extension_configs={"toc": {"disable_toc": disable_toc}},
safe_mode=False)
return HTML_TEMPLATE % (title, md.convert(txt))
def convert_textile(txt, title=''):
from calibre.ebooks.textile import textile
html = textile(txt, encoding='utf-8')
return HTML_TEMPLATE % (title, html)
def normalize_line_endings(txt):
txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n')
@ -114,66 +120,75 @@ def split_string_separator(txt, size) :
def detect_paragraph_type(txt):
'''
Tries to determine the formatting of the document.
block: Paragraphs are separated by a blank line.
single: Each line is a paragraph.
print: Each paragraph starts with a 2+ spaces or a tab
and ends when a new paragraph is reached.
unformatted: most lines have hard line breaks, few/no blank lines or indents
returns block, single, print, unformatted
'''
txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n')
txt_line_count = len(re.findall('(?mu)^\s*.+$', txt))
# Check for hard line breaks - true if 55% of the doc breaks in the same region
docanalysis = DocAnalysis('txt', txt)
hardbreaks = docanalysis.line_histogram(.55)
if hardbreaks:
# Determine print percentage
tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
print_percent = tab_line_count / float(txt_line_count)
# Determine block percentage
empty_line_count = len(re.findall('(?mu)^\s*$', txt))
block_percent = empty_line_count / float(txt_line_count)
# Compare the two types - the type with the larger number of instances wins
# in cases where only one or the other represents the vast majority of the document, neither wins
if print_percent >= block_percent:
if .15 <= print_percent <= .75:
return 'print'
elif .15 <= block_percent <= .75:
return 'block'
return 'block'
# Assume unformatted text with hardbreaks if nothing else matches
# Assume unformatted text with hardbreaks if nothing else matches
return 'unformatted'
# return single if hardbreaks is false
return 'single'
def detect_formatting_type(txt):
markdown_count = 0
textile_count = 0
# Check for markdown
# Headings
if len(re.findall('(?mu)^#+', txt)) >= 5:
return 'markdown'
if len(re.findall('(?mu)^=+$', txt)) >= 5:
return 'markdown'
if len(re.findall('(?mu)^-+$', txt)) >= 5:
return 'markdown'
markdown_count += len(re.findall('(?mu)^#+', txt))
markdown_count += len(re.findall('(?mu)^=+$', txt))
markdown_count += len(re.findall('(?mu)^-+$', txt))
# Images
if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5:
return 'markdown'
markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
# Links
if len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
return 'markdown'
# Escaped characters
md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
for c in md_escapted_characters:
if txt.count('\\'+c) > 10:
markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
# Check for textile
# Headings
textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
# Block quote.
textile_count += len(re.findall(r'(?mu)^bq\.', txt))
# Images
textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
# Links
textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
if markdown_count > 5 or textile_count > 5:
if markdown_count > textile_count:
return 'markdown'
else:
return 'textile'
return 'heuristic'
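
A hedged sketch tying together the helpers changed above: detect the formatting of a plain-text string and convert accordingly. The sample text is invented; note that detect_formatting_type() only commits to 'markdown' or 'textile' once it has counted more than five markers, otherwise it returns 'heuristic'.

from calibre.ebooks.txt.processor import (convert_markdown, convert_textile,
    detect_formatting_type)

sample = 'h1. Heading\n\nbq. A quoted paragraph.\n'
fmt = detect_formatting_type(sample)   # 'markdown', 'textile' or 'heuristic'
if fmt == 'textile':
    html = convert_textile(sample, title='Sample')
elif fmt == 'markdown':
    html = convert_markdown(sample, title='Sample')
else:
    html = sample    # left for the heuristic/basic processors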

View File

@ -28,17 +28,17 @@ class PluginWidget(QWidget, Ui_Form):
def __init__(self, parent=None):
QWidget.__init__(self, parent)
self.setupUi(self)
def initialize(self, name, db): #not working properly to update
from calibre.library.catalog import FIELDS
self.all_fields = [x for x in FIELDS if x != 'all']
#add custom columns
db = db_()
self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
#populate
for x in self.all_fields:
QListWidgetItem(x, self.db_fields)
def initialize(self, name, db): #not working properly to update
self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog
from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
class DRMErrorMessage(QDialog, Ui_Dialog):
def __init__(self, parent=None, title=None):
QDialog.__init__(self, parent)
self.setupUi(self)
if title is not None:
t = unicode(self.msg.text())
self.msg.setText('<h2>%s</h2>%s'%(title, t))
self.resize(self.sizeHint())
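
A hedged usage sketch of the new dialog; 'parent' and the title string are placeholders, and the calls mirror how later hunks in this commit use it (show() for modeless conversion errors, exec_() in the viewer).

from calibre.gui2.dialogs.drm_error import DRMErrorMessage

d = DRMErrorMessage(parent, 'Some Book Title')   # parent: any QWidget or None
d.setModal(False)
d.show()          # or DRMErrorMessage(parent).exec_() to block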

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>417</width>
<height>235</height>
</rect>
</property>
<property name="windowTitle">
<string>This book is DRMed</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="maximumSize">
<size>
<width>132</width>
<height>16777215</height>
</size>
</property>
<property name="text">
<string/>
</property>
<property name="pixmap">
<pixmap resource="../../../../resources/images.qrc">:/images/document-encrypt.png</pixmap>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLabel" name="msg">
<property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Close</set>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections>
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>Dialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>Dialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.book.base import composite_formatter
from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, ResizableDialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic
from calibre.utils.titlecase import titlecase
@ -49,7 +49,7 @@ def get_cover_data(path):
class MyBlockingBusy(QDialog):
class MyBlockingBusy(QDialog): # {{{
do_one_signal = pyqtSignal()
@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
self.current_index += 1
self.do_one_signal.emit()
# }}}
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
s_r_functions = { '' : lambda x: x,
_('Lower Case') : lambda x: icu_lower(x),
@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
]
def __init__(self, window, rows, model, tab):
QDialog.__init__(self, window)
ResizableDialog.__init__(self, window)
Ui_MetadataBulkDialog.__init__(self)
self.setupUi(self)
self.model = model
self.db = model.db
self.ids = [self.db.id(r) for r in rows]

File diff suppressed because it is too large

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import time, os
from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
QVariant, QInputDialog
QVariant
from calibre.web.feeds.recipes import compile_recipe
from calibre.web.feeds.news import AutomaticNewsRecipe
@ -256,24 +256,61 @@ class %(classname)s(%(base_class)s):
def add_builtin_recipe(self):
from calibre.web.feeds.recipes.collection import \
get_builtin_recipe_by_title, get_builtin_recipe_titles
items = sorted(get_builtin_recipe_titles(), key=sort_key)
get_builtin_recipe_collection, get_builtin_recipe_by_id
from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
QListWidget, QDialogButtonBox, QSize
d = QDialog(self)
d.l = QVBoxLayout()
d.setLayout(d.l)
d.list = QListWidget(d)
d.list.doubleClicked.connect(lambda x: d.accept())
d.l.addWidget(d.list)
d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
Qt.Horizontal, d)
d.bb.accepted.connect(d.accept)
d.bb.rejected.connect(d.reject)
d.l.addWidget(d.bb)
d.setWindowTitle(_('Choose builtin recipe'))
items = []
for r in get_builtin_recipe_collection():
id_ = r.get('id', '')
title = r.get('title', '')
lang = r.get('language', '')
if id_ and title:
items.append((title + ' [%s]'%lang, id_))
title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'),
items, 0, False)
if ok:
title = unicode(title)
profile = get_builtin_recipe_by_title(title)
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'),
_('A custom recipe named %s already exists. Do you want to '
'replace it?')%title):
self._model.replace_by_title(title, profile)
else:
return
items.sort(key=lambda x:sort_key(x[0]))
for title, id_ in items:
item = QListWidgetItem(title)
item.setData(Qt.UserRole, id_)
d.list.addItem(item)
d.resize(QSize(450, 400))
ret = d.exec_()
d.list.doubleClicked.disconnect()
if ret != d.Accepted:
return
items = list(d.list.selectedItems())
if not items:
return
item = items[-1]
id_ = unicode(item.data(Qt.UserRole).toString())
title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
profile = get_builtin_recipe_by_id(id_)
if profile is None:
raise Exception('Something weird happened')
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'),
_('A custom recipe named %s already exists. Do you want to '
'replace it?')%title):
self._model.replace_by_title(title, profile)
else:
self.model.add(title, profile)
return
else:
self.model.add(title, profile)
self.clear()

View File

@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
from functools import partial
from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
pyqtSignal, QToolButton, QPushButton, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \
QMenu
pyqtSignal, QToolButton, QMenu, QCheckBox, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
from calibre.constants import __appname__
from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
l.addWidget(x)
self.search_button = QPushButton(_('&Go!'))
self.search_button = QToolButton()
self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.search_button.setText(_('&Go!'))
l.addWidget(self.search_button)
self.search_button.setSizePolicy(QSizePolicy.Minimum,
QSizePolicy.Minimum)
@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{
l.addWidget(x)
x.setToolTip(_("Reset Quick Search"))
x = parent.search_highlight_only = QCheckBox()
x.setText(_('&Highlight'))
x.setToolTip(_('Highlight matched books in the book list, instead '
'of restricting the book list to the matches.'))
l.addWidget(x)
x = parent.saved_search = SavedSearchBox(self)
x.setMaximumSize(QSize(150, 16777215))
x.setMinimumContentsLength(15)

View File

@ -10,7 +10,7 @@ from contextlib import closing
from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate
QModelIndex, QVariant, QDate, QColor
from calibre.gui2 import NONE, config, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException
@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{
self.bool_no_icon = QIcon(I('list_remove.png'))
self.bool_blank_icon = QIcon(I('blank.png'))
self.device_connected = False
self.rows_matching = set()
self.lowest_row_matching = None
self.highlight_only = False
self.read_config()
def change_alignment(self, colname, alignment):
@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{
self.endInsertRows()
self.count_changed()
def set_highlight_only(self, toWhat):
self.highlight_only = toWhat
if self.last_search:
self.research()
def search(self, text, reset=True):
try:
self.db.search(text)
if self.highlight_only:
self.db.search('')
if not text:
self.rows_matching = set()
self.lowest_row_matching = None
else:
self.rows_matching = self.db.search(text, return_matches=True)
if self.rows_matching:
self.lowest_row_matching = self.db.row(self.rows_matching[0])
self.rows_matching = set(self.rows_matching)
else:
self.rows_matching = set()
self.lowest_row_matching = None
self.db.search(text)
except ParseException as e:
self.searched.emit(e.msg)
return
@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
name, val = mi.format_field(key)
if mi.metadata_for_field(key)['datatype'] == 'comments':
name += ':html'
if val:
if val and name not in data:
data[name] = val
return data
@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{
return NONE
if role in (Qt.DisplayRole, Qt.EditRole):
return self.column_to_dc_map[col](index.row())
elif role == Qt.BackgroundColorRole:
if self.id(index) in self.rows_matching:
return QColor('lightgreen')
elif role == Qt.DecorationRole:
if self.column_to_dc_decorator_map[col] is not None:
return self.column_to_dc_decorator_map[index.column()](index.row())
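
A hypothetical driver for the highlight-only search added above; 'model' is assumed to be an existing BooksModel, and the calls mirror the checkbox wiring that appears later in this commit.

model.set_highlight_only(True)
model.search('tag:fiction')      # the book list stays unfiltered
# Matching rows are now painted via Qt.BackgroundColorRole ('lightgreen'),
# and model.lowest_row_matching holds the first matching row (or None),
# which the view uses to position the selection.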

View File

@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{
def set_editable(self, editable, supports_backloading):
self._model.set_editable(editable)
def search_proxy(self, txt):
self._model.search(txt)
if self._model.lowest_row_matching is not None:
self.select_rows([self._model.lowest_row_matching], using_ids=False)
self.setFocus(Qt.OtherFocusReason)
def connect_to_search_box(self, sb, search_done):
sb.search.connect(self._model.search)
sb.search.connect(self.search_proxy)
self._search_done = search_done
self._model.searched.connect(self.search_done)

View File

@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
dont_remove_from=set(['toolbar-device']))
if name is None:
return FakeAction('--- '+_('Separator')+' ---', None)
return gui.iactions[name]
try:
return gui.iactions[name]
except:
return None
def rowCount(self, parent):
return len(self._data)
@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
BaseModel.__init__(self)
self.gprefs_name = 'action-layout-'+key
current = gprefs[self.gprefs_name]
self._data = [self.name_to_action(x, gui) for x in current]
self._data = [self.name_to_action(x, gui) for x in current]
self._data = [x for x in self._data if x is not None]
self.key = key
self.gui = gui

View File

@ -16,6 +16,7 @@ from calibre.gui2 import config
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
from calibre.gui2.dialogs.search import SearchDialog
from calibre.utils.config import dynamic
from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key
@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{
unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip())
self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
self.search_highlight_only.setChecked(
dynamic.get('search_highlight_only', False))
def focus_search_box(self, *args):
self.search.setFocus(Qt.OtherFocusReason)
@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{
def focus_to_library(self):
self.current_view().setFocus(Qt.OtherFocusReason)
def highlight_only_changed(self, toWhat):
dynamic.set('search_highlight_only', toWhat)
self.current_view().model().set_highlight_only(toWhat)
self.focus_to_library()
# }}}
class SavedSearchBoxMixin(object): # {{{

View File

@ -468,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
try:
if 'calibre.ebooks.DRMError' in job.details:
if not minz:
d = error_dialog(self, _('Conversion Error'),
_('<p>Could not convert: %s<p>It is a '
'<a href="%s">DRM</a>ed book. You must first remove the '
'DRM using third party tools.')%\
(job.description.split(':')[-1],
'http://bugs.calibre-ebook.com/wiki/DRM'))
from calibre.gui2.dialogs.drm_error import DRMErrorMessage
d = DRMErrorMessage(self, job.description.split(':')[-1])
d.setModal(False)
d.show()
self._modeless_dialogs.append(d)

View File

@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.ebooks.metadata import MetaInformation
from calibre.customize.ui import available_input_formats
from calibre.gui2.viewer.dictionary import Lookup
from calibre import as_unicode
class TOCItem(QStandardItem):
@ -626,13 +627,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
QApplication.processEvents()
if worker.exception is not None:
if isinstance(worker.exception, DRMError):
error_dialog(self, _('DRM Error'),
_('<p>This book is protected by <a href="%s">DRM</a>')
%'http://wiki.mobileread.com/wiki/DRM').exec_()
from calibre.gui2.dialogs.drm_error import DRMErrorMessage
DRMErrorMessage(self).exec_()
else:
r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'),
unicode(r), det_msg=worker.traceback, show=True)
as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator()
else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])

View File

@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
if isinstance(location, list):
if allow_recursion:
for loc in location:
matches |= self.get_matches(loc, query, allow_recursion=False)
matches |= self.get_matches(loc, query, candidates,
allow_recursion=False)
return matches
raise ParseException(query, len(query), 'Recursive query group detected', self)
@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
fm = self.field_metadata[location]
# take care of dates special case
if fm['datatype'] == 'datetime':
return self.get_dates_matches(location, query.lower())
return self.get_dates_matches(location, query.lower(), candidates)
# take care of numbers special case
if fm['datatype'] in ('rating', 'int', 'float'):
return self.get_numeric_matches(location, query.lower())
return self.get_numeric_matches(location, query.lower(), candidates)
# take care of the 'count' operator for is_multiples
if fm['is_multiple'] and \
@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
query[1:1] in '=<>!':
vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
len(item[loc].split(ms)) if item[loc] is not None else 0
return self.get_numeric_matches(location, query[1:], val_func=vf)
return self.get_numeric_matches(location, query[1:],
candidates, val_func=vf)
# everything else, or 'all' matches
matchkind = CONTAINS_MATCH

View File

@ -1524,19 +1524,32 @@ class EPUB_MOBI(CatalogPlugin):
this_title['formats'] = formats
# Add user notes to be displayed in header
# Special case handling for datetime fields
# Special case handling for datetime fields and lists
if self.opts.header_note_source_field:
field_md = self.__db.metadata_for_field(self.opts.header_note_source_field)
notes = self.__db.get_field(record['id'],
self.opts.header_note_source_field,
index_is_id=True)
if notes and field_md['datatype'] == 'datetime':
# Reformat date fields to match UI presentation: dd MMM YYYY
notes = format_date(notes,'dd MMM yyyy')
if notes:
if field_md['datatype'] == 'text':
if isinstance(notes,list):
notes = ' &middot; '.join(notes)
elif field_md['datatype'] == 'datetime':
notes = format_date(notes,'dd MMM yyyy')
elif field_md['datatype'] == 'composite':
m = re.match(r'\[(.+)\]$', notes)
if m is not None:
# Sniff for special pseudo-list string "[<item, item>]"
bracketed_content = m.group(1)
if ',' in bracketed_content:
# Recast the comma-separated items as a list
items = bracketed_content.split(',')
items = [i.strip() for i in items]
notes = ' &middot; '.join(items)
else:
notes = bracketed_content
this_title['notes'] = {'source':field_md['name'],
'content':notes}
'content':notes}
titles.append(this_title)
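
The composite-field branch above sniffs for a pseudo-list rendered as "[item, item]" and recasts it with middle dots. A standalone rendering of just that step (the helper name and sample values are invented):

import re

def recast_pseudo_list(notes):
    m = re.match(r'\[(.+)\]$', notes)
    if m is None:
        return notes
    bracketed_content = m.group(1)
    if ',' in bracketed_content:
        items = [i.strip() for i in bracketed_content.split(',')]
        return ' &middot; '.join(items)
    return bracketed_content

print(recast_pseudo_list('[Fantasy, Science Fiction]'))   # Fantasy &middot; Science Fiction
print(recast_pseudo_list('[Fantasy]'))                    # Fantasy
print(recast_pseudo_list('Plain note'))                   # Plain note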

View File

@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.has_id = self.data.has_id
self.count = self.data.count
# Count times get_metadata is called, and how many times in the cache
self.gm_count = 0
self.gm_missed = 0
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):
@ -710,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = row[fm['formats']]
if not formats:
formats = None
else:
formats = formats.split(',')
mi.formats = formats
tags = row[fm['tags']]
if tags:

View File

@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
suffix = 'The fix will be in the next release.'
action = action+'ed'
msg = '%s in branch %s. %s'%(action, nick, suffix)
msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url)
server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'},

View File

@ -4,7 +4,6 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import re, htmlentitydefs
from functools import partial
_ascii_pat = None
@ -50,4 +49,4 @@ def unescape(text, rm=False, rchar=u''):
if rm:
return rchar #replace by char
return text # leave as is
return re.sub("&#?\w+;", fixup, text)
return re.sub("&#?\w+;", fixup, text)

View File

@ -18,6 +18,24 @@ class _Parser(object):
LEX_NUM = 4
LEX_EOF = 5
def _python(self, func):
locals = {}
exec func in locals
if 'evaluate' not in locals:
self.error('no evaluate function in python')
try:
result = locals['evaluate'](self.parent.kwargs)
if isinstance(result, (float, int)):
result = unicode(result)
elif isinstance(result, list):
result = ','.join(result)
elif isinstance(result, str):
result = unicode(result)
return result
except Exception as e:
self.error('python function threw exception: ' + unicode(e))
def _strcmp(self, x, y, lt, eq, gt):
v = strcmp(x, y)
if v < 0:
@ -79,6 +97,7 @@ class _Parser(object):
'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
'multiply' : (2, partial(_math, op='*')),
'print' : (-1, _print),
'python' : (1, _python),
'strcat' : (-1, _concat),
'strcmp' : (5, _strcmp),
'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?=\n)', None),
(r'\s', None)
])
], flags=re.DOTALL)
def _eval_program(self, val, prog):
# keep a cache of the lex'ed program under the theory that re-lexing
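
At its core, the new python() template function above exec's a snippet that must define evaluate(kwargs) and then normalises the return value. A stripped-down, hedged sketch of that core (the snippet and kwargs are invented; Python 2 syntax, matching the code above):

func = (
    "def evaluate(kwargs):\n"
    "    return kwargs.get('title', '') + ' / ' + kwargs.get('authors', '')\n"
)
namespace = {}
exec func in namespace
result = namespace['evaluate']({'title': 'A Book', 'authors': 'Someone'})
if isinstance(result, (float, int)):
    result = unicode(result)
print(result)    # A Book / Someone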

View File

@ -92,7 +92,10 @@ def identify_data(data):
or raises an Exception if data is not an image.
'''
img = Image()
img.load(data)
if hasattr(img, 'identify'):
img.identify(data)
else:
img.load(data)
width, height = img.size
fmt = img.format
return (width, height, fmt)
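
A hypothetical caller for identify_data() above; the file name is a placeholder and the import path is assumed.

from calibre.utils.magick import identify_data

raw = open('cover.jpg', 'rb').read()
width, height, fmt = identify_data(raw)   # raises if raw is not an image
print('%dx%d %s' % (width, height, fmt))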

View File

@ -456,6 +456,26 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
// }}}
// Image.identify {{{
static PyObject *
magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) {
const char *data;
Py_ssize_t dlen;
MagickBooleanType res;
NULL_CHECK(NULL)
if (!PyArg_ParseTuple(args, "s#", &data, &dlen)) return NULL;
res = MagickPingImageBlob(self->wand, data, dlen);
if (!res)
return magick_set_exception(self->wand);
Py_RETURN_NONE;
}
// }}}
// Image.open {{{
static PyObject *
magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = {
{"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS,
"Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."},
{"identify", (PyCFunction)magick_Image_identify, METH_VARARGS,
"Identify an image from a byte buffer (string)"
},
{"load", (PyCFunction)magick_Image_load, METH_VARARGS,
"Load an image from a byte buffer (string)"
},

View File

@ -5,5 +5,52 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import glob
from calibre.constants import plugins, iswindows, filesystem_encoding
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.magick import Image, PixelWand
class Unavailable(Exception):
pass
class NoRaster(Exception):
pass
def extract_raster_image(wmf_data):
try:
wmf, wmf_err = plugins['wmf']
except KeyError:
raise Unavailable('libwmf not available on this platform')
if wmf_err:
raise Unavailable(wmf_err)
if iswindows:
import sys, os
appdir = sys.app_dir
if isinstance(appdir, unicode):
appdir = appdir.encode(filesystem_encoding)
fdir = os.path.join(appdir, 'wmffonts')
wmf.set_font_dir(fdir)
data = ''
with TemporaryDirectory('wmf2png') as tdir:
with CurrentDir(tdir):
wmf.render(wmf_data)
images = list(sorted(glob.glob('*.png')))
if not images:
raise NoRaster('No raster images in WMF')
data = open(images[0], 'rb').read()
im = Image()
im.load(data)
pw = PixelWand()
pw.color = '#ffffff'
im.rotate(pw, 180)
return im.export('png')
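
A hypothetical driver for extract_raster_image() above; the file names are placeholders and the import path is assumed.

from calibre.utils.wmf import extract_raster_image, Unavailable, NoRaster

try:
    png_data = extract_raster_image(open('drawing.wmf', 'rb').read())
except Unavailable as err:
    print('libwmf is not available: %s' % err)
except NoRaster:
    print('The WMF contains no raster image')
else:
    open('drawing.png', 'wb').write(png_data)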

View File

@ -4,6 +4,7 @@
#include <libwmf/api.h>
#include <libwmf/svg.h>
//#include <libwmf/gd.h>
typedef struct {
char *data;
@ -13,7 +14,7 @@ typedef struct {
//This code is taken mostly from the Abiword wmf plugin
// Buffer read {{{
// returns unsigned char cast to int, or EOF
static int wmf_WMF_read(void * context) {
char c;
@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
if (info->pos == info->len)
return EOF;
c = info->data[pos];
c = info->data[info->pos];
info->pos++;
return (int)c;
return (int)((unsigned char)c);
}
// returns (-1) on error, else 0
@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
return (long) info->pos;
}
// }}}
char _png_name_buf[100];
char *wmf_png_name(void *ctxt) {
int *num = (int*)ctxt;
*num = *num + 1;
snprintf(_png_name_buf, 90, "%04d.png", *num);
return _png_name_buf;
}
#define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
static PyObject *
@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
unsigned int max_width = 1600;
unsigned int max_height = 1200;
unsigned long max_flags = 0;
static const char* Default_Description = "wmf2svg";
int fname_counter = 0;
wmf_error_t err;
@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->Description = (char *)Default_Description;
ddata->bbox = bbox;
ddata->image.context = (void *)&fname_counter;
ddata->image.name = wmf_png_name;
wmf_display_size(API, &disp_width, &disp_height, 96, 96);
@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->height = (unsigned int) ceil ((double) wmf_height);
}
ddata->flags |= WMF_SVG_INLINE_IMAGES;
ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
// Needs GD
//ddata->flags |= WMF_SVG_INLINE_IMAGES;
//ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
err = wmf_play(API, 0, &(bbox));
@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
return ans;
}
#ifdef _WIN32
void set_libwmf_fontdir(const char *);
static PyObject *
wmf_setfontdir(PyObject *self, PyObject *args) {
char *path;
if (!PyArg_ParseTuple(args, "s", &path))
return NULL;
set_libwmf_fontdir(path);
Py_RETURN_NONE;
}
#endif
static PyMethodDef wmf_methods[] = {
{"render", wmf_render, METH_VARARGS,
"render(path) -> Render wmf as svg."
"render(data) -> Render wmf as svg."
},
#ifdef _WIN32
{"set_font_dir", wmf_setfontdir, METH_VARARGS,
"set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
},
#endif
{NULL} /* Sentinel */
};

View File

@ -982,9 +982,12 @@ class ZipFile:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
raise BadZipfile, \
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
print ('WARNING: Header (%r) and directory (%r) filenames do not'
' match inside ZipFile')%(fname, zinfo.orig_filename)
print 'Using directory filename %r'%zinfo.orig_filename
#raise BadZipfile, \
# 'File name in directory "%r" and header "%r" differ.' % (
# zinfo.orig_filename, fname)
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1

View File

@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe):
for attr in self.remove_attributes:
for x in soup.findAll(attrs={attr:True}):
del x[attr]
for base in list(soup.findAll(['base', 'iframe'])):
for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed',
'command', 'datalist', 'video', 'audio'])):
base.extract()
ans = self.postprocess_html(soup, first_fetch)
# Nuke HTML5 tags
for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav',
'figcaption', 'figure', 'section']):
x.name = 'div'
if job_info:
url, f, a, feed_len = job_info
try:

View File

@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
br = browser()
return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
for x in get_builtin_recipe_collection():
if x.get('title') == title:
@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True)
def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
for x in get_builtin_recipe_collection():
if x.get('id') == id_:
urn = x.get('id')[8:]
if download_recipe:
try:
if log is not None:
log('Trying to get latest version of recipe:', urn)
return download_builtin_recipe(urn)
except:
if log is None:
import traceback
traceback.print_exc()
else:
log.exception(
'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True)
class SchedulerConfig(object):
def __init__(self):