Merge from trunk

2025-08-30 23:00:21 -04:00 · 2011-10-19 07:51:44 +02:00 · 2011-10-19 07:51:44 +02:00 · 9548696089
commit 9548696089
parent bef7077158 d1ef8de37b
37 changed files with 1129 additions and 358 deletions
--- a/recipes/20minutes.recipe
+++ b/recipes/20minutes.recipe
@ -0,0 +1,71 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
 '''
 20minutes.fr
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class Minutes(BasicNewsRecipe):
    # Recipe for the French daily 20minutes.fr, built from its per-section RSS feeds.

    # --- identification -------------------------------------------------
    title = '20 minutes'
    __author__ = 'calibre'
    description = 'Actualités'
    publisher = '20minutes.fr'
    category = 'Actualités, France, Monde'
    language = 'fr'

    # --- download behaviour ---------------------------------------------
    encoding = 'cp1252'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # --- styling applied to the generated book --------------------------
    extra_css = '''
                    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    # --- article clean-up rules -----------------------------------------
    keep_only_tags = [dict(id='mn-article')]
    remove_tags_after = dict(
        name='div',
        attrs={'class': ['mna-body', 'mna-signature']},
    )
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['mn-section-heading']}),
        dict(name='a', attrs={'href': ['#commentaires']}),
        dict(name='div', attrs={'class': ['mn-right']}),
        dict(name='div', attrs={'class': ['mna-box']}),
        dict(name='div', attrs={'class': ['mna-comment-call']}),
        dict(name='div', attrs={'class': ['mna-tools']}),
        dict(name='div', attrs={'class': ['mn-trilist']}),
    ]

    # --- sections: (title, RSS url) -------------------------------------
    feeds = [
        ('France', 'http://www.20minutes.fr/rss/actu-france.xml'),
        ('International', 'http://www.20minutes.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'),
        ('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'),
        ('Economie', 'http://www.20minutes.fr/rss/economie.xml'),
        ('Politique', 'http://www.20minutes.fr/rss/politique.xml'),
        (u'Médias', 'http://www.20minutes.fr/rss/media.xml'),
        ('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'),
        ('People', 'http://www.20minutes.fr/rss/people.xml'),
        ('Culture', 'http://www.20minutes.fr/rss/culture.xml'),
        ('Sport', 'http://www.20minutes.fr/rss/sport.xml'),
        ('Paris', 'http://www.20minutes.fr/rss/paris.xml'),
        ('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'),
        ('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml'),
    ]

    def preprocess_html(self, soup):
        '''
        Drop the inline ``style`` attribute from every tag that carries one
        and hand the same soup object back.
        '''
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup
--- a/recipes/cnn.recipe
+++ b/recipes/cnn.recipe
@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
    #match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
    max_articles_per_feed = 25
    extra_css = '''
                    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    .cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''
    preprocess_regexps = [
        (re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
        (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):
    remove_tags = [
            {'class':['cnn_strybtntools', 'cnn_strylftcntnt',
                'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
-                'cnn_strycntntrgt', 'hed_side', 'foot']},
+                'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
            {'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
                'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
            {'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
                'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
            {'style':['display:none']},
            dict(id=['ie_column']),
    ]
@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
        ans = BasicNewsRecipe.get_article_url(self, article)
        return ans.partition('?')[0]
    def get_masthead_url(self):
        '''
        Return the URL of the masthead image, or None when the image cannot
        be opened.

        The image is requested once as an availability probe; the response
        itself is discarded.
        '''
        masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
        # get_browser is an instance method; when it is reached through the
        # class, self has to be passed explicitly or the call raises TypeError
        # before the probe below is even attempted.
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(masthead)
        except Exception:
            # Best effort: any failure to fetch just means "no masthead".
            self.log("\nCover unavailable")
            masthead = None
        return masthead
--- a/recipes/ekathemerini.recipe
+++ b/recipes/ekathemerini.recipe
@ -0,0 +1,58 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
 class Ekathimerini(BasicNewsRecipe):
    # Recipe for the English edition of Kathimerini. Articles come from a
    # single XML file (rss_url) and are grouped by each item's <subcat>.

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'

    encoding = 'windows-1253'
    max_articles_per_feed = 100
    oldest_article = 100
    delay = 1
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    keep_only_tags = [dict(name='td', attrs={'class': 'news'})]

    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        '''
        Yield one article dict (title, url, description, date) for every
        <item> in idx whose <subcat> text equals category. An item without
        a <subcat> element is treated as belonging to category u''.
        '''
        for entry in idx.findAll('item'):
            subcat_tag = entry.find('subcat')
            entry_cat = self.tag_to_string(subcat_tag) if subcat_tag else u''
            if entry_cat != category:
                continue
            # The description text is itself parsed as HTML and flattened
            # to plain text a second time.
            raw_description = self.tag_to_string(entry.find('description'))
            plain_description = self.tag_to_string(BeautifulSoup(raw_description))
            yield {
                'title': self.tag_to_string(entry.find('title')),
                'url': self.tag_to_string(entry.find('link')),
                'description': plain_description,
                'date': self.tag_to_string(entry.find('pubdate')),
            }

    def parse_index(self):
        '''
        Download rss_url and return a list of (feed title, articles) pairs:
        first u'News' for uncategorised items, then one feed per distinct
        <subcat> value in sorted order, with the title capitalized.
        '''
        raw_index = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(raw_index, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        subcat_names = sorted(set(self.tag_to_string(tag) for tag in idx.findAll('subcat')))
        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        feeds.extend(
            (name.capitalize(), list(self.find_articles(idx, name)))
            for name in subcat_names
        )
        return feeds

    def print_version(self, url):
        '''Rewrite an article URL by swapping the /4dcgi/ prefix for /4Dcgi/4dcgi/.'''
        old_prefix = 'http://www.ekathimerini.com/4dcgi/'
        new_prefix = 'http://www.ekathimerini.com/4Dcgi/4dcgi/'
        return url.replace(old_prefix, new_prefix)
--- a/recipes/el_pais.recipe
+++ b/recipes/el_pais.recipe
@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
    remove_javascript = True
    no_stylesheets = True
-    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
+    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]
    extra_css             = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
--- a/recipes/frandroid.recipe
+++ b/recipes/frandroid.recipe
@ -0,0 +1,8 @@
 # -*- coding: utf-8 -*-
 class BasicUserRecipe1318572550(AutomaticNewsRecipe):
    # Single-feed recipe for FrAndroid with automatic clean-up enabled.
    title = u'FrAndroid'
    auto_cleanup = True
    oldest_article = 2
    max_articles_per_feed = 100
    feeds = [
        (u'FrAndroid', u'http://feeds.feedburner.com/Frandroid'),
    ]
--- a/recipes/googlemobileblog.recipe
+++ b/recipes/googlemobileblog.recipe
@ -0,0 +1,8 @@
 # -*- coding: utf-8 -*-
 class BasicUserRecipe1318572445(AutomaticNewsRecipe):
    # Single-feed recipe for the Google Mobile Blog with automatic clean-up enabled.
    title = u'Google Mobile Blog'
    auto_cleanup = True
    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [
        (u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml'),
    ]
--- a/recipes/hankyoreh.recipe
+++ b/recipes/hankyoreh.recipe
@ -0,0 +1,50 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
 '''
 Profile to download The Hankyoreh
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class Hankyoreh(BasicNewsRecipe):
    # Recipe for The Hankyoreh (Korean daily), one feed per site section.

    title = u'Hankyoreh'
    __author__ = 'Seongkyoun Yoo'
    description = u'The Hankyoreh News articles'
    language = 'ko'

    oldest_article = 5
    max_articles_per_feed = 5
    recursions = 1
    no_stylesheets = True

    # Article clean-up rules.
    keep_only_tags = [
        dict(name='tr', attrs={'height': ['60px']}),
        dict(id=['fontSzArea']),
    ]
    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style': ['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width': ['590']}),
    ]
    remove_tags_after = [
        dict(target='_top'),
    ]

    # Sections: (title, RSS url).
    feeds = [
        ('All News', 'http://www.hani.co.kr/rss/'),
        ('Politics', 'http://www.hani.co.kr/rss/politics/'),
        ('Economy', 'http://www.hani.co.kr/rss/economy/'),
        ('Society', 'http://www.hani.co.kr/rss/society/'),
        ('International', 'http://www.hani.co.kr/rss/international/'),
        ('Culture', 'http://www.hani.co.kr/rss/culture/'),
        ('Sports', 'http://www.hani.co.kr/rss/sports/'),
        ('Science', 'http://www.hani.co.kr/rss/science/'),
        ('Opinion', 'http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon', 'http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition', 'http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection', 'http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly', 'http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly', 'http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani', 'http://www.hani.co.kr/rss/multihani/'),
        ('Lead', 'http://www.hani.co.kr/rss/lead/'),
        ('Newsrank', 'http://www.hani.co.kr/rss/newsrank/'),
    ]
--- a/recipes/hankyoreh21.recipe
+++ b/recipes/hankyoreh21.recipe
@ -0,0 +1,26 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
 '''
 Profile to download The Hankyoreh
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Hankyoreh21(BasicNewsRecipe):
    # Recipe for the Hankyoreh21 magazine, fed from a single RSS feed.
    title          = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets         = True
    remove_javascript     = True
    keep_only_tags    = [
                        dict(name='font', attrs={'class':'t18bk'}),
                        dict(id=['fontSzArea'])
                        ]
    feeds = [
        # The URL literal previously ended with a stray space, which is not
        # part of the feed address.
        ('Hani21','http://h21.hani.co.kr/rss/'),
           ]
--- a/recipes/korben.recipe
+++ b/recipes/korben.recipe
@ -0,0 +1,18 @@
 # -*- coding: utf-8 -*-
 class BasicUserRecipe1318619728(AutomaticNewsRecipe):
    # Single-feed recipe for the Korben blog with automatic clean-up enabled.
    title          = u'Korben'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    feeds          = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]

    def get_masthead_url(self):
        '''
        Return the URL of the site logo, or None when the logo cannot be
        opened. The image is requested once purely as an availability probe.
        '''
        masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
        # get_browser is an instance method; when it is reached through the
        # class, self has to be passed explicitly or the call raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(masthead)
        except Exception:
            # Best effort: any failure to fetch just means "no masthead".
            self.log("\nCover unavailable")
            masthead = None
        return masthead
--- a/recipes/korea_herald.recipe
+++ b/recipes/korea_herald.recipe
@ -1,36 +1,35 @@
-__license__   = 'GPL v3'
+__license__   = 'GPL v3'
-__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
+__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
-'''
+'''
-Profile to download KoreaHerald
+Profile to download KoreaHerald
-'''
+'''
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe
-
+
-class KoreaHerald(BasicNewsRecipe):
+class KoreaHerald(BasicNewsRecipe):
-    title          = u'KoreaHerald'
+    title          = u'KoreaHerald'
-    language = 'en'
+    language = 'en'
-    description = u'Korea Herald News articles'
+    description = u'Korea Herald News articles'
-    __author__	= 'Seongkyoun Yoo'
+    __author__	= 'Seongkyoun Yoo'
-    oldest_article = 10
+    oldest_article = 15
-    recursions = 3
+    recursions = 3
-    max_articles_per_feed = 10
+    max_articles_per_feed = 15
-    no_stylesheets         = True
+    no_stylesheets         = True
-    keep_only_tags    = [
+    keep_only_tags    = [
-						dict(id=['contentLeft', '_article'])
+						dict(id=['contentLeft', '_article'])
-                        ]
+                        ]
-
+
-    remove_tags = [
+    remove_tags = [
-       dict(name='iframe'),
+       dict(name='iframe'),
-       dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
+       dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
-       dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
+       dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
-       ]
+       ]
-
+
-    feeds = [
+    feeds = [
-	('All News','http://www.koreaherald.com/rss/020000000000.xml'),
+    ('National','http://www.koreaherald.com/rss/020100000000.xml'),
-    ('National','http://www.koreaherald.com/rss/020100000000.xml'),
+    ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
-    ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
+    ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
-    ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
+    ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
-    ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
+    ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
-    ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
+    ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
-    ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
+    ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
-    ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
+	]
 	]
--- a/recipes/kstar.recipe
+++ b/recipes/kstar.recipe
@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
-    title = 'Kansascity Star'
+    title = 'Kansas City Star'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'www.kansascity.com feed'
--- a/recipes/kyungyhang
+++ b/recipes/kyungyhang
@ -0,0 +1,37 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
 '''
 Profile to download The Kyungyhang
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Kyungyhang(BasicNewsRecipe):
    # Recipe for the Kyunghyang Shinmun (Korean daily), fed from one RSS feed.

    title = u'Kyungyhang'
    __author__ = 'Seongkyoun Yoo'
    description = u'The Kyungyhang Shinmun articles'
    language = 'ko'

    oldest_article = 20
    max_articles_per_feed = 20
    recursions = 2
    no_stylesheets = True
    remove_javascript = True

    # Article clean-up rules. Note that the two id matchers below are
    # one-element sets, exactly as in the original recipe.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article_title_wrap']}),
        dict(name='div', attrs={'class': ['article_txt']}),
    ]
    remove_tags_after = dict(id={'sub_bottom'})
    remove_tags = [
        dict(name='iframe'),
        dict(id={'TdHot'}),
        dict(name='div', attrs={'class': ['btn_list', 'bline', 'linebottom', 'bestArticle']}),
        dict(name='dl', attrs={'class': ['CL']}),
        dict(name='ul', attrs={'class': ['tab']}),
    ]

    # Sections: (title, RSS url).
    feeds = [
        ('All News', 'http://www.khan.co.kr/rss/rssdata/total_news.xml'),
    ]
--- a/recipes/la_republica.recipe
+++ b/recipes/la_republica.recipe
@ -1,32 +1,37 @@
 __license__   = 'GPL v3'
 __author__    = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
 __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
-description   = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
+description   = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
 '''
 http://www.repubblica.it/
 '''
 import re
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe
 class LaRepubblica(BasicNewsRecipe):
-    title                = 'La Repubblica'
+    title                   = 'La Repubblica'
-    __author__           = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
+    __author__              = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
-    description          = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
+    description             = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
-    masthead_url         = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
+    masthead_url            = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
-    publisher            = 'Gruppo editoriale L\'Espresso'
+    publisher               = 'Gruppo editoriale L\'Espresso'
-    category             = 'News, politics, culture, economy, general interest'
+    category                = 'News, politics, culture, economy, general interest'
-    language             = 'it'
+    language                = 'it'
-    timefmt              = '[%a, %d %b, %Y]'
+    timefmt                 = '[%a, %d %b, %Y]'
-    oldest_article       = 5
+    oldest_article          = 5
-    encoding             = 'utf8'
+    encoding                = 'utf8'
-    use_embedded_content = False
+    use_embedded_content    = False
-    #recursion           = 10
+    no_stylesheets          = True
-    no_stylesheets       = True
+    publication_type        = 'newspaper'
-    extra_css            = """
+    articles_are_obfuscated = True    
-                              img{display: block}
+    temp_files              = []    
-                           """
+    extra_css               = """
                               img{display: block}
                              """
    remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
    preprocess_regexps = [
        (re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
@ -35,11 +40,28 @@ class LaRepubblica(BasicNewsRecipe):
    ]
    def get_article_url(self, article):
-        link = article.get('id', article.get('guid', None))
+        link = BasicNewsRecipe.get_article_url(self, article)
-        if link is None:
+        if link and not '.repubblica.it/' in link:
-            return article
+            link2 = article.get('id', article.get('guid', None))
-        return link
+            if link2:
-    
+                link = link2
        return link.rpartition('?')[0]        
    def get_obfuscated_article(self, url):
        '''
        Download url (up to 10 attempts) into a persistent temporary file
        and return that file's path.

        The temporary file object is also appended to self.temp_files.

        :param url: address of the article to fetch
        :raises IOError: if every attempt to download url fails
        '''
        max_attempts = 10
        html = None
        for _attempt in range(max_attempts):
            try:
                html = self.browser.open(url).read()
                break
            except Exception:
                # Parenthesised form prints the same text on Python 2 and 3.
                print("Retrying download...")
        if html is None:
            # Previously this path fell through and failed with a NameError
            # on the unbound html variable; fail with an explicit error.
            raise IOError('Failed to download %s after %d attempts' % (url, max_attempts))
        tmp = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(tmp)
        tmp.write(html)
        tmp.close()
        return tmp.name
    keep_only_tags     = [
                          dict(attrs={'class':'articolo'}),
                          dict(attrs={'class':'body-text'}),
@ -49,7 +71,7 @@ class LaRepubblica(BasicNewsRecipe):
    remove_tags        = [
-                            dict(name=['object','link','meta']),
+                            dict(name=['object','link','meta','iframe','embed']),
                            dict(name='span',attrs={'class':'linkindice'}),
                            dict(name='div', attrs={'class':'bottom-mobile'}),
                            dict(name='div', attrs={'id':['rssdiv','blocco']}),
@ -80,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
                       (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
                      ]
    def preprocess_html(self, soup):
        '''
        Turn every hgroup / deresponsabilizzazione / per element into an
        attribute-less div, then strip the inline ``style`` attribute from
        any tag that still has one. Returns the same soup object.
        '''
        renamed = ['hgroup','deresponsabilizzazione','per']
        for odd_tag in soup.findAll(renamed):
            odd_tag.name = 'div'
            odd_tag.attrs = []
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup
--- a/recipes/lepoint.recipe
+++ b/recipes/lepoint.recipe
@ -0,0 +1,76 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
 '''
 LePoint.fr
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class lepoint(BasicNewsRecipe):
    # Recipe for the French weekly Le Point, built from its per-section RSS feeds.
    title                  = 'Le Point'
    __author__             = 'calibre'
    description            = 'Actualités'
    encoding               = 'utf-8'
    publisher              = 'LePoint.fr'
    category               = 'news, France, world'
    language               = 'fr'
    use_embedded_content   = False
    timefmt                = ' [%d %b %Y]'
    max_articles_per_feed  = 15
    no_stylesheets         = True
    remove_empty_feeds     = True
    filterDuplicates       = True
    extra_css = '''
                    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
                    .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''
    # Article clean-up rules.
    remove_tags = [
            dict(name='iframe'),
            dict(name='div', attrs={'class':['entete_chroniqueur']}),
            dict(name='div', attrs={'class':['col_article']}),
            dict(name='div', attrs={'class':['signature_article']}),
            dict(name='div', attrs={'class':['util_font util_article']}),
            dict(name='div', attrs={'class':['util_article bottom']})
    ]
    keep_only_tags    = [dict(name='div', attrs={'class':['page_article']})]
    remove_tags_after  = dict(name='div', attrs={'class':['util_article bottom']})
    # Sections: (title, RSS url).
    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        # Section title spelling corrected (was u'Socièté').
        (u'Société', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml')
    ]

    def preprocess_html(self, soup):
        '''
        Drop the inline ``style`` attribute from every tag that carries one
        and return the same soup object.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        '''
        Return the URL of the Le Point logo, or None when the logo cannot be
        opened. The image is requested once purely as an availability probe.
        '''
        masthead = 'http://www.lepoint.fr/images/commun/logo.png'
        # get_browser is an instance method; when it is reached through the
        # class, self has to be passed explicitly or the call raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(masthead)
        except Exception:
            # Best effort: any failure to fetch just means "no masthead".
            self.log("\nCover unavailable")
            masthead = None
        return masthead
--- a/recipes/lexpress.recipe
+++ b/recipes/lexpress.recipe
@ -0,0 +1,74 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
 '''
 Lexpress.fr
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class lepoint(BasicNewsRecipe):
    # Recipe for the French weekly L'Express, built from its per-section RSS
    # feeds. NOTE(review): the class name duplicates the one in the Le Point
    # recipe; it is kept unchanged here so the recipe's identifier stays stable.
    title                  = 'L\'express'
    __author__             = 'calibre'
    description            = 'Actualités'
    encoding               = 'cp1252'
    publisher              = 'LExpress.fr'
    category               = 'Actualité, France, Monde'
    language               = 'fr'
    use_embedded_content   = False
    timefmt                = ' [%d %b %Y]'
    max_articles_per_feed  = 15
    no_stylesheets         = True
    remove_empty_feeds     = True
    filterDuplicates       = True
    # The .entete rule previously read "font-weiht", which is not a CSS property.
    extra_css = '''
                    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    .current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    #contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                    .entete { font-weight:bold;}
                '''
    # Article clean-up rules.
    remove_tags = [
            dict(name='iframe'),
            dict(name='div', attrs={'class':['barre-outil-fb']}),
            dict(name='div', attrs={'class':['barre-outils']}),
            dict(id='bloc-sommaire'),
            dict(id='footer-article')
    ]
    keep_only_tags    = [dict(name='div', attrs={'class':['bloc-article']})]
    remove_tags_after  = dict(id='content-article')
    # Sections: (title, RSS url).
    feeds = [
        (u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
        ('International', 'http://www.lexpress.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
        (u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
        # Section title spelling corrected (was u'Envronnement').
        (u'Environnement', 'http://www.lexpress.fr/rss/environnement.xml'),
        # This entry previously pointed at lepoint.fr (copy/paste from the
        # Le Point recipe). NOTE(review): the path follows the naming of the
        # sibling lexpress.fr feeds -- confirm it resolves.
        ('Economie', 'http://www.lexpress.fr/rss/economie.xml'),
        # Section title spelling corrected (was u'Socièté').
        (u'Société', 'http://www.lexpress.fr/rss/societe.xml'),
        ('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
        (u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
        ('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
        ('Sport', 'http://www.lexpress.fr/rss/sport.xml')
    ]

    def preprocess_html(self, soup):
        '''
        Drop the inline ``style`` attribute from every tag that carries one
        and return the same soup object.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        '''
        Return the URL of the L'Express logo, or None when the logo cannot be
        opened. The image is requested once purely as an availability probe.
        '''
        masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
        # get_browser is an instance method; when it is reached through the
        # class, self has to be passed explicitly or the call raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(masthead)
        except Exception:
            # Best effort: any failure to fetch just means "no masthead".
            self.log("\nCover unavailable")
            masthead = None
        return masthead
--- a/recipes/liberation.recipe
+++ b/recipes/liberation.recipe
@ -9,39 +9,72 @@ liberation.fr
 from calibre.web.feeds.news import BasicNewsRecipe
 class Liberation(BasicNewsRecipe):
    title                 = u'Liberation'
-    __author__            = 'Darko Miletic'
+    __author__            = 'calibre'
-    description           = 'News from France'
+    description           = 'Actualités'
-    language = 'fr'
+    category               = 'Actualités, France, Monde'
    language              = 'fr'
-    oldest_article        = 7
+    use_embedded_content   = False
-    max_articles_per_feed = 100
+    timefmt                = ' [%d %b %Y]'
-    no_stylesheets        = True
+    max_articles_per_feed  = 15
-    use_embedded_content  = False
+    no_stylesheets         = True
    remove_empty_feeds     = True
    filterDuplicates       = True
-    html2lrf_options = ['--base-font-size', '10']
+    extra_css = '''
                    h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
                    h4, h5, h2.rubrique,  {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .mna-body, entry-body  {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''
    keep_only_tags    = [
-                           dict(name='h1')
+                  dict(name='div', attrs={'class':'article'})
-                          #,dict(name='div', attrs={'class':'object-content text text-item'})
+                  ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
-                          ,dict(name='div', attrs={'class':'article'})
+                  ,dict(name='div', attrs={'class':'entry'})
-                          #,dict(name='div', attrs={'class':'articleContent'})
+                  ,dict(name='div', attrs={'class':'col_contenu'})
-                          ,dict(name='div', attrs={'class':'entry'})
+    ]
-                        ]
+
-    remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
+    remove_tags_after = [
        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
        ,dict(name='p',attrs={'class':['chapo']})
        ,dict(id='_twitter_facebook')
    ]
    remove_tags    = [
-                        dict(name='p', attrs={'class':'clear'})
+                        dict(name='iframe')
-                       ,dict(name='ul', attrs={'class':'floatLeft clear'})
+                        ,dict(name='a', attrs={'class':'lnk-comments'})
-                       ,dict(name='div', attrs={'class':'clear floatRight'})
+                        ,dict(name='div', attrs={'class':'toolbox'})
-                       ,dict(name='object')
+                        ,dict(name='ul', attrs={'class':'share-box'})
-                       ,dict(name='div', attrs={'class':'toolbox'})
+                        ,dict(name='ul', attrs={'class':'tool-box'})
-                       ,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
+                        ,dict(name='ul', attrs={'class':'rub'})
-                       #,dict(name='div', attrs={'class':'clear block block-call-items'})
+                        ,dict(name='p',attrs={'class':['chapo']})
-                       ,dict(name='div', attrs={'class':'block-content'})
+                        ,dict(name='p',attrs={'class':['tag']})
                        ,dict(name='div',attrs={'class':['blokLies']})
                        ,dict(name='div',attrs={'class':['alire']})
                        ,dict(id='_twitter_facebook')
                     ]
    feeds          = [
-                         (u'La une', u'http://www.liberation.fr/rss/laune')
+                         (u'La une', u'http://rss.liberation.fr/rss/9/')
-                        ,(u'Monde' , u'http://www.liberation.fr/rss/monde')
+                        ,(u'Monde' , u'http://www.liberation.fr/rss/10/')
-                        ,(u'Sports', u'http://www.liberation.fr/rss/sports')
+                        ,(u'Économie', u'http://www.liberation.fr/rss/13/')
                        ,(u'Politiques', u'http://www.liberation.fr/rss/11/')
                        ,(u'Société', u'http://www.liberation.fr/rss/12/')
                        ,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
                        ,(u'Écran', u'http://www.liberation.fr/rss/53/')
                        ,(u'Sports', u'http://www.liberation.fr/rss/12/')
                     ]
    def get_masthead_url(self):
        '''
        Return the URL of the Liberation logo, or None when the logo cannot
        be opened. The image is requested once purely as an availability probe.
        '''
        masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
        # get_browser is an instance method; when it is reached through the
        # class, self has to be passed explicitly or the call raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(masthead)
        except Exception:
            # Best effort: any failure to fetch just means "no masthead".
            self.log("\nCover unavailable")
            masthead = None
        return masthead
--- a/recipes/los_tiempos_bo.recipe
+++ b/recipes/los_tiempos_bo.recipe
@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
-    cover_url             = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg')
+    cover_url             = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
    masthead_url          = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
    extra_css             = """ body{font-family: Arial,Helvetica,sans-serif }
                                img{margin-bottom: 0.4em}
--- a/recipes/ming_pao.recipe
+++ b/recipes/ming_pao.recipe
@ -4,26 +4,27 @@ __copyright__ = '2010-2011, Eddie Lau'
 # Region - Hong Kong, Vancouver, Toronto
 __Region__ = 'Hong Kong'
 # Users of Kindle 3 with limited system-level CJK support
-# please replace the following "True" with "False".
+# please replace the following "True" with "False". (Default: True)
 __MakePeriodical__ = True
-# Turn below to True if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles (Default: False)
 __UseChineseTitle__ = False
-# Set it to False if you want to skip images
+# Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
-# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
 __UseLife__ = True
-# (HK only) It is to disable the column section which is now a premium content
+# (HK only) It is to disable premium content (Default: False)
-__InclCols__ = False
+__InclPremium__ = False
-# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
-__ParsePFF__ = False
+__ParsePFF__ = True
-# (HK only) Turn below to True if you wish hi-res images
+# (HK only) Turn below to True if you wish hi-res images (Default: False)
 __HiResImg__ = False
 '''
 Change Log:
 2011/10/17: disable fetching of premium content, also improved txt source parsing
 2011/10/04: option to get hi-res photos for the articles
-2011/09/21: fetching "column" section is made optional.
+2011/09/21: fetching "column" section is made optional. 
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@ -72,7 +73,7 @@ class MPRecipe(BasicNewsRecipe):
                          dict(attrs={'class':['content']}),  # for content from txt
                          dict(attrs={'class':['photo']}),
                          dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}),  # content in printed version of life.mingpao.com
-                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
+                          dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
                          dict(attrs={'class':['images']})   # for images from txt
                          ]
        if __KeepImages__:
@ -208,18 +209,21 @@ class MPRecipe(BasicNewsRecipe):
                                           (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                                           (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                          ]:
-                    articles = self.parse_section2(url, keystr)
+                    if __InclPremium__ == True:
                        articles = self.parse_section2_txt(url, keystr)
                    else:
                        articles = self.parse_section2(url, keystr)
                    if articles:
                        feeds.append((title, articles))
-                if __InclCols__ == True:
+                if __InclPremium__ == True:
                    # parse column section articles directly from .txt files
                    for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                              ]:
                        articles = self.parse_section2_txt(url, keystr)
                        if articles:
                            feeds.append((title, articles))
-
+                        
                for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                   (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                    articles = self.parse_section(url)
@ -253,10 +257,10 @@ class MPRecipe(BasicNewsRecipe):
                #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
                for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
-                    articles = self.parse_section2(url, keystr)
+                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))
-
+                        
                #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                #    articles = self.parse_section(url)
@ -270,18 +274,18 @@ class MPRecipe(BasicNewsRecipe):
                for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                          ]:
-                    articles = self.parse_section2(url, keystr)
+                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))
-
+                        
-                if __InclCols__ == True:
+                if __InclPremium__ == True:
                    # parse column section articles directly from .txt files
                    for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                              ]:
                        articles = self.parse_section2_txt(url, keystr)
                        if articles:
                            feeds.append((title, articles))
-
+                            
                for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                   (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                    articles = self.parse_section(url)
@ -333,7 +337,7 @@ class MPRecipe(BasicNewsRecipe):
            url = 'http://news.mingpao.com/' + dateStr + '/' +url
            # replace the url to the print-friendly version
            if __ParsePFF__ == True:
-                if url.rfind('Redirect') <> -1:
+                if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                    url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                    url = re.sub('%2F.*%2F', '/', url)
                    title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
@ -349,6 +353,8 @@ class MPRecipe(BasicNewsRecipe):
    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
@ -359,9 +365,13 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
-                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
+                try: 
-                current_articles.append({'title': title, 'url': url, 'description': ''})
+                    br.open_novisit(url)
-                included_urls.append(url)
+                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
                    current_articles.append({'title': title, 'url': url, 'description': ''})
                    included_urls.append(url)
                except:
 				    print 'skipping a premium article'
        current_articles.reverse()
        return current_articles
@ -382,7 +392,7 @@ class MPRecipe(BasicNewsRecipe):
                included_urls.append(url)
        current_articles.reverse()
        return current_articles
-
+        
    # parse from www.mingpaovan.com
    def parse_section3(self, url, baseUrl):
        self.get_fetchdate()
@ -470,23 +480,23 @@ class MPRecipe(BasicNewsRecipe):
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
-            if url.rfind('news.mingpao.com') > -1:
+            if url.rfind('news.mingpao.com') > -1: 
                imglist =  re.findall('src="?.*?jpg"', raw_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
-                    try:
+                    try: 
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        raw_html = raw_html.replace(img, gifimg)
-                    except:
+                    except: 
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            raw_html = raw_html.replace(img, newimg)
-                        else:
+                        else: 
                            # if not found, insert _ after "
                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
@ -510,7 +520,7 @@ class MPRecipe(BasicNewsRecipe):
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'newimg: ', newimg
-                        raw_html = raw_html.replace(img, newimg)
+                        raw_html = raw_html.replace(img, newimg) 
        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
            return raw_html
        else:
@ -549,10 +559,11 @@ class MPRecipe(BasicNewsRecipe):
                        photo = photo.replace('class="photo"', '')
                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                return new_raw_html + '</body></html>'
-            else:
+            else: 
                # .txt based file
                splitter = re.compile(r'\n') # Split the text on newlines
                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                next_is_mov_link = False
                next_is_img_txt = False
                title_started = False
                met_article_start_char = False
@ -561,24 +572,35 @@ class MPRecipe(BasicNewsRecipe):
                        met_article_start_char = True
                        new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
-                        if next_is_img_txt == False:
+                        if next_is_img_txt == False and next_is_mov_link == False:
-                            if item.startswith('='):
+                            item = item.strip()
                            if item.startswith("=@"):
                                next_is_mov_link = True
                            elif item.startswith("=?"):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
                            elif item.startswith('='):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
-                                if met_article_start_char == False:
+                                if item <> '': 
-                                    if title_started == False:
+                                    if next_is_img_txt == False and met_article_start_char == False:
-                                        new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                        if title_started == False:
-                                        title_started = True
+                                            #print 'Title started at ', item
                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                            title_started = True
                                        else:
                                            new_raw_html = new_raw_html + item + '\n'
                                    else:
-                                        new_raw_html = new_raw_html + item + '\n'
+                                        new_raw_html = new_raw_html + item + '<p>\n'
                                else:
                                    new_raw_html = new_raw_html + item + '<p>\n'
                        else:
-                            next_is_img_txt = False
+                            if next_is_mov_link == True:
-                            new_raw_html = new_raw_html + item + '\n'
+                                next_is_mov_link = False
                            else: 
                                next_is_img_txt = False
                                new_raw_html = new_raw_html + item + '\n'
                return new_raw_html + '</div></body></html>'
-
+            
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
@ -587,7 +609,7 @@ class MPRecipe(BasicNewsRecipe):
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup
-
+        
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
@ -678,7 +700,7 @@ class MPRecipe(BasicNewsRecipe):
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
-                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
+                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                    play_order=po, author=auth, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
--- a/recipes/omgubuntu.recipe
+++ b/recipes/omgubuntu.recipe
@ -0,0 +1,18 @@
 # -*- coding: utf-8 -*-
 class BasicUserRecipe1318619832(AutomaticNewsRecipe):
    """Recipe for the 'Omg Ubuntu' FeedBurner feed."""
    title          = u'OmgUbuntu'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    # (feed title, feed URL) pairs
    feeds          = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]

    def get_masthead_url(self):
        """Return the masthead image URL, or None when it cannot be opened.

        The URL is opened once, with a browser obtained from
        BasicNewsRecipe.get_browser(), purely as an availability check; on
        failure a message is logged and None is returned instead of the URL.
        """
        masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Deliberately not a bare "except:": KeyboardInterrupt and
            # SystemExit must still propagate so the download can be aborted.
            self.log("\nMasthead unavailable")
            masthead = None
        return masthead
--- a/recipes/phoronix.recipe
+++ b/recipes/phoronix.recipe
@ -0,0 +1,47 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
 '''
 Fetch phoronix.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class cdnet(BasicNewsRecipe):
    """Recipe for the Phoronix FeedBurner feed (phoronix.com)."""

    # Descriptive metadata
    title = 'Phoronix'
    __author__ = 'calibre'
    description = 'Actualités Phoronix'
    publisher = 'Phoronix.com'
    category = 'news, IT, linux'
    language = 'en'
    encoding = 'utf-8'

    # Download settings
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 25
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # Styling applied to the downloaded articles
    extra_css = '''
                    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    # Page trimming: no individual tags are removed, only the content outside
    # the two boundary elements below
    remove_tags = []
    remove_tags_before = dict(id='phxcms_content_phx')
    remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})

    # (feed title, feed URL) pairs
    feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag that has one; return soup."""
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup
--- a/recipes/usatoday.recipe
+++ b/recipes/usatoday.recipe
@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class USAToday(BasicNewsRecipe):
-    title = 'USA Today'
+    title                  = 'USA Today'
-    __author__ = 'Kovid Goyal'
+    __author__             = 'calibre'
-    oldest_article = 1
+    description            = 'newspaper'
-    publication_type = 'newspaper'
+    encoding               = 'utf-8'
-    timefmt  = ''
+    publisher              = 'usatoday.com'
-    max_articles_per_feed = 20
+    category               = 'news, usa'
-    language = 'en'
+    language               = 'en'
-    no_stylesheets = True
+
-    extra_css = '.headline      {text-align:    left;}\n    \
+    use_embedded_content   = False
-                 .byline        {font-family:   monospace;  \
+    timefmt                = ' [%d %b %Y]'
-                                 text-align:    left;       \
+    max_articles_per_feed  = 15
-                                 margin-bottom: 1em;}\n     \
+    no_stylesheets         = True
-                 .image         {text-align:    center;}\n  \
+    remove_empty_feeds     = True
-                 .caption       {text-align:    center;     \
+    filterDuplicates       = True
-                                 font-size:     smaller;    \
+
-                                 font-style:    italic}\n   \
+    extra_css = '''
-                 .credit        {text-align:    right;      \
+                    h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
-                                 margin-bottom: 0em;        \
+                    #post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
-                                 font-size:     smaller;}\n \
+                    #post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
-                 .articleBody   {text-align:    left;}\n    '
+                '''
-    #simultaneous_downloads = 1
+
    feeds =  [
                ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
                ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
                ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
                ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
                ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
-                ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
+                ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
                ]
    keep_only_tags = [dict(attrs={'class':'story'})]
    remove_tags = [
            dict(attrs={'class':[
                                'share',
                                'reprints',
                                'inline-h3',
-                                'info-extras',
+                                'info-extras rounded',
                                'inset',
                                'ppy-outer',
                                'ppy-caption',
                                'comments',
@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
                                'tags',
                                'bottom-tools',
                                'sponsoredlinks',
                                'corrections'
                                ]}),
            dict(name='ul', attrs={'class':'inside-copy'}),
            dict(id=['pluck']),
-                  ]
+            dict(id=['updated']),
            dict(id=['post-date-updated'])
    ]
    def get_masthead_url(self):
--- a/recipes/zdnet.fr.recipe
+++ b/recipes/zdnet.fr.recipe
@ -0,0 +1,68 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
 '''
 Fetch zdnet.fr
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class zdnet(BasicNewsRecipe):
    """Recipe for the ZDNet.fr news feeds (Informatique, Internet, Telecom)."""
    title                  = 'ZDNet.fr'
    __author__             = 'calibre'
    description            = 'Actualités'
    encoding               = 'utf-8'
    publisher              = 'ZDNet.fr'
    category               = 'Actualité, Informatique, IT'
    language               = 'fr'
    use_embedded_content   = False
    timefmt                = ' [%d %b %Y]'
    max_articles_per_feed  = 15
    no_stylesheets         = True
    remove_empty_feeds     = True
    filterDuplicates       = True
    extra_css = '''
                    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    .contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''
    # Elements stripped from every article page
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['toolbox']}),
        dict(name='div', attrs={'class':['clear clearfix']}),
        dict(id='emailtoafriend'),
        dict(id='storyaudio'),
        dict(id='fbtwContainer'),
        dict(name='h5')
    ]
    # Boundary elements: content outside them is dropped
    remove_tags_before = dict(id='leftcol')
    remove_tags_after  = dict(id='content')
    # (feed title, feed URL) pairs
    feeds =  [
            ('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'),
            ('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'),
            ('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/')
    ]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag that has one; return soup."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the masthead image URL, or None when it cannot be opened.

        The URL is opened once, with a browser obtained from
        BasicNewsRecipe.get_browser(), purely as an availability check; on
        failure a message is logged and None is returned instead of the URL.
        """
        masthead = 'http://www.zdnet.fr/images/base/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Deliberately not a bare "except:": KeyboardInterrupt and
            # SystemExit must still propagate so the download can be aborted.
            self.log("\nMasthead unavailable")
            masthead = None
        return masthead
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-    xmlns:html="http://www.w3.org/1999/xhtml"
+    xmlns="http://www.w3.org/1999/xhtml"
    xmlns:rtf="http://rtf2xml.sourceforge.net/"
    xmlns:c="calibre"
    extension-element-prefixes="c"
@ -63,11 +63,16 @@
    </xsl:template>
    <xsl:template name = "para">
-        <xsl:if test = "normalize-space(.) or child::*">
+        <xsl:element name = "p">
-            <xsl:element name = "p">
+            <xsl:choose>
-                <xsl:call-template name = "para-content"/>
+                <xsl:when test = "normalize-space(.) or child::*">
-            </xsl:element>
+                    <xsl:call-template name = "para-content"/>
-        </xsl:if>
+                </xsl:when>
                <xsl:otherwise>
                    <xsl:text>&#160;</xsl:text>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:element>
    </xsl:template>
    <xsl:template name = "para_off">
@ -149,7 +154,7 @@
    <xsl:template match="rtf:doc-information" mode="header">
          <link rel="stylesheet" type="text/css" href="styles.css"/>
          <xsl:if test="not(rtf:title)">
-              <title>unamed</title>
+              <title>unnamed</title>
          </xsl:if>
        <xsl:apply-templates/>
    </xsl:template>
@ -445,7 +450,10 @@
    <xsl:template match = "rtf:field[@type='hyperlink']">
        <xsl:element name ="a">
-            <xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute>
+            <xsl:attribute name = "href">
                <xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
                <xsl:value-of select = "@link"/>
            </xsl:attribute>
            <xsl:apply-templates/>
        </xsl:element>
    </xsl:template>
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -49,6 +49,15 @@ class ANDROID(USBMS):
                       0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
                       0x70c6 : [0x226]
                     },
            # Freescale
            0x15a2 : {
                0x0c01 : [0x226]
            },
            # Alcatel
            0x05c6 : {
                0x9018 : [0x0226],
            },
            # Sony Ericsson
            0xfce : {
@ -139,7 +148,8 @@ class ANDROID(USBMS):
    VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
-            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
+            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
            'VIZIO', 'GOOGLE', 'FREESCAL']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -150,7 +160,7 @@ class ANDROID(USBMS):
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
            'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
            'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
-            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
+            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -62,7 +62,7 @@ class DevicePlugin(Plugin):
    #: Icon for this device
    icon = I('reader.png')
-    # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
+    # Encapsulates an annotation fetched from the device
    UserAnnotation = namedtuple('Annotation','type, value')
    #: GUI displays this as a message if not None. Useful if opening can take a
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib
 from calibre.devices.kindle.apnx import APNXBuilder
 from calibre.devices.kindle.bookmark import Bookmark
 from calibre.devices.usbms.driver import USBMS
 from calibre.ebooks.metadata import MetaInformation
 from calibre import strftime
 '''
 Notes on collections:
@ -164,6 +166,121 @@ class KINDLE(USBMS):
        # This returns as job.result in gui2.ui.annotations_fetched(self,job)
        return bookmarked_books
    def generate_annotation_html(self, bookmark):
        """Render a bookmark's reading position and notes as HTML.

        Returns a BeautifulSoup document whose first child is
        <div class="user_annotations"> ... </div>.  The div holds a bold
        span with the last-read position, a <br>, and then one entry per
        user note, ordered by sorted key of ``bookmark.user_notes``.
        """
        from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString

        last_read_location = bookmark.last_read_location
        timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
        percent_read = bookmark.percent_read
        is_pdf = bookmark.book_format == 'pdf'

        ka_soup = BeautifulSoup()
        container = Tag(ka_soup, 'div')
        container['class'] = 'user_annotations'

        # Last-read position: PDFs report a page, other formats a location
        if is_pdf:
            position_template = _("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)")
        else:
            position_template = _("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)")
        position = Tag(ka_soup, 'span')
        position['style'] = 'font-weight:bold'
        position.insert(0, NavigableString(position_template % dict(
                time=strftime(u'%x', timestamp.timetuple()),
                loc=last_read_location,
                pr=percent_read)))

        # Children of the div, in display order
        children = [position, Tag(ka_soup, 'br')]

        user_notes = bookmark.user_notes
        if user_notes:
            for location in sorted(user_notes):
                note = user_notes[location]
                if note['text']:
                    # Anything that is not a 'Note' is italicized.
                    # NOTE: entries with text are labelled 'Location' even
                    # for PDFs; only text-less entries switch to 'Page'.
                    text = note['text']
                    if note['type'] != 'Note':
                        text = '<i>%s</i>' % text
                    entry = _('<b>Location %(dl)d &bull; %(typ)s</b><br />%(text)s<br />') % dict(
                            dl=note['displayed_location'],
                            typ=note['type'],
                            text=text)
                elif is_pdf:
                    entry = _('<b>Page %(dl)d &bull; %(typ)s</b><br />') % dict(
                            dl=note['displayed_location'],
                            typ=note['type'])
                else:
                    entry = _('<b>Location %(dl)d &bull; %(typ)s</b><br />') % dict(
                            dl=note['displayed_location'],
                            typ=note['type'])
                children.append(entry)

        for index, child in enumerate(children):
            container.insert(index, child)

        ka_soup.insert(0, container)
        return ka_soup
    def add_annotation_to_library(self, db, db_id, annotation):
        """Store a fetched Kindle annotation in the library database.

        ``annotation.type`` selects the behaviour:

        * 'kindle_bookmark': the HTML produced by generate_annotation_html()
          replaces any previously appended annotations in the book's
          comments, and the bookmark file is added as a format of ``db_id``.
        * 'kindle_clippings': the clippings file is attached to the book
          found by searching for the title "My Clippings", or a new book
          with that title is added when the search finds nothing.

        Any other type is ignored.
        """
        from calibre.ebooks.BeautifulSoup import Tag

        if annotation.type == 'kindle_bookmark':
            mi = db.get_metadata(db_id, index_is_id=True)
            notes_soup = self.generate_annotation_html(annotation.value)
            if not mi.comments:
                mi.comments = unicode(notes_soup.prettify())
            else:
                # Drop annotations appended by an earlier run: cut at the
                # earliest of the two markers that is present
                offsets = (
                    mi.comments.find('<div class="user_annotations">'),
                    mi.comments.find('<hr class="annotations_divider" />'),
                )
                found = [offset for offset in offsets if offset >= 0]
                if found:
                    mi.comments = mi.comments[:min(found)]
                # NOTE: this tag check only runs when the book already had
                # comments; nothing is written back for such books
                if set(mi.tags).intersection(('Catalog', 'Clippings')):
                    return
                if mi.comments:
                    # Separate the remaining comments from the annotations
                    divider = Tag(notes_soup, 'hr')
                    divider['class'] = 'annotations_divider'
                    notes_soup.insert(0, divider)
                mi.comments += unicode(notes_soup.prettify())

            # Update library comments
            db.set_comment(db_id, mi.comments)

            # Add bookmark file to db_id
            db.add_format_with_hooks(db_id, annotation.value.bookmark_extension,
                                     annotation.value.path, index_is_id=True)
        elif annotation.type == 'kindle_clippings':
            clippings = annotation.value
            last_update = 'Last modified %s' % strftime(u'%x %X', clippings['timestamp'].timetuple())
            matches = list(db.data.search_getting_ids('title:"My Clippings"', ''))
            if not matches:
                # No 'My Clippings' book yet: create it
                mi = MetaInformation('My Clippings', authors=['Kindle'])
                mi.tags = ['Clippings']
                mi.comments = last_update
                db.add_books([clippings['path']], ['txt'], [mi])
            else:
                clippings_id = matches[0]
                db.add_format_with_hooks(clippings_id, 'TXT', clippings['path'],
                                         index_is_id=True)
                mi = db.get_metadata(clippings_id, index_is_id=True)
                mi.comments = last_update
                db.set_metadata(clippings_id, mi)
 class KINDLE2(KINDLE):
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -16,6 +16,7 @@ from calibre.devices.usbms.driver import USBMS, debug_print
 from calibre import prints
 from calibre.devices.usbms.books import CollectionsBookList
 from calibre.utils.magick.draw import save_cover_data_to
 from calibre.ptempfile import PersistentTemporaryFile
 class KOBO(USBMS):
@ -76,6 +77,11 @@ class KOBO(USBMS):
        self.book_class = Book
        self.dbversion = 7
    def create_annotations_path(self, mdata, device_path=None):
        '''
        Return the path under which the annotations for a book are looked up.

        A truthy ``device_path`` is handed back unchanged; otherwise the
        lookup is delegated to ``USBMS.create_annotations_path`` with
        ``mdata``.
        '''
        if not device_path:
            return USBMS.create_annotations_path(self, mdata)
        return device_path
    def books(self, oncard=None, end_session=True):
        from calibre.ebooks.metadata.meta import path_to_ext
@ -750,9 +756,12 @@ class KOBO(USBMS):
        blists = {}
        for i in paths:
-            if booklists[i] is not None:
+            try:
-               #debug_print('Booklist: ', i)
+                if booklists[i] is not None:
-               blists[i] = booklists[i]
+                    #debug_print('Booklist: ', i)
                    blists[i] = booklists[i]
            except IndexError:
                pass
        opts = self.settings()
        if opts.extra_customization:
            collections = [x.lower().strip() for x in
@ -865,3 +874,21 @@ class KOBO(USBMS):
                else:
                    debug_print("ImageID could not be retreived from the database")
    def prepare_addable_books(self, paths):
        '''
        The Kobo supports an encrypted epub referred to as a kepub.
        Unfortunately Kobo decided to put the files on the device
        with no file extension.  I just hope that decision causes
        them as much grief as it does me :-)

        This has to make a temporary copy of the book files with an
        epub extension to allow Calibre's normal processing to
        deal with the file appropriately.

        :param paths: list of book paths; it is modified in place.
        :return: the same list, where every entry containing ``kepub`` has
                 been replaced by the name of a temporary ``.epub`` copy.
        '''
        import shutil

        for idx, path in enumerate(paths):
            if 'kepub' not in path:
                continue
            # Book files are binary data, so they must not go through
            # text-mode newline translation.
            with open(path, 'rb') as r:
                tf = PersistentTemporaryFile(suffix='.epub')
                try:
                    # Stream the copy instead of loading the whole book
                    # into memory.
                    shutil.copyfileobj(r, tf)
                finally:
                    # Close the copy so that everything is flushed to disk
                    # before the path is handed on to be read.
                    tf.close()
            paths[idx] = tf.name
        return paths
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -1068,6 +1068,12 @@ class Device(DeviceConfig, DevicePlugin):
        '''
        return {}
    def add_annotation_to_library(self, db, db_id, annotation):
        '''
        Add an annotation to the calibre library.

        This implementation does nothing with its arguments and returns
        None.
        '''
        return None
    def create_upload_path(self, path, mdata, fname, create_dirs=True):
        path = os.path.abspath(path)
        maxlen = self.MAX_PATH_LEN
@ -1147,3 +1153,6 @@ class Device(DeviceConfig, DevicePlugin):
            os.makedirs(filedir)
        return filepath
    def create_annotations_path(self, mdata, device_path=None):
        '''
        Build the annotation path for the book described by ``mdata``.

        The result is whatever ``create_upload_path`` returns for a file
        named ``x.bookmark`` below the ``/<storage>`` placeholder root, with
        directory creation switched off. ``device_path`` is accepted but not
        used by this implementation.
        '''
        storage_root = os.path.abspath('/<storage>')
        return self.create_upload_path(storage_root, mdata, 'x.bookmark',
                create_dirs=False)
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
    def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
        from calibre.ebooks.chm.reader import CHMReader
        log.debug('Opening CHM file')
-        rdr = CHMReader(chm_path, log, self.opts)
+        rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
        log.debug('Extracting CHM to %s' % output_dir)
        rdr.extract_content(output_dir, debug_dump=debug_dump)
        self._chm_reader = rdr
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -40,14 +40,14 @@ class CHMError(Exception):
    pass
 class CHMReader(CHMFile):
-    def __init__(self, input, log, opts):
+    def __init__(self, input, log, input_encoding=None):
        CHMFile.__init__(self)
        if isinstance(input, unicode):
            input = input.encode(filesystem_encoding)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
-        self.opts = opts
+        self.input_encoding = input_encoding
        self._sourcechm = input
        self._contents = None
        self._playorder = 0
@ -156,8 +156,8 @@ class CHMReader(CHMFile):
                    break
    def _reformat(self, data, htmlpath):
-        if self.opts.input_encoding:
+        if self.input_encoding:
-            data = data.decode(self.opts.input_encoding)
+            data = data.decode(self.input_encoding)
        try:
            data = xml_to_unicode(data, strip_encoding_pats=True)[0]
            soup = BeautifulSoup(data)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
    def unarchive(self, path, tdir):
        extract(path, tdir)
        files = list(walk(tdir))
        files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
                for f in files]
        from calibre.customize.ui import available_input_formats
        fmts = available_input_formats()
        for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin):
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
-            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
+            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            #clean multiple \n
            res = re.sub('\n+', '\n', res)
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
-            res = re.sub('\s*<body>', '<body>', res)
+            # res = re.sub('\s*<body>', '<body>', res)
-            res = re.sub('(?<=\n)\n{2}',
+            # res = re.sub('(?<=\n)\n{2}',
-                    u'<p>\u00a0</p>\n'.encode('utf-8'), res)
+                    # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -376,13 +376,13 @@ class ParseRtf:
                msg += 'self.__run_level is "%s"\n' % self.__run_level
                raise RtfInvalidCodeException, msg
            if self.__run_level > 1:
-                sys.stderr.write(_('File could be older RTF...\n'))
+                sys.stderr.write('File could be older RTF...\n')
            if found_destination:
                if self.__run_level > 1:
-                    sys.stderr.write(_(
+                    sys.stderr.write(
                        'File also has newer RTF.\n'
                        'Will do the best to convert.\n'
-                    ))
+                    )
            add_brackets_obj = add_brackets.AddBrackets(
                    in_file = self.__temp_file,
                    bug_handler = RtfInvalidCodeException,
--- a/src/calibre/ebooks/rtf2xml/add_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/add_brackets.py
@ -11,11 +11,11 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import sys, os,  tempfile
+import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy, check_brackets
 # note to self. This is the first module in which I use tempfile. A good idea?
-"""
+
 """
 class AddBrackets:
    """
    Add brackets for old RTF.
@ -41,6 +41,7 @@ class AddBrackets:
        self.__copy = copy
        self.__write_to = tempfile.mktemp()
        self.__run_level = run_level
    def __initiate_values(self):
        """
        """
@ -82,14 +83,16 @@ class AddBrackets:
        'cw<ci<subscript_' ,
        'cw<ci<superscrip',
        'cw<ci<underlined' ,
-        'cw<ul<underlined' ,
+        # 'cw<ul<underlined' ,
        ]
    def __before_body_func(self, line):
        """
        Handle a line while the state machine has not entered the body yet.

        The line is always written to the output unchanged. When the current
        token is the body-open marker, the state is switched to 'in_body'
        before the line is written.
        """
        body_opened = self.__token_info == 'mi<mk<body-open_'
        if body_opened:
            self.__state = 'in_body'
        self.__write_obj.write(line)
    def __in_body_func(self, line):
        """
        """
@ -108,6 +111,7 @@ class AddBrackets:
            self.__state = 'after_control_word'
        else:
            self.__write_obj.write(line)
    def __after_control_word_func(self, line):
        """
        """
@ -122,6 +126,7 @@ class AddBrackets:
                self.__ignore_count = self.__ob_count
            else:
                self.__state = 'in_body'
    def __write_group(self):
        """
        """
@ -141,6 +146,7 @@ class AddBrackets:
            self.__write_obj.write(inline_string)
            self.__open_bracket = 1
        self.__temp_group = []
    def __change_permanent_group(self):
        """
        use temp group to change permanent group
@ -150,6 +156,7 @@ class AddBrackets:
            if token_info in self.__accept:
                att = line[20:-1]
                self.__inline[token_info] = att
    def __ignore_func(self, line):
        """
        Don't add any brackets while inside of brackets RTF has already
@ -159,12 +166,14 @@ class AddBrackets:
        if self.__token_info == 'cb<nu<clos-brack'and\
            self.__cb_count == self.__ignore_count:
            self.__state = 'in_body'
    def __check_brackets(self, in_file):
        """
        Run the bracket checker over in_file.

        The checker object is kept on the instance. Returns 1 when the first
        item of the checker's result is false; otherwise returns None.
        """
        checker = check_brackets.CheckBrackets(file = in_file)
        self.__check_brack_obj = checker
        brackets_ok = checker.check_brackets()[0]
        if brackets_ok:
            return None
        return 1
    def add_brackets(self):
        """
        """
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@ -5,14 +5,57 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os, datetime
 from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
 from calibre.gui2 import error_dialog
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
 from calibre import strftime
 from calibre.gui2.actions import InterfaceAction
 from calibre.devices.usbms.device import Device
 from calibre.gui2.dialogs.progress import ProgressDialog
 class Updater(QThread): # {{{
    '''
    Worker thread that hands device annotations to the device driver so
    that they can be merged into the library.

    For every id in ``annotation_map`` the first two items of its entry are
    wrapped in a ``Device.UserAnnotation`` and passed to
    ``device.add_annotation_to_library``. A failure for one id is recorded
    in ``self.errors`` (id -> formatted traceback) and does not stop the
    remaining ids from being processed. A modal progress dialog is shown
    while the thread is alive.
    '''

    # Emitted after each annotation with the number processed so far
    update_progress = pyqtSignal(int)
    # Emitted once the processing loop has ended (finished or canceled)
    update_done     = pyqtSignal()

    def __init__(self, parent, db, device, annotation_map, done_callback):
        '''
        :param parent: parent object for the thread and the progress dialog
        :param db: library database passed through to the device driver
        :param device: object providing ``add_annotation_to_library``
        :param annotation_map: mapping of id -> annotation entry
        :param done_callback: called as ``done_callback(ids, errors)`` at
                              the end of ``run``
        '''
        QThread.__init__(self, parent)
        self.errors = {}
        self.db = db
        self.keep_going = True
        # The dialog range is 0..len(annotation_map)
        self.pd = ProgressDialog(_('Merging user annotations into database'), '',
                0, len(annotation_map), parent=parent)
        self.device = device
        self.annotation_map = annotation_map
        self.done_callback = done_callback
        self.pd.canceled_signal.connect(self.canceled)
        self.pd.setModal(True)
        self.pd.show()
        self.update_progress.connect(self.pd.set_value,
                type=Qt.QueuedConnection)
        self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)

    def canceled(self):
        '''Ask the run loop to stop before the next item and hide the dialog.'''
        self.keep_going = False
        self.pd.hide()

    def run(self):
        '''
        Process every entry of ``annotation_map`` until done or canceled,
        then emit ``update_done`` and invoke ``done_callback`` with all ids
        of the map and the collected errors.
        '''
        import traceback
        for i, id_ in enumerate(self.annotation_map):
            if not self.keep_going:
                break
            bm = Device.UserAnnotation(self.annotation_map[id_][0],
                    self.annotation_map[id_][1])
            try:
                self.device.add_annotation_to_library(self.db, id_, bm)
            except:
                # Deliberately broad: a failure for one book must neither
                # abort the remaining ones nor skip the completion signals
                # below. The traceback is reported through done_callback.
                self.errors[id_] = traceback.format_exc()
            # i is zero-based; report the count of items completed so the
            # dialog can reach its maximum of len(annotation_map)
            self.update_progress.emit(i + 1)
        self.update_done.emit()
        self.done_callback(self.annotation_map.keys(), self.errors)
 # }}}
 class FetchAnnotationsAction(InterfaceAction):
@ -41,13 +84,21 @@ class FetchAnnotationsAction(InterfaceAction):
                    fmts.append(format.lower())
            return fmts
        def get_device_path_from_id(id_):
            # Collect the device books registered for id_ in the main memory
            # view and both card views (in that order). Return the path of
            # the first one, or None when no view knows the id.
            matches = []
            for location in ('memory', 'card_a', 'card_b'):
                model = getattr(self.gui, location+'_view').model()
                found = model.paths_for_db_ids(set([id_]), as_map=True)
                matches.extend(found[id_])
            if not matches:
                return None
            return matches[0].path
        def generate_annotation_paths(ids, db, device):
            # Generate path templates
            # Individual storage mount points scanned/resolved in driver.get_annotations()
            path_map = {}
            for id in ids:
                path = get_device_path_from_id(id)
                mi = db.get_metadata(id, index_is_id=True)
-                a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.bookmark', create_dirs=False)
+                a_path = device.create_annotations_path(mi, device_path=path)
                path_map[id] = dict(path=a_path, fmts=get_formats(id))
            return path_map
@ -78,166 +129,6 @@ class FetchAnnotationsAction(InterfaceAction):
                path_map)
    def annotations_fetched(self, job):
        from calibre.devices.usbms.device import Device
        from calibre.ebooks.metadata import MetaInformation
        from calibre.gui2.dialogs.progress import ProgressDialog
        from calibre.library.cli import do_add_format
        class Updater(QThread): # {{{
            update_progress = pyqtSignal(int)
            update_done     = pyqtSignal()
            FINISHED_READING_PCT_THRESHOLD = 96
            def __init__(self, parent, db, annotation_map, done_callback):
                QThread.__init__(self, parent)
                self.db = db
                self.pd = ProgressDialog(_('Merging user annotations into database'), '',
                        0, len(job.result), parent=parent)
                self.am = annotation_map
                self.done_callback = done_callback
                self.pd.canceled_signal.connect(self.canceled)
                self.pd.setModal(True)
                self.pd.show()
                self.update_progress.connect(self.pd.set_value,
                        type=Qt.QueuedConnection)
                self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
            def generate_annotation_html(self, bookmark):
                # Returns <div class="user_annotations"> ... </div>
                last_read_location = bookmark.last_read_location
                timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
                percent_read = bookmark.percent_read
                ka_soup = BeautifulSoup()
                dtc = 0
                divTag = Tag(ka_soup,'div')
                divTag['class'] = 'user_annotations'
                # Add the last-read location
                spanTag = Tag(ka_soup, 'span')
                spanTag['style'] = 'font-weight:bold'
                if bookmark.book_format == 'pdf':
                    spanTag.insert(0,NavigableString(
                        _("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
                                    dict(time=strftime(u'%x', timestamp.timetuple()),
                                    loc=last_read_location,
                                    pr=percent_read)))
                else:
                    spanTag.insert(0,NavigableString(
                        _("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
                                    dict(time=strftime(u'%x', timestamp.timetuple()),
                                    loc=last_read_location,
                                    pr=percent_read)))
                divTag.insert(dtc, spanTag)
                dtc += 1
                divTag.insert(dtc, Tag(ka_soup,'br'))
                dtc += 1
                if bookmark.user_notes:
                    user_notes = bookmark.user_notes
                    annotations = []
                    # Add the annotations sorted by location
                    # Italicize highlighted text
                    for location in sorted(user_notes):
                        if user_notes[location]['text']:
                            annotations.append(
                                    _('<b>Location %(dl)d &bull; %(typ)s</b><br />%(text)s<br />') % \
                                                dict(dl=user_notes[location]['displayed_location'],
                                                    typ=user_notes[location]['type'],
                                                    text=(user_notes[location]['text'] if \
                                                    user_notes[location]['type'] == 'Note' else \
                                                    '<i>%s</i>' % user_notes[location]['text'])))
                        else:
                            if bookmark.book_format == 'pdf':
                                annotations.append(
                                        _('<b>Page %(dl)d &bull; %(typ)s</b><br />') % \
                                            dict(dl=user_notes[location]['displayed_location'],
                                                typ=user_notes[location]['type']))
                            else:
                                annotations.append(
                                        _('<b>Location %(dl)d &bull; %(typ)s</b><br />') % \
                                            dict(dl=user_notes[location]['displayed_location'],
                                                typ=user_notes[location]['type']))
                    for annotation in annotations:
                        divTag.insert(dtc, annotation)
                        dtc += 1
                ka_soup.insert(0,divTag)
                return ka_soup
            '''
            def mark_book_as_read(self,id):
                read_tag = gprefs.get('catalog_epub_mobi_read_tag')
                if read_tag:
                    self.db.set_tags(id, [read_tag], append=True)
            '''
            def canceled(self):
                self.pd.hide()
            def run(self):
                ignore_tags = set(['Catalog','Clippings'])
                for (i, id) in enumerate(self.am):
                    bm = Device.UserAnnotation(self.am[id][0],self.am[id][1])
                    if bm.type == 'kindle_bookmark':
                        mi = self.db.get_metadata(id, index_is_id=True)
                        user_notes_soup = self.generate_annotation_html(bm.value)
                        if mi.comments:
                            a_offset = mi.comments.find('<div class="user_annotations">')
                            ad_offset = mi.comments.find('<hr class="annotations_divider" />')
                            if a_offset >= 0:
                                mi.comments = mi.comments[:a_offset]
                            if ad_offset >= 0:
                                mi.comments = mi.comments[:ad_offset]
                            if set(mi.tags).intersection(ignore_tags):
                                continue
                            if mi.comments:
                                hrTag = Tag(user_notes_soup,'hr')
                                hrTag['class'] = 'annotations_divider'
                                user_notes_soup.insert(0,hrTag)
                            mi.comments += user_notes_soup.prettify()
                        else:
                            mi.comments = unicode(user_notes_soup.prettify())
                        # Update library comments
                        self.db.set_comment(id, mi.comments)
                        '''
                        # Update 'read' tag except for Catalogs/Clippings
                        if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
                            if not set(mi.tags).intersection(ignore_tags):
                                self.mark_book_as_read(id)
                        '''
                        # Add bookmark file to id
                        self.db.add_format_with_hooks(id, bm.value.bookmark_extension,
                                                      bm.value.path, index_is_id=True)
                        self.update_progress.emit(i)
                    elif bm.type == 'kindle_clippings':
                        # Find 'My Clippings' author=Kindle in database, or add
                        last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
                        mc_id = list(db.data.parse('title:"My Clippings"'))
                        if mc_id:
                            do_add_format(self.db, mc_id[0], 'TXT', bm.value['path'])
                            mi = self.db.get_metadata(mc_id[0], index_is_id=True)
                            mi.comments = last_update
                            self.db.set_metadata(mc_id[0], mi)
                        else:
                            mi = MetaInformation('My Clippings', authors = ['Kindle'])
                            mi.tags = ['Clippings']
                            mi.comments = last_update
                            self.db.add_books([bm.value['path']], ['txt'], [mi])
                self.update_done.emit()
                self.done_callback(self.am.keys())
        # }}}
        if not job.result: return
@ -246,9 +137,25 @@ class FetchAnnotationsAction(InterfaceAction):
                    _('User annotations generated from main library only'),
                    show=True)
        db = self.gui.library_view.model().db
        device = self.gui.device_manager.device
-        self.__annotation_updater = Updater(self.gui, db, job.result,
+        self.__annotation_updater = Updater(self.gui, db, device, job.result,
-                self.Dispatcher(self.gui.library_view.model().refresh_ids))
+                self.Dispatcher(self.annotations_updated))
        self.__annotation_updater.start()
    def annotations_updated(self, ids, errors):
        '''
        Refresh the library view for ``ids`` and report failed books.

        :param ids: ids passed on to the library model's ``refresh_ids``
        :param errors: mapping of id -> traceback text; when it is empty no
                       dialog is shown

        For integer ids the book title is looked up in the database and
        used as the heading of the entry; any other id is used as is.
        '''
        self.gui.library_view.model().refresh_ids(ids)
        if not errors:
            return
        db = self.gui.library_view.model().db
        details = []
        for book_id, tb in errors.iteritems():
            if isinstance(book_id, type(1)):
                heading = db.title(book_id, index_is_id=True)
            else:
                heading = book_id
            # One blank line separates consecutive entries
            details += [heading, tb, '']
        error_dialog(self.gui, _('Some errors'),
                _('Could not fetch annotations for some books. Click '
                    'show details to see which ones.'),
                det_msg='\n'.join(details), show=True)
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -1239,11 +1239,14 @@ class DeviceBooksModel(BooksModel): # {{{
    def paths(self, rows):
        '''
        Return the ``path`` of the device book behind each index in ``rows``.

        Every index is translated through ``self.map`` (by its ``row()``) to
        a position in ``self.db``; the order of ``rows`` is preserved.
        '''
        book_paths = []
        for index in rows:
            book = self.db[self.map[index.row()]]
            book_paths.append(book.path)
        return book_paths
-    def paths_for_db_ids(self, db_ids):
+    def paths_for_db_ids(self, db_ids, as_map=False):
-        res = []
+        res = defaultdict(list) if as_map else []
        for r,b in enumerate(self.db):
            if b.application_id in db_ids:
-                res.append((r,b))
+                if as_map:
                    res[b.application_id].append(b)
                else:
                    res.append((r,b))
        return res
    def get_collections_with_ids(self):
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -242,6 +242,10 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|.
 If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.
 .. note::
    As of iOS version 5 Stanza no longer works on Apple devices. Alternatives to Stanza are discussed `here <http://www.mobileread.com/forums/showthread.php?t=152789>`_.
 Using iBooks
 **************
@ -251,7 +255,7 @@ Start the Safari browser and type in the IP address and port of the computer run
 Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
-You wills ee a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
+You will see a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
 With the USB cable + iTunes