Merge from trunk

This commit is contained in:
Charles Haley 2011-12-23 09:13:13 +01:00
commit b7f2bb23df
147 changed files with 25886 additions and 21914 deletions


@ -19,6 +19,81 @@
# new recipes:
# - title:
- version: 0.8.32
date: 2011-12-23
new features:
- title: "Linux: When deleting books, send them to the recycle bin, instead of permanently deleting. This is the same behavior as on Windows and OS X."
- title: "Add a checkbox to allow users to disable the popup that asks if books should be auto-converted before sending to device"
- title: "Drivers for Droid Razr, Samsung GT-I9003 and Bookeen Odyssey"
tickets: [906356, 906056, 905862]
- title: "Allow passing multiple filenames as command line arguments to calibre, to add multiple books."
tickets: [907968]
bug fixes:
- title: "MOBI Output: Fix regression in 0.8.30 that caused the use of hidden heading elements for the TOC to generate links in the wrong place."
tickets: [907156]
- title: "EPUB Output: Ensure directories have the correct permissions bits set when unzipping an epub with unzip on Unix"
- title: "Fix bottom most shortcuts in keyboard shortcuts for viewer not editable"
- title: "EPUB Output: Fix handling of self closing <audio> tags."
tickets: [906521]
- title: "MOBI Input: Map invalid <o:p> tags to <p> tags before parsing, to handle broken nesting."
tickets: [905715]
- title: "Conversion pipeline: HTML5 parsing: Fix handling of XML namespaces. Fixes regression in 0.8.30 that caused some articles in some news downloads to appear blank when viewed in Adobe Digital Editions based readers"
- title: "Get Books: Gandalf store, fix price and cover detection"
- title: "EPUB Output: Fix the Flatten filenames option in EPUB Output causing duplicated manifest ids in rare cases."
tickets: [905692]
- title: "When adding books via ISBN, show the user the list of invalid ISBNs that will be ignored, if any, before starting the add operation."
tickets: [905690]
- title: "Fix unsmarten punctuation conversion option broken in 0.8.31."
tickets: [905596]
- title: "Fix broken evaluation of composite columns in save-to-disk"
improved recipes:
- Cosmopolitan UK
- Hindustan Times
- HVG
- moneynews.com
- Ming Pao
- Glasgow Herald
- Times of India
- Focus Magazine
- Hacker News
- Independent
- Sueddeutsche
new recipes:
- title: Prospect Magazine UK
author: Barty and duoloz
- title: Elet es Irodalom and NOL
author: Bigpapa
- title: Salonica Press News
author: SteliosGero
- title: Echo Online
author: Armin Geller
- title: Various Polish news sources
author: fenuks
- title: Various Italian news sources
author: faber1971
- version: 0.8.31
date: 2011-12-16


@ -1,19 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone'
__author__ = 'fenuks'
description = 'Adventure zone - adventure games from A to Z'
category = 'games'
language = 'pl'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
use_embedded_content=False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after= dict(id='comments')
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self)
soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
tag=soup.find(name='channel')
titles=[]
for r in tag.findAll(name='image'):
r.extract()
art=tag.findAll(name='item')
for i in art:
titles.append(i.title.string)
for feed in feeds:
for article in feed.articles[:]:
article.title=titles[feed.articles.index(article)]
return feeds
def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
def skip_ad_pages(self, soup):
skip_tag = soup.body.findAll(name='a')
if skip_tag is not None:
for r in skip_tag:
if 'articles.php?' in r['href']:
if r.strong is not None:
word=r.strong.string
if ('zapowied' or 'recenzj') in word:
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
else:
None
def print_version(self, url):
return url.replace('news.php?readmore', 'print.php?type=N&item_id')
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
skip_tag = skip_tag.findAll(name='a')
for r in skip_tag:
if r.strong:
word=r.strong.string
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)


@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AstroNEWS(BasicNewsRecipe):
title = u'AstroNEWS'
__author__ = 'fenuks'
@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
auto_cleanup = True
#extra_css= 'table {text-align: left;}'
no_stylesheets=True
cover_url='http://news.astronet.pl/img/logo_news.jpg'
# no_stylesheets= True
remove_tags=[dict(name='hr')]
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
def print_version(self, url):
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
def preprocess_html(self, soup):
for item in soup.findAll(align=True):
del item['align']
return soup

recipes/biolog_pl.recipe Normal file

@ -0,0 +1,19 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Biolog_pl(BasicNewsRecipe):
title = u'Biolog.pl'
oldest_article = 7
max_articles_per_feed = 100
remove_empty_feeds=True
__author__ = 'fenuks'
description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
category = 'biology'
language = 'pl'
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
no_stylesheets = True
#keeps_only_tags=[dict(id='main')]
remove_tags_before=dict(id='main')
remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]


@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'News for Birmingham UK'
timefmt = ''
__author__ = 'Dave Asbury'
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
oldest_article = 1
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
language = 'en_GB'
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
keep_only_tags = [
#dict(name='h1',attrs={'id' : 'article-headline'}),
#dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
#dict(name='p')
#dict(attrs={'id' : 'three-col'})
]
remove_tags = [
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
]
feeds = [
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
(u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
]
extra_css = '''
body {font: sans-serif medium;}'
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''


@ -0,0 +1,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Computerworld_pl(BasicNewsRecipe):
title = u'Computerworld.pl'
__author__ = 'fenuks'
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
category = 'IT'
language = 'pl'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'s'})]
remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
def get_cover_url(self):
soup = self.index_to_soup('http://www.computerworld.pl/')
cover=soup.find(name='img', attrs={'class':'prawo'})
self.cover_url=cover['src']
return getattr(self, 'cover_url', self.cover_url)


@ -7,6 +7,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
__author__ = 'Dave Asbury'
#last update 21/12/11
# greyscale code by Starson
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
no_stylesheets = True
@ -31,8 +32,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
]
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
dict(name='li',attrs={'class' : 'thumb'})
]
feeds = [
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
@ -48,4 +50,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
img.type = "GrayscaleType"
img.save(iurl)
return soup

recipes/datasport.recipe Normal file

@ -0,0 +1,15 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian soccer news website - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
title = u'Datasport'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]


@ -0,0 +1,58 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Dziennik_pl(BasicNewsRecipe):
title = u'Dziennik.pl'
__author__ = 'fenuks'
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
category = 'newspaper'
language = 'pl'
cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
remove_empty_feeds=True
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
keep_only_tags=[dict(id='article')]
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
(u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
(u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
(u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
(u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
(u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
(u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
(u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
def append_page(self, soup, appendtag):
tag=soup.find('a', attrs={'class':'page_next'})
if tag:
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
while tag:
soup2= self.index_to_soup(tag['href'])
tag=soup2.find('a', attrs={'class':'page_next'})
if not tag:
for r in appendtag.findAll('div', attrs={'class':'art_src'}):
r.extract()
pagetext = soup2.find(name='div', attrs={'class':'article_body'})
for dictionary in self.remove_tags:
v=pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
for delete in v:
delete.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
if appendtag.find('div', attrs={'class':'article_paginator'}):
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup


@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
'''
Fetch echo-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Echo_Online(BasicNewsRecipe):
title = u' Echo Online'
description = '-Echo Online-'
publisher = 'Echo Online GmbH'
category = 'News, Germany'
__author__ = 'Armin Geller' # 2011-12-17
language = 'de'
lang = 'de-DE'
encoding = 'iso-8859-1'
timefmt = ' [%a, %d %b %Y]'
oldest_article = 7
max_articles_per_feed = 2
no_stylesheets = True
auto_cleanup = True
remove_javascript = True
feeds = [
(u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
(u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
(u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
(u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
(u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
(u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
(u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
(u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
(u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
(u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
(u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
]
def print_version(self, url):
return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-ash2/41801_145340745513489_893927_n.jpg' # 2011-12-16 AGe
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif' # 2011-12-16 AGe


@ -0,0 +1,48 @@
################################################################################
#Description: http://es.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2010.12.01. - V1.0
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class elet_es_irodalom(BasicNewsRecipe):
title = u'Elet es Irodalom'
__author__ = 'Bigpapa'
oldest_article = 7
max_articles_per_feed = 20 # Maximum number of articles per feed stored in the generated e-book.
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'iso-8859-2'
category = 'Cikkek'
language = 'hu'
publication_type = 'newsportal'
extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
keep_only_tags = [
dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
]
remove_tags = [
dict(name='a', attrs={'target':['_TOP']}),
dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
]
feeds = [
(u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
(u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
(u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
(u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
(u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
(u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
(u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
(u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
(u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
]

recipes/emuzica_pl.recipe Normal file

@ -0,0 +1,16 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class eMuzyka(BasicNewsRecipe):
title = u'eMuzyka'
__author__ = 'fenuks'
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music'
language = 'pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

recipes/fisco_oggi.recipe Normal file

@ -0,0 +1,18 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
title = u'Fisco Oggi'
language = 'it'
__author__ = 'faber1971'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
remove_javascript = True
no_stylesheets = True
feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]


@ -1,57 +1,68 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Focus_pl(BasicNewsRecipe):
title = u'Focus.pl'
oldest_article = 15
max_articles_per_feed = 100
__author__ = 'fenuks'
language = 'pl'
description ='polish scientific monthly magazine'
class FocusRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'intromatyk <intromatyk@gmail.com>'
language = 'pl'
version = 1
title = u'Focus'
publisher = u'Gruner + Jahr Polska'
category = u'News'
description = u'Newspaper'
category='magazine'
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
remove_tags_after=dict(name='div', attrs={'class':'clear'})
feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
(u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
(u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
(u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
(u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
(u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
(u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
(u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
(u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
oldest_article = 7
max_articles_per_feed = 100000
recursions = 0
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
# Seems to work best, but YMMV
simultaneous_downloads = 5
r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
h1{text-align: left;}
h2{font-size: medium; font-weight: bold;}
p.lead {font-weight: bold; text-align: left;}
.authordate {font-size: small; color: #696969;}
.fot{font-size: x-small; color: #666666;}
'''
]
feeds = [
('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
]
def skip_ad_pages(self, soup):
tag=soup.find(name='a')
if tag:
new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
return new_soup
def append_page(self, appendtag):
tag=appendtag.find(name='div', attrs={'class':'arrows'})
if tag:
nexturl='http://www.focus.pl/'+tag.a['href']
for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
rem.extract()
while nexturl:
soup2=self.index_to_soup(nexturl)
nexturl=None
pagetext=soup2.find(name='div', attrs={'class':'txt'})
tag=pagetext.find(name='div', attrs={'class':'arrows'})
for r in tag.findAll(name='a'):
if u'Następne' in r.string:
nexturl='http://www.focus.pl/'+r['href']
for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
rem.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
if ('advertisement' in soup.find('title').string.lower()):
href = soup.find('a').get('href')
return self.index_to_soup(href, raw=True)
else:
return None
def get_cover_url(self):
soup=self.index_to_soup('http://www.focus.pl/magazyn/')
@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
self.cover_url='http://www.focus.pl/' + tag.a['href']
return getattr(self, 'cover_url', self.cover_url)
def preprocess_html(self, soup):
self.append_page(soup.body)
return soup
def print_version(self, url):
if url.count ('focus.pl.feedsportal.com'):
u = url.find('focus0Bpl')
u = 'http://www.focus.pl/' + url[u + 11:]
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace ('0E','-')
u = u.replace('/nc/1//story01.htm', '/do-druku/1')
else:
u = url.replace('/nc/1','/do-druku/1')
return u


@ -1,4 +1,3 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GlasgowHerald(BasicNewsRecipe):
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
language = 'en_GB'
__author__ = 'Kovid Goyal'
use_embedded_content = False
keep_only_tags = [dict(attrs={'class':'article'})]
remove_tags = [
dict(id=['pic-nav']),
dict(attrs={'class':['comments-top']})
]
no_stylesheets = True
auto_cleanup = True
#keep_only_tags = [dict(attrs={'class':'article'})]
#remove_tags = [
#dict(id=['pic-nav']),
#dict(attrs={'class':['comments-top']})
#]
feeds = [
@ -25,5 +28,4 @@ class GlasgowHerald(BasicNewsRecipe):
(u'Arts & Entertainment',
u'http://www.heraldscotland.com/cmlink/1.768',),
(u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]


@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
import re
class HackerNews(BasicNewsRecipe):
title = 'Hacker News'
__author__ = 'Tom Scholl'
class HNWithCommentsLink(BasicNewsRecipe):
title = 'HN With Comments Link'
__author__ = 'Tom Scholl & David Kerschner'
description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
publisher = 'Y Combinator'
category = 'news, programming, it, technology'
@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
body = body + comments
return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'
def parse_feeds(self):
a = super(HNWithCommentsLink, self).parse_feeds()
self.hn_articles = a[0].articles
return a
def get_obfuscated_article(self, url):
if url.startswith('http://news.ycombinator.com'):
content = self.get_hn_content(url)
@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
else:
content = self.get_readable_content(url)
article = 0
for a in self.hn_articles:
if a.url == url:
article = a
content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(content)
self.temp_files[-1].close()


@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
import urllib, re
class HindustanTimes(BasicNewsRecipe):
title = u'Hindustan Times'
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
]
def get_article_url(self, article):
'''
HT uses a variant of the feedportal RSS ad display mechanism
'''
try:
s = article.summary
return urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
url = BasicNewsRecipe.get_article_url(self, article)
res = self.browser.open_novisit(url)
url = res.geturl().split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
'www.'}
for k, v in encoding.iteritems():
url = url.replace(k, v)
return url
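The encoding table above maps feedsportal's obfuscated URL tokens back to plain characters. A minimal standalone sketch of that same substitution step (not part of the recipe; the decode_feedsportal name and the sample slug are purely illustrative):

# Standalone sketch of the slug decoding performed in get_article_url() above.
# The sample slug is hypothetical; real slugs come from the final path
# component of the redirected article URL.
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://',
            '0S': 'www.'}

def decode_feedsportal(slug):
    # Replace each obfuscated token with its plain-text equivalent.
    for token, plain in encoding.items():
        slug = slug.replace(token, plain)
    return slug

print(decode_feedsportal('0L0Sexample0N0Cnews0Estory'))
# -> http://www.example.com/news-story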


@ -1,44 +1,58 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
################################################################################
#Description: http://hvg.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.20. - V1.1
################################################################################
class HVG(BasicNewsRecipe):
title = 'HVG.HU'
__author__ = u'István Papp'
description = u'Friss hírek a HVG-től'
timefmt = ' [%Y. %b. %d., %a.]'
oldest_article = 4
language = 'hu'
from calibre.web.feeds.news import BasicNewsRecipe
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
publisher = 'HVG Online'
category = u'news, hírek, hvg'
extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
remove_tags_before = dict(id='pg-content')
remove_javascript = True
remove_empty_feeds = True
class hvg(BasicNewsRecipe):
title = u'HVG'
__author__ = 'Bigpapa'
language = 'hu'
oldest_article = 5 # Maximum age, in days, of the oldest article to fetch.
max_articles_per_feed = 5 # Maximum number of articles per feed stored in the generated e-book.
no_stylesheets = True
encoding = 'utf8'
extra_css = ' h2 { font:bold 28px} '
feeds = [
(u'Itthon', u'http://hvg.hu/rss/itthon')
,(u'Világ', u'http://hvg.hu/rss/vilag')
,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
,(u'Karrier', u'http://hvg.hu/rss/karrier')
,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
,(u'Kultúra', u'http://hvg.hu/rss/kultura')
,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
,(u'Sport', u'http://hvg.hu/rss/sport')
]
remove_attributes = ['style','font', 'href']
def print_version(self, url):
return url.replace ('#rss', '/print')
keep_only_tags = [
dict(name='div', attrs={'id':['pg-content']})
]
remove_tags = [
dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
dict(name='table', attrs={'class':['banner2', 'monocle']}),
dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
dict(name='h3', attrs={'class':['hthree']}),
dict(name='ul', attrs={'class':['defaultul']}),
dict(name='form', attrs={'id':['commentForm']}),
dict(name='h6', attrs={'class':['hthree']}),
dict(name='h6', attrs={'class':['more2']}),
dict(name='img', attrs={'class':['framed']}),
dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
]
feeds = [
# (u'\xd6sszes', 'http://hvg.hu/rss'),
(u'Itthon', 'http://hvg.hu/rss/itthon'),
(u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
(u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
(u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
(u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
(u'Karrier', 'http://hvg.hu/rss/karrier'),
(u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
(u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
(u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
(u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
(u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
(u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
(u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
(u'Sport', 'http://hvg.hu/rss/sport')
]

BIN recipes/icons/biolog_pl.png Normal file (binary file, not shown; 1.2 KiB)
Four more binary icon image files added (not shown).
BIN recipes/icons/moneynews.png Normal file (binary file, not shown; 914 B)


@ -109,7 +109,6 @@ class TheIndependentNew(BasicNewsRecipe):
picdiv = soup.find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src'])
def preprocess_html(self, soup):
@ -273,12 +272,15 @@ class TheIndependentNew(BasicNewsRecipe):
def _insertRatingStars(self,soup,item):
if item.contents is None:
if item.contents is None or len(item.contents) < 1:
return
rating = item.contents[0]
if not rating.isdigit():
return None
rating = int(item.contents[0])
try:
rating = float(item.contents[0])
except:
print 'Could not convert decimal rating to star: malformatted float.'
return
for i in range(1,6):
star = Tag(soup,'img')
if i <= rating:


@ -0,0 +1,14 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Kosmonauta(BasicNewsRecipe):
title = u'Kosmonauta.net'
__author__ = 'fenuks'
description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
category = 'astronomy'
language = 'pl'
cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]


@ -10,6 +10,10 @@ __MakePeriodical__ = True
__UseChineseTitle__ = False
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) It is to disable premium content (Default: False)
@ -24,12 +28,15 @@ __Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on the Kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@ -53,6 +60,7 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
@ -60,11 +68,15 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
@ -109,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -127,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -161,9 +179,9 @@ class MPRecipe(BasicNewsRecipe):
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -186,6 +204,18 @@ class MPRecipe(BasicNewsRecipe):
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
if __Date__ <> '':
return __Date__[6:8]
@ -237,7 +267,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -274,7 +304,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
@ -291,7 +321,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
@ -299,7 +329,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -379,7 +409,7 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
try:
try:
br.open_novisit(url)
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
@ -406,7 +436,7 @@ class MPRecipe(BasicNewsRecipe):
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
@ -528,7 +558,7 @@ class MPRecipe(BasicNewsRecipe):
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
else:
# .txt based file
splitter = re.compile(r'\n') # Match non-digits
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@ -591,23 +621,23 @@ class MPRecipe(BasicNewsRecipe):
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
@ -644,7 +674,7 @@ class MPRecipe(BasicNewsRecipe):
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -653,78 +683,154 @@ class MPRecipe(BasicNewsRecipe):
for item in soup.findAll(stype=True):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use customed date instead of current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
@ -739,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -762,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
@ -785,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
@ -799,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
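The get_fetch* helpers above slice an optional YYYYMMDD override out of __Date__, and the overridden create_opf() turns those pieces into the publication date written to the OPF. A minimal sketch of that date handling, assuming a purely illustrative override value of '20111218' (variable names are mine, not the recipe's):

import datetime

# Sketch only: '20111218' stands in for a user-supplied __Date__ override.
sample_date = '20111218'
year, month, day = sample_date[0:4], sample_date[4:6], sample_date[6:8]

# Date string appended to the book title when not building a periodical.
formatted = '%s-%s-%s' % (year, month, day)   # '2011-12-18'

# Publication date written into the OPF metadata; per the recipe's comment,
# the time apparently needs to be later than noon for the intended pub date
# to show on the Kindle.
pubdate = datetime.datetime(int(year), int(month), int(day), 12, 30, 0)
print(formatted, pubdate)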


@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) It is to disable premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on the Kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
if __Date__ <> '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
if __Date__ <> '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
if __Date__ <> '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
#if ed_articles:
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
#if fin_articles:
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
# replace the url with the print-friendly version
if __ParsePFF__ == True:
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
url = re.sub('%2F.*%2F', '/', url)
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
url = url.replace('%2Etxt', '_print.htm')
url = url.replace('%5F', '_')
else:
url = url.replace('.htm', '_print.htm')
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
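# Editorial note on the rewrite above (a sketch inferred from the code; the sample
# fragment is made up): premium links on news.mingpao.com are 'Redirect' URLs with a
# URL-encoded target. The first re.sub drops the duplicated date segment, the second
# collapses the encoded directory part ('%2F...%2F') to a single '/', and the replace
# calls map the encoded '%2Etxt' target to its printer-friendly '_print.htm' page,
# e.g. a target ending in '...%2Fabc%2Fga12345%2Etxt' would come out as '/ga12345_print.htm'.
# Ordinary article links simply have '.htm' swapped for '_print.htm'.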
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
br = mechanize.Browser()
br.set_handle_redirect(False)
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
try:
br.open_novisit(url)
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
except:
print 'skipping a premium article'
current_articles.reverse()
return current_articles
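# Editorial note (not in the original source): with redirect handling disabled above,
# br.open_novisit(url) raises for premium articles, whose links apparently answer with
# a redirect to the paid-content page, so those entries are dropped here; freely
# readable articles open cleanly and are appended to the feed.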
# parse from text file of life.mingpao.com
def parse_section2_txt(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
new_html = raw_html
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
if url.rfind('_print.htm') <> -1:
# javascript based file
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
# .txt based file
splitter = re.compile(r'\n') # split on newlines
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if next_is_img_txt == False and met_article_start_char == False:
if item <> '':
if title_started == False:
#print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
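# Editorial summary of the .txt markup handled above (inferred from the parsing code,
# not documented by the source): lines starting with '=@' are movie links and are
# skipped; '=?' introduces a .gif image reference; '==' and '=' introduce .jpg image
# references, with the line that follows a marker emitted as the image caption; the
# first non-empty plain line starts the heading, a blank line ends it, and later
# non-empty lines are emitted as article content paragraphs.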
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
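# Editorial note on the fallback above (an inference from the code, not stated in the
# source): each 'src="...jpg"' is first probed on the server as a .gif of the same
# name and swapped in if the probe succeeds; otherwise the hi-res photo is assumed to
# live under the same name with an extra leading underscore, so a hypothetical
# 'src="ga123.jpg"' would be rewritten to 'src="_ga123.jpg"'.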
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
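# Editorial note (not in the original source): when __IncludeSummary__ is set and the
# article has no summary yet, the first non-empty paragraph of the body, minus the
# leading u'\u3010\u660e\u5831\u5c08\u8a0a\u3011' tag, becomes the Kindle article
# description; in all other cases the description is replaced by the body's character
# count, rendered in the form u'\uff08' + str(counts) + u'\u5b57\uff09'.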
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use the custom date instead of the current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) Set it to True to include premium (subscription) content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date used by the recipe by specifying a YYYYMMDD date below (leave it empty to use the current date)
__Date__ = ''
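# A usage sketch for the switches above (editorial example; the date value is made up):
# leaving __Date__ empty builds today's issue from the adjusted local time, while a
# setting such as
# __Date__ = '20111218'
# makes get_fetchdate(), get_fetchformatteddate(), get_fetchyear(), get_fetchmonth()
# and get_fetchday() all report 18 Dec 2011, so that back issue is fetched and
# labelled instead.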
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
download of Hong Kong Ming Pao at 4.30am. Set the actual publication date shown on the Kindle device.
2011/12/01: take care of the situation where, in txt source parsing, the article content does not start with the special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
if __Date__ <> '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
if __Date__ <> '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
if __Date__ <> '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
#if ed_articles:
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
#if fin_articles:
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
# replace the url with the print-friendly version
if __ParsePFF__ == True:
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
url = re.sub('%2F.*%2F', '/', url)
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
url = url.replace('%2Etxt', '_print.htm')
url = url.replace('%5F', '_')
else:
url = url.replace('.htm', '_print.htm')
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
br = mechanize.Browser()
br.set_handle_redirect(False)
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
try:
br.open_novisit(url)
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
except:
print 'skipping a premium article'
current_articles.reverse()
return current_articles
# parse from text file of life.mingpao.com
def parse_section2_txt(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
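# Editorial note (illustrative; the file path below is made up): the replace above maps
# the interactive page to the raw text source that preprocess_raw_html() understands,
# e.g. 'http://life.mingpao.com/cfm/dailynews3.cfm?File=20111223/nal/ncl1.txt' becomes
# 'http://life.mingpao.com/ftp/Life3/20111223/nal/ncl1.txt'.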
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
new_html = raw_html
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
if url.rfind('_print.htm') <> -1:
# javascript based file
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
# .txt based file
splitter = re.compile(r'\n') # split on newlines
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if next_is_img_txt == False and met_article_start_char == False:
if item <> '':
if title_started == False:
#print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use the custom date instead of the current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Mlody_technik(BasicNewsRecipe):
title = u'Mlody technik'
__author__ = 'fenuks'
description = u'Młody technik'
category = 'science'
language = 'pl'
cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
#keep_only_tags=[dict(id='container')]
feeds = [(u'Artyku\u0142y', u'http://www.mt.com.pl/feed')]

View File

@ -1,9 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
moneynews.newsmax.com
www.moneynews.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
@ -12,40 +10,40 @@ class MoneyNews(BasicNewsRecipe):
title = 'Moneynews.com'
__author__ = 'Darko Miletic'
description = 'Financial news worldwide'
publisher = 'moneynews.com'
language = 'en'
publisher = 'Newsmax.com'
language = 'en'
category = 'news, finances, USA, business'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
encoding = 'utf8'
extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
feeds = [
(u'Street Talk' , u'http://moneynews.newsmax.com/xml/streettalk.xml' )
,(u'Finance News' , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
,(u'Economy' , u'http://moneynews.newsmax.com/xml/economy.xml' )
,(u'Companies' , u'http://moneynews.newsmax.com/xml/companies.xml' )
,(u'Markets' , u'http://moneynews.newsmax.com/xml/Markets.xml' )
,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' )
(u'Street Talk' , u'http://www.moneynews.com/rss/StreetTalk/8.xml' )
,(u'Finance News' , u'http://www.moneynews.com/rss/FinanceNews/4.xml' )
,(u'Economy' , u'http://www.moneynews.com/rss/Economy/2.xml' )
,(u'Companies' , u'http://www.moneynews.com/rss/Companies/6.xml' )
,(u'Markets' , u'http://www.moneynews.com/rss/Markets/7.xml' )
,(u'Investing & Analysis' , u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml')
]
keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
keep_only_tags = [dict(name='div', attrs={'class':'copy'})]
remove_tags = [
dict(name='td' , attrs={'id':'article_fontsize'})
,dict(name='table', attrs={'id':'toolbox' })
,dict(name='tr' , attrs={'id':'noprint3' })
dict(attrs={'class':['MsoNormal', 'MsoNoSpacing']}),
dict(name=['object','link','embed','form','meta'])
]
def print_version(self, url):
nodeid = url.rpartition('/')[2]
return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid

View File

@ -7,6 +7,7 @@ class naczytniki(BasicNewsRecipe):
language = 'pl'
description ='everything about e-readers'
category='readers'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
remove_tags_after= dict(name='div', attrs={'class':'sociable'})

54
recipes/nol.recipe Normal file
View File

@ -0,0 +1,54 @@
################################################################################
#Description: http://nol.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.18. - V1.1
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class NOL(BasicNewsRecipe):
title = u'NOL'
__author__ = 'Bigpapa'
oldest_article = 5
max_articles_per_feed = 5 # Maximum number of articles per feed to keep in the generated e-book.
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
language = 'hu'
publication_type = 'newsportal'
conversion_options ={
'linearize_tables' : True,
}
keep_only_tags = [
dict(name='table', attrs={'class':['article-box']})
]
remove_tags = [
dict(name='div', attrs={'class':['h','ad-container-outer','tags noborder','ad-container-inner','image-container-lead','tags','related-container']}),
dict(name='h4'),
dict(name='tfoot'),
dict(name='td', attrs={'class':['foot']}),
dict(name='span', attrs={'class':['image-container-caption']}),
]
feeds = [
# (u'V\xe1logat\xe1s', 'http://nol.hu/feed/valogatas.rss'),
(u'Belf\xf6ld', 'http://nol.hu/feed/belfold.rss'),
(u'K\xfclf\xf6ld', 'http://nol.hu/feed/kulfold.rss'),
(u'Gazdas\xe1g', 'http://nol.hu/feed/gazdasag.rss'),
(u'V\xe9lem\xe9ny', 'http://nol.hu/feed/velemeny.rss'),
(u'Kult\xfara', 'http://nol.hu/feed/kult.rss'),
(u'Tud/Tech', 'http://nol.hu/feed/tud-tech.rss'),
(u'Sport', 'http://nol.hu/feed/sport.rss'),
(u'Noller', 'http://nol.hu/feed/noller.rss'),
(u'Mozaik', 'http://nol.hu/feed/mozaik.rss'),
(u'Utaz\xe1s', 'http://nol.hu/feed/utazas.rss'),
(u'Aut\xf3', 'http://nol.hu/feed/auto.rss'),
(u'Voks', 'http://nol.hu/feed/voks.rss'),
]

View File

@ -1,20 +1,21 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Nowa_Fantastyka(BasicNewsRecipe):
title = u'Nowa Fantastyka'
oldest_article = 7
__author__ = 'fenuks'
language = 'pl'
encoding='latin2'
description ='site for fantasy readers'
category='fantasy'
max_articles_per_feed = 100
INDEX='http://www.fantastyka.pl/'
no_stylesheets=True
needs_subscription = 'optional'
remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
#remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
remove_tags=[dict(attrs={'class':'avatar2'})]
feeds = []
remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
def find_articles(self, url):
articles = []
@ -45,3 +46,13 @@ class Nowa_Fantastyka(BasicNewsRecipe):
cover=soup.find(name='img', attrs={'class':'okladka'})
self.cover_url=self.INDEX+ cover['src']
return getattr(self, 'cover_url', self.cover_url)
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.fantastyka.pl/')
br.select_form(nr=0)
br['login'] = self.username
br['pass'] = self.password
br.submit()
return br

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__license__ = 'GPL v3'
'''
calibre recipe for prospectmagazine.co.uk (subscription)
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class ProspectMagUK(BasicNewsRecipe):
title = u'Prospect Magazine'
description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
__author__ = 'barty, duluoz'
timefmt = ' [%d %B %Y]'
no_stylesheets = True
publication_type = 'magazine'
masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
category = 'news, UK'
language = 'en_GB'
max_articles_per_feed = 100
auto_cleanup = True
needs_subscription = True
auto_cleanup_keep = '//div[@class="lead_image"]'
remove_tags = [{'class':['shareinpost','postutils','postinfo']}]
INDEX = 'http://www.prospectmagazine.co.uk/current-issue'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.prospectmagazine.co.uk/wp-login.php')
br.select_form(name='loginform')
br['log'] = self.username
br['pwd'] = self.password
br.submit()
return br
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
#div = soup.find('h1',text=re.compile(r'Issue \d+'))
#fname = self.tag_to_string( div) if div is not None else 'Current Issue'
div = soup.find('div', id='cover_image')
if div is not None:
img = div.find('img', src=True)
if img is not None:
src = img['src']
if src.startswith('/'):
src = 'http://www.prospectmagazine.co.uk' + src
self.cover_url = src
feeds = []
# loop through sections
for sect in soup.findAll('div',attrs={'class':'sectionheading'}):
fname = self.tag_to_string( sect).replace('>','').strip()
self.log('Found section', fname)
articles = []
# note: can't just find siblings with class='post' because that will also
# grab all the articles belonging to the sections that follow.
for item in sect.findNextSiblings('div',attrs={'class':True}):
if not 'post' in item['class']: break
a = item.find('a', href=True)
if a is None: continue
url = a['href']
title = self.tag_to_string(a)
p = item.find('p')
desc = self.tag_to_string( p) if p is not None else ''
art = {'title':title, 'description':desc,'date':' ', 'url':url}
p = item.find(attrs={'class':re.compile('author')})
self.log('\tFound article:', title, '::', url)
if p is not None:
art['author'] = self.tag_to_string( p).strip()
articles.append(art)
feeds.append((fname, articles))
return feeds

View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class spn(BasicNewsRecipe):
title = u'Salonica Press News'
language = 'gr'
__author__ = "SteliosGero"
oldest_article = 3
max_articles_per_feed = 100
auto_cleanup = True
category = 'news, GR'
language = 'el'
feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&amp;type=rss'), (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&amp;type=rss'), (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&amp;type=rss'), (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&amp;type=rss'), (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&amp;type=rss'), (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&amp;type=rss'), (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&amp;type=rss'), (u'Media', u'http://www.spnews.gr/media-news?format=feed&amp;type=rss'), (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&amp;type=rss'), (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&amp;type=rss'), (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&amp;type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&amp;type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&amp;type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&amp;type=rss'), (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&amp;type=rss'), (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&amp;type=rss'), (u'Alert!', u'http://spnews.gr/alert?format=feed&amp;type=rss')]
def print_version(self, url):
return url+'?tmpl=component&print=1&layout=default&page='

View File

@ -8,8 +8,8 @@ class SpidersWeb(BasicNewsRecipe):
cover_url = 'http://www.spidersweb.pl/wp-content/themes/spiderweb/img/Logo.jpg'
category = 'IT, WEB'
language = 'pl'
no_stylesheets = True
max_articles_per_feed = 100
remove_tags_before=dict(name="h1", attrs={'class':'Title'})
remove_tags_after=dict(name="div", attrs={'class':'Text'})
remove_tags=[dict(name='div', attrs={'class':['Tags', 'CommentCount FloatL', 'Show FloatL']})]
keep_only_tags=[dict(id='Post')]
remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']})]
feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]

View File

@ -6,92 +6,49 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe):
title = u'sueddeutsche.de'
description = 'News from Germany'
__author__ = 'Oliver Niesner and Armin Geller' #AGe 2011-12-16
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2011-12-16
use_embedded_content = False
timefmt = ' [%d %b %Y]'
oldest_article = 7
max_articles_per_feed = 50
no_stylesheets = True
language = 'de'
auto_cleanup = True
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1236175.1323967473!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
# 2011-12-16 AGe
# remove_tags = [ dict(name='link'), dict(name='iframe'),
# dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
# "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
#
# dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
# "pages closed","basebox right narrow","headslot galleried"]}),
#
# dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
# "item","videoBigButton","articlefooter full-column",
# "bildbanderolle full-column","footerCopy padleft5"]}),
#
# dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
# dict(name='div', attrs={'style':["position:relative;"]}),
# dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
# dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
# dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
# dict(name='td', attrs={'class':["artikelDruckenRight"]}),
# dict(name='p', text = "ANZEIGE")
# ]
# remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]
#
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
a{font-family:Arial,Helvetica,sans-serif; font-style:italic;}
.dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
.artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
body{font-family:Arial,Helvetica,sans-serif; }
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
#
auto_cleanup = True
cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1237395.1324054345!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
feeds = [
# (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'), # AGe 2011-11-13
# (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
(u'Politik', u'http://www.sueddeutsche.de/app/service/rss/ressort/politik/rss.xml'),
(u'Wirtschaft', u'http://www.sueddeutsche.de/app/service/rss/ressort/wirtschaft/rss.xml'),
(u'Geld', u'http://www.sueddeutsche.de/app/service/rss/ressort/finanzen/rss.xml'),
(u'Kultur', u'http://www.sueddeutsche.de/app/service/rss/ressort/kultur/rss.xml'),
(u'Sport', u'http://www.sueddeutsche.de/app/service/rss/ressort/sport/rss.xml'),
(u'Leben', u'http://www.sueddeutsche.de/app/service/rss/ressort/leben/rss.xml'),
(u'Karriere', u'http://www.sueddeutsche.de/app/service/rss/ressort/karriere/rss.xml'),
(u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
(u'Bayern', u'http://www.sueddeutsche.de/app/service/rss/ressort/bayern/rss.xml'),
(u'Medien', u'http://www.sueddeutsche.de/app/service/rss/ressort/medien/rss.xml'),
(u'Digital', u'http://www.sueddeutsche.de/app/service/rss/ressort/computerwissen/rss.xml'),
(u'Auto', u'http://www.sueddeutsche.de/app/service/rss/ressort/autoreise/rss.xml'),
(u'Wissen', u'http://www.sueddeutsche.de/app/service/rss/ressort/wissen/rss.xml'),
(u'Panorama', u'http://www.sueddeutsche.de/app/service/rss/ressort/panorama/rss.xml'),
(u'Reise', u'http://www.sueddeutsche.de/app/service/rss/ressort/reise/rss.xml'),
(u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
(u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
(u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
(u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
(u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
(u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
(u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
(u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
(u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
(u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
(u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
(u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
(u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
(u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
]
# def print_version(self, url): #AGe 2011-12-16 deactivated
# main, sep, id = url.rpartition('/') #AGe 2011-12-16 deactivated
# return main + '/2.220/' + id #AGe 2011-12-16 deactivated
# AGe 2011-12-16: Redirect handling solved using the re-usable recipe code from kiklop74.
# Feed is: http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss
# Article download source is: http://sz.de/1.1237295 (Ski Alpin: Der Erfolg kommt, der Trainer geht)
# Article source is: http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
# Article printversion is: http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
def print_version(self, url):
n_url=self.browser.open_novisit(url).geturl()
main, sep, id = n_url.rpartition('/')
return main + '/2.220/' + id
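
A quick worked illustration of the rewrite in print_version() above, using the article URL documented in the comments. In the recipe the redirect from the sz.de short link is resolved by browser.open_novisit(); here the already-resolved URL is simply given as a string:

n_url = 'http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295'
main, sep, id = n_url.rpartition('/')
print(main + '/2.220/' + id)
# -> http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295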

View File

@ -1,4 +1,4 @@
import re
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class TimesOfIndia(BasicNewsRecipe):
@ -17,7 +17,9 @@ class TimesOfIndia(BasicNewsRecipe):
]
remove_tags = [
{'class':re.compile('tabsintbgshow|prvnxtbg')},
{'id':['fbrecommend', 'relmaindiv']}
{'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv',
'gpls', 'auim']},
{'class':['twitter-share-button', 'cmtmn']},
]
feeds = [
@ -46,25 +48,27 @@ class TimesOfIndia(BasicNewsRecipe):
]
def get_article_url(self, article):
# Times of India sometimes serves an ad page instead of the article,
# this code detects and circumvents that
url = BasicNewsRecipe.get_article_url(self, article)
if '/0Ltimesofindia' in url:
url = url.partition('/0L')[-1]
url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
'/').replace('0E', '-')
url = 'http://' + url.rpartition('/')[0]
match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
if match is not None:
num = match.group(1)
num = re.sub(r'[^0-9]', '', num)
return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
num)
else:
cms = re.search(r'/(\d+)\.cms', url)
if cms is not None:
return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
cms.group(1))
try:
s = article.summary
return urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
link = article.get('link', None)
if link and link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
for k, v in encoding.iteritems():
link = link.replace(k, v)
return link
return url
def print_version(self, url):
return url + '?prtpage=1'
def preprocess_html(self, soup, *args):
byl = soup.find(attrs={'class':'byline'})
if byl is not None:
for l in byl.findAll('label'):
l.extract()
return soup
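
As a side note, the feed-link decoding table in get_article_url() above can be exercised on its own. The link below is a made-up feedsportal-style URL, constructed only to illustrate the substitution step; real feed items may differ:

encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
link = ('http://timesofindia.feedsportal.com/c/3039/f/1/s/2/'
        '0Ltimesofindia0Bindiatimes0N0Carticleshow0C123450Bcms/story01.htm')
if link.split('/')[-1] == 'story01.htm':
    segment = link.split('/')[-2]
    for k, v in encoding.items():
        segment = segment.replace(k, v)
    print(segment)  # -> http://timesofindia.indiatimes.com/articleshow/12345.cms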

17
recipes/tuttojove.recipe Normal file
View File

@ -0,0 +1,17 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian website on Juventus F.C. - v1.00 (17 December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305984536(BasicNewsRecipe):
title = u'tuttojuve'
description = 'Juventus'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
feeds = [(u'notizie', u'http://feeds.tuttojuve.com/rss/'), (u'da vinovo', u'http://feeds.tuttojuve.com/rss/?c=10'), (u'primo piano', u'http://feeds.tuttojuve.com/rss/?c=16'), (u'editoriale', u'http://feeds.tuttojuve.com/rss/?c=3'), (u'il punto', u'http://feeds.tuttojuve.com/rss/?c=8'), (u'pagelle', u'http://feeds.tuttojuve.com/rss/?c=9'), (u'avversario', u'http://feeds.tuttojuve.com/rss/?c=11')]
def print_version(self, url):
return self.browser.open_novisit(url).geturl()

View File

@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-11-26 08:48+0000\n"
"PO-Revision-Date: 2011-12-17 09:29+0000\n"
"Last-Translator: Jellby <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-27 05:24+0000\n"
"X-Generator: Launchpad (build 14381)\n"
"X-Launchpad-Export-Date: 2011-12-18 04:37+0000\n"
"X-Generator: Launchpad (build 14525)\n"
#. name for aaa
msgid "Ghotuo"
@ -4963,7 +4963,7 @@ msgstr "Catawba"
#. name for chd
msgid "Chontal; Highland Oaxaca"
msgstr ""
msgstr "Chontal oaxaqueño de las tierras altas"
#. name for che
msgid "Chechen"
@ -4971,7 +4971,7 @@ msgstr "Checheno"
#. name for chf
msgid "Chontal; Tabasco"
msgstr ""
msgstr "Chontal de tabasco"
#. name for chg
msgid "Chagatai"
@ -4983,7 +4983,7 @@ msgstr "Chinook"
#. name for chj
msgid "Chinantec; Ojitlán"
msgstr ""
msgstr "Chinanteco de Ojitlán"
#. name for chk
msgid "Chuukese"
@ -5011,7 +5011,7 @@ msgstr "Chipewyan"
#. name for chq
msgid "Chinantec; Quiotepec"
msgstr ""
msgstr "Chinanteco de Quiotepec"
#. name for chr
msgid "Cherokee"
@ -5043,7 +5043,7 @@ msgstr "Cheyenne"
#. name for chz
msgid "Chinantec; Ozumacín"
msgstr ""
msgstr "Chinanteco de Ozumacín"
#. name for cia
msgid "Cia-Cia"
@ -5051,7 +5051,7 @@ msgstr "Cia-cia"
#. name for cib
msgid "Gbe; Ci"
msgstr ""
msgstr "Cigbe"
#. name for cic
msgid "Chickasaw"
@ -5215,7 +5215,7 @@ msgstr "Arameo moderno caldeo"
#. name for cle
msgid "Chinantec; Lealao"
msgstr ""
msgstr "Chinanteco de Lealao"
#. name for clh
msgid "Chilisso"
@ -5239,7 +5239,7 @@ msgstr "Clallam"
#. name for clo
msgid "Chontal; Lowland Oaxaca"
msgstr ""
msgstr "Chontal oaxaqueño de las tieras bajas"
#. name for clu
msgid "Caluyanun"
@ -5251,7 +5251,7 @@ msgstr "Chulym"
#. name for cly
msgid "Chatino; Eastern Highland"
msgstr ""
msgstr "Chatino Lachao-Yolotepec"
#. name for cma
msgid "Maa"
@ -5327,7 +5327,7 @@ msgstr ""
#. name for cnl
msgid "Chinantec; Lalana"
msgstr ""
msgstr "Chinanteco de Lalana"
#. name for cno
msgid "Con"
@ -5339,7 +5339,7 @@ msgstr "Asmat central"
#. name for cnt
msgid "Chinantec; Tepetotutla"
msgstr ""
msgstr "Chinanteco de Tepetotutla"
#. name for cnu
msgid "Chenoua"
@ -5355,7 +5355,7 @@ msgstr "Córnico medio"
#. name for coa
msgid "Malay; Cocos Islands"
msgstr ""
msgstr "Malayo de las Islas Cocos"
#. name for cob
msgid "Chicomuceltec"
@ -5391,7 +5391,7 @@ msgstr "Cochimi"
#. name for cok
msgid "Cora; Santa Teresa"
msgstr ""
msgstr "Cora de Santa Teresa"
#. name for col
msgid "Columbia-Wenatchi"
@ -5455,7 +5455,7 @@ msgstr "Chocho"
#. name for cpa
msgid "Chinantec; Palantla"
msgstr ""
msgstr "Chinanteco de Palantla"
#. name for cpb
msgid "Ashéninka; Ucayali-Yurúa"
@ -5599,7 +5599,7 @@ msgstr "Cruzeño"
#. name for csa
msgid "Chinantec; Chiltepec"
msgstr ""
msgstr "Chinanteco de Chiltepec"
#. name for csb
msgid "Kashubian"
@ -5651,7 +5651,7 @@ msgstr "Lengua de signos colombiana"
#. name for cso
msgid "Chinantec; Sochiapan"
msgstr ""
msgstr "Chinanteco de Sochiapan"
#. name for csq
msgid "Croatia Sign Language"
@ -5683,7 +5683,7 @@ msgstr "Coos"
#. name for cta
msgid "Chatino; Tataltepec"
msgstr ""
msgstr "Chatino de Tataltepec"
#. name for ctc
msgid "Chetco"
@ -5695,7 +5695,7 @@ msgstr ""
#. name for cte
msgid "Chinantec; Tepinapa"
msgstr ""
msgstr "Chinanteco de Tepinapa"
#. name for ctg
msgid "Chittagonian"
@ -5703,7 +5703,7 @@ msgstr "Chitagoniano"
#. name for ctl
msgid "Chinantec; Tlacoatzintepec"
msgstr ""
msgstr "Chinanteco de Tlacoatzintepec"
#. name for ctm
msgid "Chitimacha"
@ -5719,7 +5719,7 @@ msgstr "Emberá-catío"
#. name for ctp
msgid "Chatino; Western Highland"
msgstr ""
msgstr "Chatino"
#. name for cts
msgid "Bicolano; Northern Catanduanes"
@ -5735,7 +5735,7 @@ msgstr "Chol"
#. name for ctz
msgid "Chatino; Zacatepec"
msgstr ""
msgstr "Chatino de Zacatepec"
#. name for cua
msgid "Cua"
@ -5747,7 +5747,7 @@ msgstr "Cubeo"
#. name for cuc
msgid "Chinantec; Usila"
msgstr ""
msgstr "Chinanteco de Usila"
#. name for cug
msgid "Cung"
@ -5819,7 +5819,7 @@ msgstr "Chug"
#. name for cvn
msgid "Chinantec; Valle Nacional"
msgstr ""
msgstr "Chinanteco de Valle Nacional"
#. name for cwa
msgid "Kabwa"
@ -5847,7 +5847,7 @@ msgstr "Kuwaataay"
#. name for cya
msgid "Chatino; Nopala"
msgstr ""
msgstr "Chatino de Nopala"
#. name for cyb
msgid "Cayubaba"
@ -5871,7 +5871,7 @@ msgstr "Knaanic"
#. name for czn
msgid "Chatino; Zenzontepec"
msgstr ""
msgstr "Chatino de Zenzontepec"
#. name for czo
msgid "Chinese; Min Zhong"
@ -10603,7 +10603,7 @@ msgstr "Javanés"
#. name for jax
msgid "Malay; Jambi"
msgstr ""
msgstr "Malayo de Jambi"
#. name for jay
msgid "Yan-nhangu"
@ -14743,7 +14743,7 @@ msgstr "Lorung meridional"
#. name for lrt
msgid "Malay; Larantuka"
msgstr ""
msgstr "Malayo de Larantuka"
#. name for lrv
msgid "Larevat"
@ -15099,7 +15099,7 @@ msgstr ""
#. name for max
msgid "Malay; North Moluccan"
msgstr ""
msgstr "Malayo de las Molucas septentrional"
#. name for maz
msgid "Mazahua; Central"
@ -15127,7 +15127,7 @@ msgstr ""
#. name for mbf
msgid "Malay; Baba"
msgstr ""
msgstr "Malayo baba"
#. name for mbh
msgid "Mangseng"
@ -15467,7 +15467,7 @@ msgstr "Mende (Sierra Leona)"
#. name for meo
msgid "Malay; Kedah"
msgstr ""
msgstr "Malayo de Kedah"
#. name for mep
msgid "Miriwung"
@ -15511,7 +15511,7 @@ msgstr ""
#. name for mfa
msgid "Malay; Pattani"
msgstr ""
msgstr "Malayo de Pattani"
#. name for mfb
msgid "Bangka"
@ -15571,7 +15571,7 @@ msgstr ""
#. name for mfp
msgid "Malay; Makassar"
msgstr ""
msgstr "Malayo de Macasar"
#. name for mfq
msgid "Moba"
@ -16059,7 +16059,7 @@ msgstr ""
#. name for mkn
msgid "Malay; Kupang"
msgstr ""
msgstr "Malayo de Kupang"
#. name for mko
msgid "Mingang Doso"
@ -16207,7 +16207,7 @@ msgstr ""
#. name for mlz
msgid "Malaynon"
msgstr ""
msgstr "Malaynón"
#. name for mma
msgid "Mama"
@ -16623,7 +16623,7 @@ msgstr ""
#. name for mqg
msgid "Malay; Kota Bangun Kutai"
msgstr ""
msgstr "Malayo kutai de Kota Bangun"
#. name for mqh
msgid "Mixtec; Tlazoyaltepec"
@ -16839,7 +16839,7 @@ msgstr "Malgache masikoro"
#. name for msi
msgid "Malay; Sabah"
msgstr ""
msgstr "Malayo de Sabah"
#. name for msj
msgid "Ma (Democratic Republic of Congo)"
@ -22607,7 +22607,7 @@ msgstr ""
#. name for sci
msgid "Creole Malay; Sri Lankan"
msgstr ""
msgstr "Malo criollo de Sri Lanka"
#. name for sck
msgid "Sadri"
@ -27187,7 +27187,7 @@ msgstr ""
#. name for vkt
msgid "Malay; Tenggarong Kutai"
msgstr ""
msgstr "Malayo kutai de Tenggarong"
#. name for vku
msgid "Kurrama"
@ -28395,7 +28395,7 @@ msgstr "Edomita"
#. name for xdy
msgid "Dayak; Malayic"
msgstr ""
msgstr "Dayak malayo"
#. name for xeb
msgid "Eblan"
@ -28727,7 +28727,7 @@ msgstr "Lengua de signos malasia"
#. name for xmm
msgid "Malay; Manado"
msgstr ""
msgstr "Malayo de Manado"
#. name for xmn
msgid "Persian; Manichaean Middle"

View File

@ -233,7 +233,7 @@ class GetTranslations(Translations): # {{{
if self.modified_translations:
subprocess.check_call(['bzr', 'commit', '-m',
'IGN:Updated translations', self.PATH])
'IGN:Updated translations'])
else:
print('No updated translations available')

View File

@ -527,7 +527,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
ent = match.group(1)
if ent in exceptions:
return '&'+ent+';'
if ent == 'apos':
if ent in {'apos', 'squot'}: # squot is generated by some broken CMS software
return check("'")
if ent == 'hellips':
ent = 'hellip'
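
A minimal sketch of what the new branch buys, assuming entity_to_unicode() is used as a re.sub() callback on named entities; the pattern and import path below are illustrative, not necessarily the exact ones calibre uses internally:

import re
from calibre import entity_to_unicode  # assumed import path

pat = re.compile(r'&(\w+);')
print(pat.sub(entity_to_unicode, 'Bob&squot;s &apos;book&apos;'))
# -> Bob's 'book'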

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 31)
numeric_version = (0, 8, 32)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -48,7 +48,8 @@ class ANDROID(USBMS):
0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226]
0x70c6 : [0x226],
0x4316 : [0x216],
},
# Freescale
0x15a2 : {
@ -87,6 +88,7 @@ class ANDROID(USBMS):
0x6877 : [0x0400],
0x689e : [0x0400],
0xdeed : [0x0222],
0x1234 : [0x0400],
},
# Viewsonic/Vizio
@ -170,13 +172,14 @@ class ANDROID(USBMS):
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107']
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F']
'A1-07___C0541A4F', 'XT912']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -170,8 +170,8 @@ class ODYSSEY(N516):
description = _('Communicate with the Cybook Odyssey eBook reader.')
BCD = [0x316]
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
VENDOR_NAME = ['LINUX', 'BOOKEEN']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['FILE-STOR_GADGET', 'FLASH_DISK']
FORMATS = ['epub', 'fb2', 'html', 'pdf', 'txt']

View File

@ -266,12 +266,14 @@ class PRST1(USBMS):
collections = booklist.get_collections(collections_attributes)
with closing(sqlite.connect(dbpath)) as connection:
self.update_device_books(connection, booklist, source_id, plugboard)
self.update_device_books(connection, booklist, source_id,
plugboard, dbpath)
self.update_device_collections(connection, booklist, collections, source_id)
debug_print('PRST1: finished update_device_database')
def update_device_books(self, connection, booklist, source_id, plugboard):
def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath):
opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@ -284,12 +286,12 @@ class PRST1(USBMS):
query = 'SELECT file_path, _id FROM books'
cursor.execute(query)
except DatabaseError:
raise DeviceError('The SONY database is corrupted. '
raise DeviceError(('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file may cause your reader to forget '
' any notes/highlights, etc.')
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)
db_books = {}
for i, row in enumerate(cursor):

View File

@ -276,11 +276,11 @@ OptionRecommendation(name='duplicate_links_in_toc',
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '
'"chapter","book","section" or "part" as chapter titles as '
'"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as '
'well as any tags that have class="chapter". The expression '
'used must evaluate to a list of elements. To disable chapter '
'detection, use the expression "/". See the XPath Tutorial '

View File

@ -35,7 +35,7 @@ def initialize_container(path_to_container, opf_name='metadata.opf',
'''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
zf = ZipFile(path_to_container, 'w')
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
zf.writestr('META-INF/', '', 0700)
zf.writestr('META-INF/', '', 0755)
zf.writestr('META-INF/container.xml', CONTAINER)
for path, _, data in extra_entries:
zf.writestr(path, data)

View File

@ -1136,7 +1136,8 @@ class BinaryRecord(object): # {{{
self.raw = record.raw
sig = self.raw[:4]
name = '%06d'%idx
if sig in (b'FCIS', b'FLIS', b'SRCS', b'DATP'):
if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
b'FDST', b'AUDI', b'VIDE',}:
name += '-' + sig.decode('ascii')
elif sig == b'\xe9\x8e\r\n':
name += '-' + 'EOF'

View File

@ -325,6 +325,7 @@ class MobiMLizer(object):
elem.text = None
elem.set('id', id_)
elem.tail = tail
elem.tag = XHTML('a')
else:
return
tag = barename(elem.tag)

View File

@ -502,6 +502,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html)
self.processed_html = re.sub(r'<(/?)o:p', r'<\1p', self.processed_html)
# Swap inline and block level elements, and order block level elements according to priority
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
@ -974,7 +975,7 @@ class MobiReader(object):
processed_records.append(i)
data = self.sections[i][0]
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP'}:
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
# A FLIS, FCIS, SRCS or EOF record, ignore
continue
buf = cStringIO.StringIO(data)

View File

@ -942,7 +942,13 @@ class Manifest(object):
if isinstance(data, etree._Element):
ans = xml2str(data, pretty_print=self.oeb.pretty_print)
if self.media_type in OEB_DOCS:
ans = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ans)
# Convert self closing div|span|a tags to normally closed
# ones, as they are interpreted incorrectly by some browser
# based renderers
ans = re.sub(
# tag name followed by either a space or a /
r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>',
r'<\g<tag>\g<arg>></\g<tag>>', ans)
return ans
if isinstance(data, unicode):
return data.encode('utf-8')
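
A small, illustrative check of the tightened pattern above: the lookahead keeps the rewrite limited to real <div>, <a> and <span> tags, whereas the old r'<(div|a|span)([^>]*)/>' would also have swallowed <audio .../> (the self-closing audio bug noted in the changelog):

import re

pat = re.compile(r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>')
markup = '<div class="x"/><audio src="a.mp3"/><span/>'
print(pat.sub(r'<\g<tag>\g<arg>></\g<tag>>', markup))
# -> <div class="x"></div><audio src="a.mp3"/><span></span>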

View File

@ -4,6 +4,7 @@
###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
Based on code originally written by Peter Sorotkin (epubcfi.js)
###
#
log = (error) ->
@ -159,6 +160,63 @@ class CanonicalFragmentIdentifier
point = {}
error = null
offset = null
if (r = cfi.match(/^:(\d+)/)) != null
# Character offset
offset = parseInt(r[1])
cfi = cfi.substr(r[0].length)
if (r = cfi.match(/^~(-?\d+(\.\d+)?)/)) != null
# Temporal offset
point.time = r[1] - 0 # Coerce to number
cfi = cfi.substr(r[0].length)
if (r = cfi.match(/^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)/)) != null
# Spatial offset
point.x = r[1] - 0 # Coerce to number
point.y = r[3] - 0 # Coerce to number
cfi = cfi.substr(r[0].length)
if( (r = cfi.match(/^\[([^\]]+)\]/)) != null )
assertion = r[1]
cfi = cfi.substr(r[0].length)
if (r = assertion.match(/;s=([ab])$/)) != null
if r.index > 0 and assertion[r.index - 1] != '^'
assertion = assertion.substr(0, r.index)
point.forward = (r[1] == 'a')
assertion = unescape_from_cfi(assertion)
# TODO: Handle text assertion
# Find the text node that contains the offset
node?.parentNode?.normalize()
if offset != null
while true
len = node.nodeValue.length
if offset < len or (not point.forward and offset == len)
break
next = false
while true
nn = node.nextSibling
if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
next = nn
break
if not next
if offset > len
error = "Offset out of range: #{ offset }"
offset = len
break
node = next
offset -= len
point.offset = offset
point.node = node
if error
point.error = error
else if cfi.length > 0
point.error = "Undecoded CFI: #{ cfi }"
log(point.error)
point
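
The suffix grammar handled above (":N" character offset, "~T" temporal offset, "@X,Y" spatial offset, and an optional trailing "[assertion;s=a|b]" side bias) is compact enough to sketch in a few lines of Python. This standalone sketch only mirrors the CoffeeScript logic and is not part of calibre:

import re

def parse_cfi_suffix(cfi):
    point = {}
    m = re.match(r'^:(\d+)', cfi)  # character offset
    if m:
        point['offset'] = int(m.group(1))
        cfi = cfi[m.end():]
    m = re.match(r'^~(-?\d+(\.\d+)?)', cfi)  # temporal offset
    if m:
        point['time'] = float(m.group(1))
        cfi = cfi[m.end():]
    m = re.match(r'^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)', cfi)  # spatial offset
    if m:
        point['x'], point['y'] = float(m.group(1)), float(m.group(3))
        cfi = cfi[m.end():]
    m = re.match(r'^\[([^\]]+)\]', cfi)  # assertion, possibly with a side bias
    if m:
        assertion = m.group(1)
        cfi = cfi[m.end():]
        s = re.search(r';s=([ab])$', assertion)
        if s:
            point['forward'] = (s.group(1) == 'a')
    point['rest'] = cfi
    return point

print(parse_cfi_suffix(':12~3.5@10,20[chap01;s=a]'))
# -> {'offset': 12, 'time': 3.5, 'x': 10.0, 'y': 20.0, 'forward': True, 'rest': ''}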
@ -192,7 +250,7 @@ class CanonicalFragmentIdentifier
cdoc = cd
cwin = cdoc.defaultView
target.normalize()
(if target.parentNode then target.parentNode else target).normalize()
if name in ['audio', 'video']
tail = "~" + fstr target.currentTime
@ -214,6 +272,67 @@ class CanonicalFragmentIdentifier
this.encode(doc, target, offset, tail)
# }}}
point: (cfi, doc=window?.document) -> # {{{
r = this.decode(cfi, doc)
if not r
return null
node = r.node
ndoc = node.ownerDocument
if not ndoc
log("CFI node has no owner document: #{ cfi } #{ node }")
return null
nwin = ndoc.defaultView
x = null
y = null
if typeof(r.offset) == "number"
# Character offset
range = ndoc.createRange()
if r.forward
try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
else
try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
k = 0
a = null
rects = null
node_len = node.nodeValue.length
until rects or rects.length or k >= try_list.length
t = try_list[k++]
start_offset = r.offset + t.start
end_offset = r.offset + t.end
a = t.a
if start_offset < 0 or end_offset >= node_len
continue
range.setStart(node, start_offset)
range.setEnd(node, end_offset)
rects = range.getClientRects()
if not rects or not rects.length
log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
return null
rect = rects[0]
x = (a*rect.left + (1-a)*rect.right)
y = (rect.top + rect.bottom)/2
else
x = node.offsetLeft - nwin.scrollX
y = node.offsetTop - nwin.scrollY
if typeof(r.x) == "number" and node.offsetWidth
x += (r.x*node.offsetWidth)/100
y += (r.y*node.offsetHeight)/100
until ndoc == doc
node = nwin.frameElement
ndoc = node.ownerDocument
nwin = ndoc.defaultView
x += node.offsetLeft - nwin.scrollX
y += node.offsetTop - nwin.scrollY
{x:x, y:y, node:r.node, time:r.time}
# }}}
if window?
window.cfi = new CanonicalFragmentIdentifier()
else if process?

View File

@ -2,9 +2,9 @@
<html>
<head>
<title>Testing CFI functionality</title>
<script type="text/javascript" src="cfi.js"></script>
<script type="text/javascript" src="../cfi.coffee"></script>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="cfi-test.js"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
</head>
<body>
<h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>

View File

@ -18,8 +18,8 @@ except ImportError:
def run_devel_server():
os.chdir(os.path.dirname(__file__))
serve(['../cfi.coffee', 'cfi-test.coffee'])
os.chdir(os.path.dirname(os.path.abspath(__file__)))
serve()
if __name__ == '__main__':
run_devel_server()

View File

@ -17,6 +17,7 @@ from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
XHTML_NS = 'http://www.w3.org/1999/xhtml'
XMLNS_NS = 'http://www.w3.org/2000/xmlns/'
class NotHTML(Exception):
@ -28,9 +29,7 @@ def barename(name):
return name.rpartition('}')[-1]
def namespace(name):
if '}' in name:
return name.split('}', 1)[0][1:]
return ''
return name.rpartition('}')[0][1:]
def XHTML(name):
return '{%s}%s' % (XHTML_NS, name)
@ -60,26 +59,86 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
log.warn('Merging multiple <head> and <body> sections')
return root
def _html5_parse(data):
def clone_element(elem, nsmap={}, in_context=True):
if in_context:
maker = elem.getroottree().getroot().makeelement
else:
maker = etree.Element
nelem = maker(elem.tag, attrib=elem.attrib,
nsmap=nsmap)
nelem.text, nelem.tail = elem.text, elem.tail
nelem.extend(elem)
return nelem
def html5_parse(data):
import html5lib
data = html5lib.parse(data, treebuilder='lxml').getroot()
html_ns = [ns for ns, val in data.nsmap.iteritems() if (val == XHTML_NS and
ns is not None)]
if html_ns:
# html5lib causes the XHTML namespace to not
# be set as the default namespace
nsmap = dict(data.nsmap)
nsmap[None] = XHTML_NS
for x in html_ns:
nsmap.pop(x)
nroot = etree.Element(data.tag, nsmap=nsmap,
attrib=dict(data.attrib))
nroot.text = data.text
nroot.tail = data.tail
for child in data:
nroot.append(child)
data = nroot
return data
# Set lang correctly
xl = data.attrib.pop('xmlU0003Alang', None)
if xl is not None and 'lang' not in data.attrib:
data.attrib['lang'] = xl
# html5lib has the most inelegant handling of namespaces I have ever seen
# Try to reconstitute destroyed namespace info
xmlns_declaration = '{%s}'%XMLNS_NS
non_html5_namespaces = {}
seen_namespaces = set()
for elem in tuple(data.iter()):
elem.attrib.pop('xmlns', None)
namespaces = {}
for x in tuple(elem.attrib):
if x.startswith('xmlnsU') or x.startswith(xmlns_declaration):
# A namespace declaration
val = elem.attrib.pop(x)
if x.startswith('xmlnsU0003A'):
prefix = x[11:]
namespaces[prefix] = val
if namespaces:
# Some destroyed namespace declarations were found
p = elem.getparent()
if p is None:
# We handle the root node later
non_html5_namespaces = namespaces
else:
idx = p.index(elem)
p.remove(elem)
elem = clone_element(elem, nsmap=namespaces)
p.insert(idx, elem)
b = barename(elem.tag)
idx = b.find('U0003A')
if idx > -1:
prefix, tag = b[:idx], b[idx+6:]
ns = elem.nsmap.get(prefix, None)
if ns is None:
ns = non_html5_namespaces.get(prefix, None)
if ns is not None:
elem.tag = '{%s}%s'%(ns, tag)
for b in tuple(elem.attrib):
idx = b.find('U0003A')
if idx > -1:
prefix, tag = b[:idx], b[idx+6:]
ns = elem.nsmap.get(prefix, None)
if ns is None:
ns = non_html5_namespaces.get(prefix, None)
if ns is not None:
elem.attrib['{%s}%s'%(ns, tag)] = elem.attrib.pop(b)
seen_namespaces |= set(elem.nsmap.itervalues())
nsmap = dict(html5lib.constants.namespaces)
nsmap[None] = nsmap.pop('html')
non_html5_namespaces.update(nsmap)
nsmap = non_html5_namespaces
data = clone_element(data, nsmap=nsmap, in_context=False)
# Remove unused namespace declarations
fnsmap = {k:v for k,v in nsmap.iteritems() if v in seen_namespaces and v !=
XMLNS_NS}
return clone_element(data, nsmap=fnsmap, in_context=False)
def _html4_parse(data, prefer_soup=False):
if prefer_soup:
@ -177,7 +236,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
except etree.XMLSyntaxError:
log.debug('Parsing %s as HTML' % filename)
try:
data = _html5_parse(data)
data = html5_parse(data)
except:
log.exception(
'HTML 5 parsing failed, falling back to older parsers')
@ -261,6 +320,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
nroot.append(elem)
data = nroot
data = merge_multiple_html_heads_and_bodies(data, log)
# Ensure has a <head/>
head = xpath(data, '/h:html/h:head')

View File

@ -159,15 +159,18 @@ class FlatFilenames(object): # {{{
continue
data = item.data
isp = item.spine_position
nhref = oeb.manifest.generate(href=nhref)[1]
if isp is not None:
oeb.spine.remove(item)
oeb.manifest.remove(item)
nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
fallback=item.fallback)
self.rename_map[item.href] = nhref
self.renamed_items_map[nhref] = item
if item.spine_position is not None:
oeb.spine.insert(item.spine_position, nitem, item.linear)
oeb.spine.remove(item)
oeb.manifest.remove(item)
if isp is not None:
oeb.spine.insert(isp, nitem, item.linear)
if self.rename_map:
self.log('Found non-flat filenames, renaming to support broken'

View File

@ -16,7 +16,7 @@ class UnsmartenPunctuation(object):
def unsmarten(self, root):
for x in self.html_tags(root):
if not barename(x) == 'pre':
if not barename(x.tag) == 'pre':
if getattr(x, 'text', None):
x.text = unsmarten_text(x.text)
if getattr(x, 'tail', None) and x.tail:

View File

@ -273,11 +273,34 @@ def error_dialog(parent, title, msg, det_msg='', show=False,
return d
def question_dialog(parent, title, msg, det_msg='', show_copy_button=False,
default_yes=True):
default_yes=True,
# Skippable dialogs
# Set skip_dialog_name to a unique name for this dialog
# Set skip_dialog_msg to a message displayed to the user
skip_dialog_name=None, skip_dialog_msg=_('Show this confirmation again'),
skip_dialog_skipped_value=True, skip_dialog_skip_precheck=True):
from calibre.gui2.dialogs.message_box import MessageBox
auto_skip = set(gprefs.get('questions_to_auto_skip', []))
if (skip_dialog_name is not None and skip_dialog_name in auto_skip):
return bool(skip_dialog_skipped_value)
d = MessageBox(MessageBox.QUESTION, title, msg, det_msg, parent=parent,
show_copy_button=show_copy_button, default_yes=default_yes)
return d.exec_() == d.Accepted
if skip_dialog_name is not None and skip_dialog_msg:
tc = d.toggle_checkbox
tc.setVisible(True)
tc.setText(skip_dialog_msg)
tc.setChecked(bool(skip_dialog_skip_precheck))
ret = d.exec_() == d.Accepted
if skip_dialog_name is not None and not d.toggle_checkbox.isChecked():
auto_skip.add(skip_dialog_name)
gprefs.set('questions_to_auto_skip', list(auto_skip))
return ret
def info_dialog(parent, title, msg, det_msg='', show=False,
show_copy_button=True):

View File

@ -683,7 +683,7 @@ class DeviceMixin(object): # {{{
return self.ask_a_yes_no_question(
_('No suitable formats'), msg,
ans_when_user_unavailable=True,
det_msg=autos
det_msg=autos, skip_dialog_name='auto_convert_before_send'
)
def set_default_thumbnail(self, height):

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import QDialog, QApplication
from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog
from calibre.ebooks.metadata import check_isbn
from calibre.constants import iswindows
from calibre.gui2 import gprefs
from calibre.gui2 import gprefs, question_dialog, error_dialog
class AddFromISBN(QDialog, Ui_Dialog):
@ -44,6 +44,7 @@ class AddFromISBN(QDialog, Ui_Dialog):
tags = list(filter(None, [x.strip() for x in tags]))
gprefs['add from ISBN tags'] = tags
self.set_tags = tags
bad = set()
for line in unicode(self.isbn_box.toPlainText()).strip().splitlines():
line = line.strip()
if not line:
@ -64,5 +65,19 @@ class AddFromISBN(QDialog, Ui_Dialog):
os.access(parts[1], os.R_OK) and os.path.isfile(parts[1]):
book['path'] = parts[1]
self.books.append(book)
else:
bad.add(parts[0])
if bad:
if self.books:
if not question_dialog(self, _('Some invalid ISBNs'),
_('Some of the ISBNs you entered were invalid. They will'
' be ignored. Click Show Details to see which ones.'
' Do you want to proceed?'), det_msg='\n'.join(bad),
show_copy_button=True):
return
else:
return error_dialog(self, _('All invalid ISBNs'),
_('All the ISBNs you entered were invalid. No books'
' can be added.'), show=True)
QDialog.accept(self, *args)

View File

@ -44,6 +44,7 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
self.msg.setText(msg)
self.det_msg.setPlainText(det_msg)
self.det_msg.setVisible(False)
self.toggle_checkbox.setVisible(False)
if show_copy_button:
self.ctc_button = self.bb.addButton(_('&Copy to clipboard'),

View File

@ -53,7 +53,7 @@
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<item row="3" column="0" colspan="2">
<widget class="QDialogButtonBox" name="bb">
<property name="orientation">
<enum>Qt::Horizontal</enum>
@ -63,6 +63,13 @@
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="toggle_checkbox">
<property name="text">
<string/>
</property>
</widget>
</item>
</layout>
</widget>
<resources>

View File

@ -143,12 +143,12 @@ class GuiRunner(QObject):
add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
sys.excepthook = main.unhandled_exception
if len(self.args) > 1:
p = os.path.abspath(self.args[1])
if os.path.isdir(p):
prints('Ignoring directory passed as command line argument:',
self.args[1])
else:
add_filesystem_book(p)
files = [os.path.abspath(p) for p in self.args[1:] if not
os.path.isdir(p)]
if len(files) < len(sys.argv[1:]):
prints('Ignoring directories passed as command line arguments')
if files:
add_filesystem_book(files)
self.app.file_event_hook = add_filesystem_book
self.main = main

View File

@ -162,6 +162,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for key in dynamic.keys():
if key.endswith('_again') and dynamic[key] is False:
dynamic[key] = True
gprefs['questions_to_auto_skip'] = []
info_dialog(self, _('Done'),
_('Confirmation dialogs have all been reset'), show=True)

View File

@ -260,11 +260,11 @@ class ShortcutConfig(QWidget):
self.view.setModel(model)
self.delegate = Delegate()
self.view.setItemDelegate(self.delegate)
self.delegate.sizeHintChanged.connect(self.scrollTo)
self.delegate.sizeHintChanged.connect(self.scrollTo,
type=Qt.QueuedConnection)
def scrollTo(self, index):
self.view.scrollTo(index)
self.view.scrollTo(index, self.view.EnsureVisible)
@property
def is_editing(self):

View File

@ -54,12 +54,12 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
if not id:
continue
cover_url = ''.join(data.xpath('.//img/@src'))
cover_url = ''.join(data.xpath('.//div[@class="info"]/h3/a/@id'))
title = ''.join(data.xpath('.//div[@class="info"]/h3/a/@title'))
formats = title.split()
formats = formats[-1]
formats = ''.join(data.xpath('.//div[@class="info"]/p[1]/text()'))
formats = re.findall(r'\((.*?)\)',formats)[0]
author = ''.join(data.xpath('.//div[@class="info"]/h4/text() | .//div[@class="info"]/h4/span/text()'))
price = ''.join(data.xpath('.//h3[@class="promocja"]/text()'))
price = ''.join(data.xpath('.//div[@class="options"]/h3/text()'))
price = re.sub('PLN', '', price)
price = re.sub('\.', ',', price)
drm = data.xpath('boolean(.//div[@class="info" and contains(., "Zabezpieczenie: DRM")])')
@ -67,7 +67,7 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.cover_url = 'http://imguser.gandalf.com.pl/' + re.sub('p', 'p_', cover_url) + '.jpg'
s.title = title.strip()
s.author = author.strip()
s.price = price

View File

@ -407,11 +407,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
return getattr(self, '__systray_minimized', False)
def ask_a_yes_no_question(self, title, msg, det_msg='',
show_copy_button=False, ans_when_user_unavailable=True):
show_copy_button=False, ans_when_user_unavailable=True,
skip_dialog_name=None, skipped_value=True):
if self.is_minimized_to_tray:
return ans_when_user_unavailable
return question_dialog(self, title, msg, det_msg=det_msg,
show_copy_button=show_copy_button)
show_copy_button=show_copy_button,
skip_dialog_name=skip_dialog_name,
skip_dialog_skipped_value=skipped_value)
def hide_windows(self):
for window in QApplication.topLevelWidgets():

View File

@ -269,7 +269,7 @@ How do I use |app| with my Android phone/tablet?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are two ways that you can connect your Android device to calibre: using a USB cable, or wirelessly, over the air.
**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**.
**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. Some devices may have a setting to put them in "disk mode" or "USB Transfer mode" that is needed before they act as USB disks.
Using a USB cable
^^^^^^^^^^^^^^^^^^^^

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -4,9 +4,9 @@
#
msgid ""
msgstr ""
"Project-Id-Version: calibre 0.8.31\n"
"POT-Creation-Date: 2011-12-16 09:38+IST\n"
"PO-Revision-Date: 2011-12-16 09:38+IST\n"
"Project-Id-Version: calibre 0.8.32\n"
"POT-Creation-Date: 2011-12-23 08:40+IST\n"
"PO-Revision-Date: 2011-12-23 08:40+IST\n"
"Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n"
@ -24,8 +24,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/db/cache.py:105
#: /home/kovid/work/calibre/src/calibre/db/cache.py:108
#: /home/kovid/work/calibre/src/calibre/db/cache.py:119
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:282
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:283
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:285
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:286
#: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:99
#: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:100
#: /home/kovid/work/calibre/src/calibre/devices/jetbook/driver.py:74
@ -36,8 +36,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:71
#: /home/kovid/work/calibre/src/calibre/devices/prs500/books.py:267
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:660
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:328
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:329
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:330
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:331
#: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:485
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:106
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:109
@ -97,17 +97,17 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:85
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:128
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:169
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:748
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1005
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1007
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1009
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:749
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1006
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1008
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1010
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/utils.py:299
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/writer2/indexer.py:497
#: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:145
#: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:147
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:818
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:273
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:277
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:333
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:337
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:142
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:149
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:66
@ -137,8 +137,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/pdf/writer.py:103
#: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:320
#: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:322
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:387
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:395
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:410
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:418
#: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:157
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:378
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:381
@ -889,15 +889,15 @@ msgstr ""
msgid "Communicate with Android phones."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:152
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:154
msgid "Comma separated list of directories to send e-books to on the device. The first one that exists will be used"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:222
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:225
msgid "Communicate with S60 phones."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:241
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:244
msgid "Communicate with WebOS tablets."
msgstr ""
@ -993,8 +993,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:102
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:447
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:470
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:547
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:566
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:549
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:568
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1052
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1058
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1093
@ -2036,7 +2036,7 @@ msgid "When creating a TOC from links in the input document, allow duplicate ent
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:281
msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\" or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\", \"prologue\", \"epilogue\", or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:295
@ -2934,7 +2934,7 @@ msgid ""
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1417
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1238
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:943
#: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:41
msgid "Cover"
@ -3085,70 +3085,70 @@ msgstr ""
msgid "No details available"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1239
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
msgid "Title Page"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1240
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/htmltoc.py:15
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main.py:56
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:199
msgid "Table of Contents"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1241
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
msgid "Index"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1242
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
msgid "Glossary"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1243
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
msgid "Acknowledgements"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
msgid "Bibliography"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
msgid "Colophon"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
msgid "Copyright"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
msgid "Dedication"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
msgid "Epigraph"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1255
msgid "Foreword"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1256
msgid "List of Illustrations"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1257
msgid "List of Tables"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1258
msgid "Notes"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1259
msgid "Preface"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
msgid "Main Text"
msgstr ""
@ -3681,7 +3681,11 @@ msgstr ""
msgid "tag browser categories not to display"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:506
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:280
msgid "Show this confirmation again"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:529
msgid "Choose Files"
msgstr ""
@ -4186,7 +4190,7 @@ msgid "Create a catalog of the books in your calibre library"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/actions/convert.py:88
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:591
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:594
msgid "Cannot convert"
msgstr ""
@ -6643,7 +6647,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/single_ui.py:117
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info_ui.py:69
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf_ui.py:96
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:52
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:21
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/progress_ui.py:53
#: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:61
#: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_progress_dialog_ui.py:50
@ -7279,6 +7283,22 @@ msgstr ""
msgid "Reset author to Unknown"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:72
msgid "Some invalid ISBNs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:73
msgid "Some of the ISBNs you entered were invalid. They will be ignored. Click Show Details to see which ones. Do you want to proceed?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:79
msgid "All invalid ISBNs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:80
msgid "All the ISBNs you entered were invalid. No books can be added."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:63
msgid "Add books by ISBN"
msgstr ""
@ -7920,35 +7940,35 @@ msgstr ""
msgid "&Hide all jobs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:49
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:50
msgid "&Copy to clipboard"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:53
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
msgid "Show &details"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:55
msgid "Hide &details"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:58
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:59
msgid "Show detailed information about this error"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:100
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:101
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/__init__.py:552
msgid "Copied"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:138
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:139
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:796
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:205
msgid "Copy to clipboard"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:184
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:232
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:185
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:233
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:860
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:961
msgid "View log"
@ -10063,7 +10083,7 @@ msgid "None"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/keyboard.py:389
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:165
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
msgid "Done"
msgstr ""
@ -10404,7 +10424,7 @@ msgid "Failed to create calibre library at: %r."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:108
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:170
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:169
msgid "Choose a location for your new calibre e-book library"
msgstr ""
@ -10412,74 +10432,74 @@ msgstr ""
msgid "Initializing user interface..."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:163
msgid "Repairing failed"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:165
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
msgid "The database repair failed. Starting with a new empty library."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:178
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:203
msgid "Bad database location"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:180
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
#, python-format
msgid "Bad database location %r. calibre will now quit."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:494
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:191
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:497
msgid "Corrupted database"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:193
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
#, python-format
msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful. If you say No, a new empty calibre library will be created."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:205
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
#, python-format
msgid "Bad database location %r. Will start with a new, empty calibre library"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:215
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:214
#, python-format
msgid "Starting %s: Loading books..."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:295
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:294
msgid "If you are sure it is not running"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:298
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:297
msgid "may be running in the system tray, in the"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:300
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:299
msgid "upper right region of the screen."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:302
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:301
msgid "lower right region of the screen."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:305
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:304
msgid "try rebooting your computer."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:307
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:321
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:306
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:320
msgid "try deleting the file"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:309
msgid "Cannot Start "
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:311
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
#, python-format
msgid "%s is already running."
msgstr ""
@ -11097,7 +11117,7 @@ msgstr ""
msgid "All on 1 tab"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:167
msgid "Confirmation dialogs have all been reset"
msgstr ""
@ -12623,7 +12643,7 @@ msgid "Here you can control how calibre will save your books when you click the
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:70
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:432
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:435
msgid "Failed to start content server"
msgstr ""
@ -13791,39 +13811,39 @@ msgstr ""
msgid "You have started calibre in debug mode. After you quit calibre, the debug log will be available in the file: %s<p>The log will be displayed automatically."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:495
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:498
#, python-format
msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:579
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:582
msgid "Conversion Error"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:602
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:605
msgid "Recipe Disabled"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:618
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:621
msgid "<b>Failed</b>"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:652
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
msgid "There are active jobs. Are you sure you want to quit?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:658
msgid ""
" is communicating with the device!<br>\n"
" Quitting may cause corruption on the device.<br>\n"
" Are you sure you want to quit?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:659
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:662
msgid "Active jobs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:727
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:730
msgid "will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray."
msgstr ""

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff