Sync to trunk.

commit d9babbc43b
Author: John Schember
Date:   2011-12-26 17:56:09 -05:00

612 changed files with 206194 additions and 119528 deletions


@@ -2,6 +2,7 @@
 .check-cache.pickle
 src/calibre/plugins
 resources/images.qrc
+src/calibre/ebooks/oeb/display/test/*.js
 src/calibre/manual/.build/
 src/calibre/manual/cli/
 src/calibre/manual/template_ref.rst
@@ -15,6 +16,7 @@ resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
 resources/builtin_recipes.zip
 resources/template-functions.json
+resources/display/*.js
 setup/installer/windows/calibre/build.log
 src/calibre/translations/.errors
 src/cssutils/.svn/

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Dean Cording'
+__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
 '''
 abc.net.au/news
 '''
@@ -8,7 +8,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class ABCNews(BasicNewsRecipe):
     title = 'ABC News'
-    __author__ = 'Dean Cording'
+    __author__ = 'Pat Stapleton, Dean Cording'
     description = 'News from Australia'
     masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
     cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
@@ -23,7 +23,9 @@ class ABCNews(BasicNewsRecipe):
     category = 'News, Australia, World'
     language = 'en_AU'
     publication_type = 'newsportal'
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
+    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
     conversion_options = {
         'comments'  : description
         ,'tags'     : category
@@ -32,23 +34,23 @@ class ABCNews(BasicNewsRecipe):
         ,'linearize_tables': False
     }
-    keep_only_tags = dict(id='article')
-    remove_tags = [dict(attrs={'class':['related', 'tags']}),
-                   dict(id='statepromo')
-                  ]
+    keep_only_tags = [dict(attrs={'class':['article section']})]
+    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
+                   'inline-content story left', 'inline-content map left contracted', 'published',
+                   'story-map', 'statepromo', 'topics', ]})]
     remove_attributes = ['width','height']
     feeds = [
-        ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
-        ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
-        ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
-        ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
-        ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
-        ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
-        ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
-        ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
-        ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
-        ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
+        ('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
+        ('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
+        ('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
+        ('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
+        ('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
+        ('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
+        ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
+        ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
+        ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
+        ('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
     ]
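
A quick standalone sanity check of the new map-link pattern above; the sample markup is hypothetical, not taken from abc.net.au:

import re

# Pattern from the commit: remove inline map links while leaving other
# inline-caption elements (ordinary image captions) untouched.
pattern = re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL)

sample = ('<p>Story text.</p>'
          '<a class="inline-caption" href="http://maps.google.com/?q=sydney">Map</a>'
          '<span class="inline-caption">Photo: AAP</span>')

print(pattern.sub('', sample))
# <p>Story text.</p><span class="inline-caption">Photo: AAP</span>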


@@ -1,19 +1,38 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 class Adventure_zone(BasicNewsRecipe):
     title = u'Adventure Zone'
     __author__ = 'fenuks'
     description = 'Adventure zone - adventure games from A to Z'
     category = 'games'
     language = 'pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
     no_stylesheets = True
+    oldest_article = 20
+    max_articles_per_feed = 100
+    use_embedded_content=False
+    preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
     remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
-    remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
+    remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
+    remove_tags_after= dict(id='comments')
     extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
     feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
+    def parse_feeds (self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
+        tag=soup.find(name='channel')
+        titles=[]
+        for r in tag.findAll(name='image'):
+            r.extract()
+        art=tag.findAll(name='item')
+        for i in art:
+            titles.append(i.title.string)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                article.title=titles[feed.articles.index(article)]
+        return feeds
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
         cover=soup.find(id='box_OstatninumerAZ')
@@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
     def skip_ad_pages(self, soup):
-        skip_tag = soup.body.findAll(name='a')
-        if skip_tag is not None:
-            for r in skip_tag:
-                if 'articles.php?' in r['href']:
-                    if r.strong is not None:
-                        word=r.strong.string
-                        if ('zapowied' or 'recenzj') in word:
-                            return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
-                        else:
-                            None
-    def print_version(self, url):
-        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
+        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
+        skip_tag = skip_tag.findAll(name='a')
+        for r in skip_tag:
+            if r.strong:
+                word=r.strong.string
+                if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
+                    return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
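
Worth noting: the rewrite also fixes a subtle precedence bug in the old keyword test. In ('zapowied' or 'recenzj') in word, the parenthesised expression evaluates first and is simply 'zapowied', so 'recenzj' was never checked. A minimal demonstration:

word = u'recenzja gry'

# Old, buggy test: ('zapowied' or 'recenzj') evaluates to 'zapowied',
# so only the first substring is ever checked.
print(('zapowied' or 'recenzj') in word)              # False

# New, correct test from the commit:
print(('zapowied' in word) or ('recenzj' in word))    # True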


@@ -1,5 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
 class AstroNEWS(BasicNewsRecipe):
     title = u'AstroNEWS'
     __author__ = 'fenuks'
@@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
     language = 'pl'
     oldest_article = 8
     max_articles_per_feed = 100
-    auto_cleanup = True
+    #extra_css= 'table {text-align: left;}'
+    no_stylesheets=True
     cover_url='http://news.astronet.pl/img/logo_news.jpg'
-    # no_stylesheets= True
+    remove_tags=[dict(name='hr')]
     feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
     def print_version(self, url):
         return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
+    def preprocess_html(self, soup):
+        for item in soup.findAll(align=True):
+            del item['align']
+        return soup
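
The added preprocess_html simply strips deprecated align attributes before conversion. A standalone sketch of the same idea, using bs4 rather than the BeautifulSoup build bundled with calibre; the sample markup is made up:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<table align="center"><tr><td align="left">x</td></tr></table>',
                     'html.parser')

# Same loop as the recipe method: drop every align attribute found.
for item in soup.findAll(align=True):
    del item['align']

print(soup)  # <table><tr><td>x</td></tr></table>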


@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+b365.realitatea.net
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class b365Realitatea(BasicNewsRecipe):
+    title = u'b365 Realitatea'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'b365 Realitatea'
+    description = u'b365 Realitatea'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Romania,Bucuresti'
+    encoding = 'utf-8'
+    cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+    }
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'newsArticle'})
+    ]
+    remove_tags = [
+        dict(name='div', attrs={'class':'date'})
+        , dict(name='dic', attrs={'class':'addthis_toolbox addthis_default_style'})
+        , dict(name='div', attrs={'class':'related_posts'})
+        , dict(name='div', attrs={'id':'RelevantiWidget'})
+    ]
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'RelevantiWidget'})
+    ]
+    feeds = [
+        (u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
+    ]
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)


@@ -1,61 +1,648 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
-'''
-news.bbc.co.uk
-'''
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-class BBC(BasicNewsRecipe):
-    title                 = 'BBC News'
-    __author__            = 'Darko Miletic, Starson17'
-    description           = 'News from UK. '
-    oldest_article        = 2
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    #delay                = 1
-    use_embedded_content  = False
-    encoding              = 'utf8'
-    publisher             = 'BBC'
-    category              = 'news, UK, world'
-    language              = 'en_GB'
-    publication_type      = 'newsportal'
-    extra_css             = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps    = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    conversion_options = {
-        'comments'  : description
-        ,'tags'     : category
-        ,'language' : language
-        ,'publisher': publisher
-        ,'linearize_tables': True
-    }
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['layout-block-a layout-block']})
-        ,dict(attrs={'class':['story-body','storybody']})
-    ]
-    remove_tags = [
-        dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
-                                         'story-feature wide ', 'story-feature narrow']}),
-        dict(id=['hypertab', 'comment-form']),
-    ]
-    remove_attributes = ['width','height']
-    feeds = [
-        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
-        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
-        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
-        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
-        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
-        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
-        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
-        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
-        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
-        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
-        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
-        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
-        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
-    ]
+##
+## Title:       BBC News, Sport, and Blog Calibre Recipe
+## Contact:     mattst - jmstanfield@gmail.com
+##
+## License:     GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
+## Copyright:   mattst - jmstanfield@gmail.com
+##
+## Written:     November 2011
+## Last Edited: 2011-11-19
+##
+
+__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
+__copyright__ = 'mattst - jmstanfield@gmail.com'
+
+'''
+BBC News, Sport, and Blog Calibre Recipe
+'''
+
+# Import the regular expressions module.
+import re
+
+# Import the BasicNewsRecipe class which this class extends.
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BBCNewsSportBlog(BasicNewsRecipe):
+
+    #
+    #    **** IMPORTANT USERS READ ME ****
+    #
+    # First select the feeds you want then scroll down below the feeds list
+    # and select the values you want for the other user preferences, like
+    # oldest_article and such like.
+    #
+    #
+    # Select the BBC rss feeds which you want in your ebook.
+    # Selected feed have NO '#' at their start, de-selected feeds begin with a '#'.
+    #
+    # Eg.  ("News Home", "http://feeds.bbci.co.uk/... - include feed.
+    # Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
+    #
+    # There are 68 feeds below which constitute the bulk of the available rss
+    # feeds on the BBC web site. These include 5 blogs by editors and
+    # correspondants, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
+    # Wales, Scotland Business), and 7 Welsh language feeds.
+    #
+    # Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
+    # so if "oldest_article = 1.5" (only articles published in the last 36 hours)
+    # you may get some 'empty feeds' which will not then be included in the ebook.
+    #
+    # The 15 feeds currently selected below are simply my default ones.
+    #
+    # Note: With all 68 feeds selected, oldest_article set to 2,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
+    # the ebook creation took 29 minutes on my speedy 100 mbps net connection,
+    # fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
+    # More realistically with 15 feeds selected, oldest_article set to 1.5,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
+    # it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
+    #
+    # Select / de-select the feeds you want in your ebook.
+    #
+    feeds = [
+        ("News Home", "http://feeds.bbci.co.uk/news/rss.xml"),
+        ("UK", "http://feeds.bbci.co.uk/news/uk/rss.xml"),
+        ("World", "http://feeds.bbci.co.uk/news/world/rss.xml"),
+        #("England", "http://feeds.bbci.co.uk/news/england/rss.xml"),
+        #("Scotland", "http://feeds.bbci.co.uk/news/scotland/rss.xml"),
+        #("Wales", "http://feeds.bbci.co.uk/news/wales/rss.xml"),
+        #("N. Ireland", "http://feeds.bbci.co.uk/news/northern_ireland/rss.xml"),
+        #("Africa", "http://feeds.bbci.co.uk/news/world/africa/rss.xml"),
+        #("Asia", "http://feeds.bbci.co.uk/news/world/asia/rss.xml"),
+        #("Europe", "http://feeds.bbci.co.uk/news/world/europe/rss.xml"),
+        #("Latin America", "http://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
+        #("Middle East", "http://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
+        ("US & Canada", "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
+        ("Politics", "http://feeds.bbci.co.uk/news/politics/rss.xml"),
+        ("Science/Environment", "http://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
+        ("Technology", "http://feeds.bbci.co.uk/news/technology/rss.xml"),
+        ("Magazine", "http://feeds.bbci.co.uk/news/magazine/rss.xml"),
+        ("Entertainment/Arts", "http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
+        #("Health", "http://feeds.bbci.co.uk/news/health/rss.xml"),
+        #("Education/Family", "http://feeds.bbci.co.uk/news/education/rss.xml"),
+        ("Business", "http://feeds.bbci.co.uk/news/business/rss.xml"),
+        ("Special Reports", "http://feeds.bbci.co.uk/news/special_reports/rss.xml"),
+        ("Also in the News", "http://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
+        #("Newsbeat", "http://www.bbc.co.uk/newsbeat/rss.xml"),
+        #("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
+        ("Blog: Nick Robinson (Political Editor)", "http://feeds.bbci.co.uk/news/correspondents/nickrobinson/rss.sxml"),
+        #("Blog: Mark D'Arcy (Parliamentary Correspondent)", "http://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
+        #("Blog: Robert Peston (Business Editor)", "http://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
+        #("Blog: Stephanie Flanders (Economics Editor)", "http://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
+        ("Blog: Rory Cellan-Jones (Technology correspondent)", "http://feeds.bbci.co.uk/news/correspondents/rorycellanjones/rss.sxml"),
+        ("Sport Front Page", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
+        #("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
+        #("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
+        #("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
+        #("Rugby League", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml"),
+        #("Tennis", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml"),
+        #("Golf", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml"),
+        #("Motorsport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml"),
+        #("Boxing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml"),
+        #("Athletics", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml"),
+        #("Snooker", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml"),
+        #("Horse Racing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml"),
+        #("Cycling", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml"),
+        #("Disability Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml"),
+        #("Other Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml"),
+        #("Olympics 2012", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/olympics_2012/rss.xml"),
+        #("N. Ireland Politics", "http://feeds.bbci.co.uk/news/northern_ireland/northern_ireland_politics/rss.xml"),
+        #("Scotland Politics", "http://feeds.bbci.co.uk/news/scotland/scotland_politics/rss.xml"),
+        #("Scotland Business", "http://feeds.bbci.co.uk/news/scotland/scotland_business/rss.xml"),
+        #("E. Scotland, Edinburgh & Fife", "http://feeds.bbci.co.uk/news/scotland/edinburgh_east_and_fife/rss.xml"),
+        #("W. Scotland & Glasgow", "http://feeds.bbci.co.uk/news/scotland/glasgow_and_west/rss.xml"),
+        #("Highlands & Islands", "http://feeds.bbci.co.uk/news/scotland/highlands_and_islands/rss.xml"),
+        #("NE. Scotland, Orkney & Shetland", "http://feeds.bbci.co.uk/news/scotland/north_east_orkney_and_shetland/rss.xml"),
+        #("South Scotland", "http://feeds.bbci.co.uk/news/scotland/south_scotland/rss.xml"),
+        #("Central Scotland & Tayside", "http://feeds.bbci.co.uk/news/scotland/tayside_and_central/rss.xml"),
+        #("Wales Politics", "http://feeds.bbci.co.uk/news/wales/wales_politics/rss.xml"),
+        #("NW. Wales", "http://feeds.bbci.co.uk/news/wales/north_west_wales/rss.xml"),
+        #("NE. Wales", "http://feeds.bbci.co.uk/news/wales/north_east_wales/rss.xml"),
+        #("Mid. Wales", "http://feeds.bbci.co.uk/news/wales/mid_wales/rss.xml"),
+        #("SW. Wales", "http://feeds.bbci.co.uk/news/wales/south_west_wales/rss.xml"),
+        #("SE. Wales", "http://feeds.bbci.co.uk/news/wales/south_east_wales/rss.xml"),
+        #("Newyddion - News in Welsh", "http://feeds.bbci.co.uk/newyddion/rss.xml"),
+        #("Gwleidyddiaeth", "http://feeds.bbci.co.uk/newyddion/gwleidyddiaeth/rss.xml"),
+        #("Gogledd-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/gogledd-ddwyrain/rss.xml"),
+        #("Gogledd-Orllewin", "http://feeds.bbci.co.uk/newyddion/gogledd-orllewin/rss.xml"),
+        #("Canolbarth", "http://feeds.bbci.co.uk/newyddion/canolbarth/rss.xml"),
+        #("De-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/de-ddwyrain/rss.xml"),
+        #("De-Orllewin", "http://feeds.bbci.co.uk/newyddion/de-orllewin/rss.xml"),
+    ]
+
+    #    **** SELECT YOUR USER PREFERENCES ****
+
+    # Title to use for the ebook.
+    #
+    title = 'BBC News'
+
+    # A brief description for the ebook.
+    #
+    description = u'BBC web site ebook created using rss feeds.'
+
+    # The max number of articles which may be downloaded from each feed.
+    # I've never seen more than about 70 articles in a single feed in the
+    # BBC feeds.
+    #
+    max_articles_per_feed = 100
+
+    # The max age of articles which may be downloaded from each feed. This is
+    # specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
+    # half days). My default of 1.5 days is the last 36 hours, the point at
+    # which I've decided 'news' becomes 'old news', but be warned this is not
+    # so good for the blogs, technology, magazine, etc., and sports feeds.
+    # You may wish to extend this to 2-5 but watch out ebook creation time will
+    # increase as well. Setting this to 30 will get everything (AFAICT) as long
+    # as max_articles_per_feed remains set high (except for 'Click' which is
+    # v. low volume and its currently oldest article is 4th Feb 2011).
+    #
+    oldest_article = 1.5
+
+    # Number of simultaneous downloads. 20 is consistantly working fine on the
+    # BBC News feeds with no problems. Speeds things up from the defualt of 5.
+    # If you have a lot of feeds and/or have increased oldest_article above 2
+    # then you may wish to try increasing simultaneous_downloads to 25-30,
+    # Or, of course, if you are in a hurry. [I've not tried beyond 20.]
+    #
+    simultaneous_downloads = 20
+
+    # Timeout for fetching files from the server in seconds. The default of
+    # 120 seconds, seems somewhat excessive.
+    #
+    timeout = 30
+
+    # The format string for the date shown on the ebook's first page.
+    # List of all values: http://docs.python.org/library/time.html
+    # Default in news.py has a leading space so that's mirrored here.
+    # As with 'feeds' select/de-select by adding/removing the initial '#',
+    # only one timefmt should be selected, here's a few to choose from.
+    #
+    timefmt = ' [%a, %d %b %Y]'            # [Fri, 14 Nov 2011] (Calibre default)
+    #timefmt = ' [%a, %d %b %Y %H:%M]'     # [Fri, 14 Nov 2011 18:30]
+    #timefmt = ' [%a, %d %b %Y %I:%M %p]'  # [Fri, 14 Nov 2011 06:30 PM]
+    #timefmt = ' [%d %b %Y]'               # [14 Nov 2011]
+    #timefmt = ' [%d %b %Y %H:%M]'         # [14 Nov 2011 18.30]
+    #timefmt = ' [%Y-%m-%d]'               # [2011-11-14]
+    #timefmt = ' [%Y-%m-%d-%H-%M]'         # [2011-11-14-18-30]
+
+    #
+    #    **** IMPORTANT ****
+    #
+    #    DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    #    DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    #    I MEAN IT, YES I DO, ABSOLUTELY, AT YOU OWN RISK. :)
+    #
+    #    **** IMPORTANT ****
+    #
+
+    # Author of this recipe.
+    __author__ = 'mattst'
+
+    # Specify English as the language of the RSS feeds (ISO-639 code).
+    language = 'en_GB'
+
+    # Set tags.
+    tags = 'news, sport, blog'
+
+    # Set publisher and publication type.
+    publisher = 'BBC'
+    publication_type = 'newspaper'
+
+    # Disable stylesheets from site.
+    no_stylesheets = True
+
+    # Specifies an override encoding for sites that have an incorrect charset
+    # specified. Default of 'None' says to auto-detect. Some other BBC recipes
+    # use 'utf8', which works fine (so use that if necessary) but auto-detecting
+    # with None is working fine, so stick with that for robustness.
+    encoding = None
+
+    # Sets whether a feed has full articles embedded in it. The BBC feeds do not.
+    use_embedded_content = False
+
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # Create a custom title which fits nicely in the Kindle title list.
+    # Requires "import time" above class declaration, and replacing
+    # title with custom_title in conversion_options (right column only).
+    # Example of string below: "BBC News - 14 Nov 2011"
+    #
+    # custom_title = "BBC News - " + time.strftime('%d %b %Y')
+
+    '''
+    # Conversion options for advanced users, but don't forget to comment out the
+    # current conversion_options below. Avoid setting 'linearize_tables' as that
+    # plays havoc with the 'old style' table based pages.
+    #
+    conversion_options = { 'title'       : title,
+                           'comments'    : description,
+                           'tags'        : tags,
+                           'language'    : language,
+                           'publisher'   : publisher,
+                           'authors'     : publisher,
+                           'smarten_punctuation' : True
+                         }
+    '''
+
+    conversion_options = { 'smarten_punctuation' : True }
+
+    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
+    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
+                 .introduction, .first { font-weight: bold; } \
+                 .cross-head { font-weight: bold; font-size: 125%; } \
+                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
+                 .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
+                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
+                 .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
+                 text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
+                 .story-date, .published { font-size: 80%; } \
+                 table { width: 100%; } \
+                 td img { display: block; margin: 5px auto; } \
+                 ul { padding-top: 10px; } \
+                 ol { padding-top: 10px; } \
+                 li { padding-top: 5px; padding-bottom: 5px; } \
+                 h1 { text-align: center; font-size: 175%; font-weight: bold; } \
+                 h2 { text-align: center; font-size: 150%; font-weight: bold; } \
+                 h3 { text-align: center; font-size: 125%; font-weight: bold; } \
+                 h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
+
+    # Remove various tag attributes to improve the look of the ebook pages.
+    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
+                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
+
+    # Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
+    # cause a section of the ebook to start in an unsightly fashion or, more
+    # frequently, a "<br />" will muck up the formatting of a correspondant's byline.
+    # "<br />" and "<br clear/>" are far more frequently used on the table formatted
+    # style of pages, and really spoil the look of the ebook pages.
+    preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
+                          (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
+
+    # Create regular expressions for tag keeping and removal to make the matches more
+    # robust against minor changes and errors in the HTML, Eg. double spaces, leading
+    # and trailing spaces, missing hyphens, and such like.
+    # Python regular expression ('re' class) page: http://docs.python.org/library/re.html
+
+    # ***************************************
+    # Regular expressions for keep_only_tags:
+    # ***************************************
+
+    # The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
+    # page which contains the main text of the article. Match storybody variants: 'storybody',
+    # 'story-body', 'story body','storybody ', etc.
+    storybody_reg_exp = '^.*story[_ -]*body.*$'
+
+    # The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
+    # and published date. This is one level above the usual news pages which have the title
+    # and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
+    # resulting in a lot of extra things to be removed by remove_tags.
+    blq_content_reg_exp = '^.*blq[_ -]*content.*$'
+
+    # The BBC has an alternative page design structure, which I suspect is an out-of-date
+    # design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
+    # (travel), and in some sport pages. These alternative pages are table based (which is
+    # why I think they are an out-of-date design) and account for -I'm guesstimaking- less
+    # than 1% of all articles. They use a table class 'storycontent' to hold the article
+    # and like blq_content (above) have required lots of extra removal by remove_tags.
+    story_content_reg_exp = '^.*story[_ -]*content.*$'
+
+    # Keep the sections of the HTML which match the list below. The HTML page created by
+    # Calibre will fill <body> with those sections which are matched. Note that the
+    # blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
+    # it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
+    # will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
+    # all). If they are the other way around in keep_only_tags then blq_content_reg_exp
+    # will end up being discarded.
+    keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
+
+    # ************************************
+    # Regular expressions for remove_tags:
+    # ************************************
+
+    # Regular expression to remove share-help and variant tags. The share-help class
+    # is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
+    # twitter, email. Removed to avoid page clutter.
+    share_help_reg_exp = '^.*share[_ -]*help.*$'
+
+    # Regular expression to remove embedded-hyper and variant tags. This class is used to
+    # display links to other BBC News articles on the same/similar subject.
+    embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
+
+    # Regular expression to remove hypertabs and variant tags. This class is used to
+    # display a tab bar at the top of an article which allows the user to switch to
+    # an article (viewed on the same page) providing further info., 'in depth' analysis,
+    # an editorial, a correspondant's blog entry, and such like. The ability to handle
+    # a tab bar of this nature is currently beyond the scope of this recipe and
+    # possibly of Calibre itself (not sure about that - TO DO - check!).
+    hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
+
+    # Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
+    # 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
+    # This class is used to add additional info. boxes, or small lists, outside of
+    # the main story. TO DO: Work out a way to incorporate these neatly.
+    story_feature_reg_exp = '^.*story[_ -]*feature.*$'
+
+    # Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
+    # 'videoInStoryC'. This class is used to embed video.
+    video_reg_exp = '^.*video.*$'
+
+    # Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
+    # This class is used to embed audio.
+    audio_reg_exp = '^.*audio.*$'
+
+    # Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
+    # This class is used to embed a photo slideshow. See also 'slideshow' below.
+    picture_gallery_reg_exp = '^.*picture.*$'
+
+    # Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
+    # This class is used to embed a slideshow (not necessarily photo) but both
+    # 'slideshow' and 'pictureGallery' are used for slideshows.
+    slideshow_reg_exp = '^.*slide[_ -]*show.*$'
+
+    # Regular expression to remove social-links and variant tags. This class is used to
+    # display links to a BBC bloggers main page, used in various columnist's blogs
+    # (Eg. Nick Robinson, Robert Preston).
+    social_links_reg_exp = '^.*social[_ -]*links.*$'
+
+    # Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
+    # 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
+    # removed by 'story-feature' removal (as they are usually within them), but
+    # not always. The quotation removed is always (AFAICT) in the article text
+    # as well but a 2nd copy is placed in a quote tag to draw attention to it.
+    # The quote class tags may or may not appear in div's.
+    quote_reg_exp = '^.*quote.*$'
+
+    # Regular expression to remove hidden and variant tags, Eg. 'hidden'.
+    # The purpose of these is unclear, they seem to be an internal link to a
+    # section within the article, but the text of the link (Eg. 'Continue reading
+    # the main story') never seems to be displayed anyway. Removed to avoid clutter.
+    # The hidden class tags may or may not appear in div's.
+    hidden_reg_exp = '^.*hidden.*$'
+
+    # Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
+    # Used on the site to display text about registered users entering comments.
+    comment_reg_exp = '^.*comment.*$'
+
+    # Regular expression to remove form and variant tags, Eg. 'comment-form'.
+    # Used on the site to allow registered BBC users to fill in forms, typically
+    # for entering comments about an article.
+    form_reg_exp = '^.*form.*$'
+
+    # Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
+
+    #<div class="story-actions"> Used on sports pages for 'email' and 'print'.
+    story_actions_reg_exp = '^.*story[_ -]*actions.*$'
+
+    #<div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
+    # social networking links).
+    bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
+
+    #<div id="secondary-content" class="content-group">
+    # NOTE: Don't remove class="content-group" that is needed.
+    # Used on sports pages to link to 'similar stories'.
+    secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
+
+    #<div id="featured-content" class="content-group">
+    # NOTE: Don't remove class="content-group" that is needed.
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    featured_content_reg_exp = '^.*featured[_ -]*content.*$'
+
+    #<div id="navigation">
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    # Used sometimes instead of "featured-content" above.
+    navigation_reg_exp = '^.*navigation.*$'
+
+    #<a class="skip" href="#blq-container-inner">Skip to top</a>
+    # Used on sports pages to link to the top of the page.
+    skip_reg_exp = '^.*skip.*$'
+
+    # Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
+    # which are the alterative table design based pages. The purpose of some of these
+    # is not entirely clear from the pages (which are a total mess!).
+
+    # Remove mapping based tags, Eg. <map id="world_map">
+    # The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
+    map_reg_exp = '^.*map.*$'
+
+    # Remove social bookmarking variation, called 'socialBookMarks'.
+    social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
+
+    # Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
+    blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
+
+    # Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
+    # alongside 'socialBookMarks' whenever that appears. I am removing it as well
+    # under the assumption that it can appear alone as well.
+    sharesb_reg_exp = '^.*sharesb.*$'
+
+    # Remove class 'o'. The worst named user created css class of all time. The creator
+    # should immediately be fired. I've seen it used to hold nothing at all but with
+    # 20 or so empty lines in it. Also to hold a single link to another article.
+    # Whatever it was designed to do it is not wanted by this recipe. Exact match only.
+    o_reg_exp = '^o$'
+
+    # Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    promo_top_reg_exp = '^.*promotopbg.*$'
+    promo_bottom_reg_exp = '^.*promobottombg.*$'
+
+    # Remove 'nlp', provides heading for link lists. Requires an exact match due to
+    # risk of matching those letters in something needed, unless I see a variation
+    # of 'nlp' used at a later date.
+    nlp_reg_exp = '^nlp$'
+
+    # Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
+    # has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
+    # matching those letters in something needed.
+    mva_or_mvb_reg_exp = '^mv[ab]$'
+
+    # Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
+    mvtb_reg_exp = '^mvtb$'
+
+    # Remove 'blq-toplink', class to provide a link to the top of the page.
+    blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
+
+    # Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
+    # Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    prods_services_01_reg_exp = '^.*servicev4.*$'
+    prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
+
+    # Remove -what I think is- some kind of navigation tools helper class, though I am
+    # not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
+    # frequently and it is not wanted. Have decided to use two reg expressions to make
+    # removing this (and variants) robust.
+    blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
+    blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
+
+    # Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
+    # need removing - I have no clue what it does other than it contains links.
+    # Whatever it is - it is not part of the article and is not wanted.
+    puffbox_reg_exp = '^.*puffbox.*$'
+
+    # Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
+    sibtbg_reg_exp = '^.*sibtbg.*$'
+
+    # Remove 'storyextra' - links to relevant articles and external sites.
+    storyextra_reg_exp = '^.*story[_ -]*extra.*$'
+
+    remove_tags = [ dict(name='div', attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
+                    dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)})
+    ]
+
+    # Uses url to create and return the 'printer friendly' version of the url.
+    # In other words the 'print this page' address of the page.
+    #
+    # There are 3 types of urls used in the BBC site's rss feeds. There is just
+    # 1 type for the standard news while there are 2 used for sports feed urls.
+    # Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
+    # there is a major story of interest to 'everyone'. So even if no BBC sports
+    # feeds are added to 'feeds' the logic of this method is still needed to avoid
+    # blank / missing / empty articles which have an index title and then no body.
+    def print_version(self, url):
+
+        # Handle sports page urls type 01:
+        if (url.find("go/rss/-/sport1/") != -1):
+            temp_url = url.replace("go/rss/-/", "")
+
+        # Handle sports page urls type 02:
+        elif (url.find("go/rss/int/news/-/sport1/") != -1):
+            temp_url = url.replace("go/rss/int/news/-/", "")
+
+        # Handle regular news page urls:
+        else:
+            temp_url = url.replace("go/rss/int/news/-/", "")
+
+        # Always add "?print=true" to the end of the url.
+        print_url = temp_url + "?print=true"
+
+        return print_url
+
+    # Remove articles in feeds based on a string in the article title or url.
+    #
+    # Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
+    # thread, in post with title: "Remove articles from feed", see url:
+    # http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
+    # Many thanks and all credit to Starson17.
+    #
+    # Starson17's code has obviously been altered to suite my requirements.
+    def parse_feeds(self):
+
+        # Call parent's method.
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        # Loop through all feeds.
+        for feed in feeds:
+
+            # Loop through all articles in feed.
+            for article in feed.articles[:]:
+
+                # Match key words and remove article if there's a match.
+
+                # Most BBC rss feed video only 'articles' use upper case 'VIDEO'
+                # as a title prefix. Just match upper case 'VIDEO', so that
+                # articles like 'Video game banned' won't be matched and removed.
+                if 'VIDEO' in article.title:
+                    feed.articles.remove(article)
+
+                # Most BBC rss feed audio only 'articles' use upper case 'AUDIO'
+                # as a title prefix. Just match upper case 'AUDIO', so that
+                # articles like 'Hi-Def audio...' won't be matched and removed.
+                elif 'AUDIO' in article.title:
+                    feed.articles.remove(article)
+
+                # Most BBC rss feed photo slideshow 'articles' use 'In Pictures',
+                # 'In pictures', and 'in pictures', somewhere in their title.
+                # Match any case of that phrase.
+                elif 'IN PICTURES' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # As above, but user contributed pictures. Match any case.
+                elif 'YOUR PICTURES' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # 'Sportsday Live' are articles which contain a constantly and
+                # dynamically updated 'running commentary' during a live sporting
+                # event. Match any case.
+                elif 'SPORTSDAY LIVE' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # Sometimes 'Sportsday Live' (above) becomes 'Live - Sport Name'.
+                # These are being matched below using 'Live - ' because removing all
+                # articles with 'live' in their titles would remove some articles
+                # that are in fact not live sports pages. Match any case.
+                elif 'LIVE - ' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # 'Quiz of the week' is a Flash player weekly news quiz. Match only
+                # the 'Quiz of the' part in anticipation of monthly and yearly
+                # variants. Match any case.
+                elif 'QUIZ OF THE' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # Remove articles with 'scorecards' in the url. These are BBC sports
+                # pages which just display a cricket scorecard. The pages have a mass
+                # of table and css entries to display the scorecards nicely. Probably
+                # could make them work with this recipe, but might take a whole day
+                # of work to sort out all the css - basically a formatting nightmare.
+                elif 'scorecards' in article.url:
+                    feed.articles.remove(article)
+
+        return feeds
+
+# End of class and file.
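
A quick standalone check that the storybody pattern really is robust to the class-name variants described in the comments above; the sample names are illustrative:

import re

storybody = re.compile('^.*story[_ -]*body.*$', re.IGNORECASE)

# '[_ -]*' matches any run (including none) of underscores, spaces, or hyphens,
# and the '.*' anchors tolerate surrounding text and trailing spaces.
for cls in ('storybody', 'story-body', 'story body', 'storybody ', 'Story_Body extra'):
    assert storybody.match(cls) is not None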


@@ -1,61 +1,44 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-import re
+
+'''Calibre recipe to convert the RSS feeds of the Berliner Zeitung to an ebook.'''
 
 class SportsIllustratedRecipe(BasicNewsRecipe) :
-    __author__    = 'ape'
-    __copyright__ = 'ape'
+    __author__    = 'a.peter'
+    __copyright__ = 'a.peter'
     __license__   = 'GPL v3'
     language      = 'de'
-    description = 'Berliner Zeitung'
-    version = 2
+    description = 'Berliner Zeitung RSS'
+    version = 4
     title = u'Berliner Zeitung'
     timefmt = ' [%d.%m.%Y]'
+    #oldest_article = 7.0
     no_stylesheets = True
     remove_javascript = True
     use_embedded_content = False
     publication_type = 'newspaper'
-    keep_only_tags = [dict(name='div', attrs={'class':'teaser t_split t_artikel'})]
-    INDEX = 'http://www.berlinonline.de/berliner-zeitung/'
-    def parse_index(self):
-        base = 'http://www.berlinonline.de'
-        answer = []
-        articles = {}
-        more = 1
-        soup = self.index_to_soup(self.INDEX)
-        # Get list of links to ressorts from index page
-        ressort_list = soup.findAll('ul', attrs={'class': re.compile('ressortlist')})
-        for ressort in ressort_list[0].findAll('a'):
-            feed_title = ressort.string
-            print 'Analyzing', feed_title
-            if not articles.has_key(feed_title):
-                articles[feed_title] = []
-                answer.append(feed_title)
-            # Load ressort page.
-            feed = self.index_to_soup('http://www.berlinonline.de' + ressort['href'])
-            # find mainbar div which contains the list of all articles
-            for article_container in feed.findAll('div', attrs={'class': re.compile('mainbar')}):
-                # iterate over all articles
-                for article_teaser in article_container.findAll('div', attrs={'class': re.compile('teaser')}):
-                    # extract title of article
-                    if article_teaser.h3 != None:
-                        article = {'title' : article_teaser.h3.a.string, 'date' : u'', 'url' : base + article_teaser.h3.a['href'], 'description' : u''}
-                        articles[feed_title].append(article)
-                    else:
-                        # Skip teasers for missing photos
-                        if article_teaser.div.p.contents[0].find('Foto:') > -1:
-                            continue
-                        article = {'title': 'Weitere Artikel ' + str(more), 'date': u'', 'url': base + article_teaser.div.p.a['href'], 'description': u''}
-                        articles[feed_title].append(article)
-                        more += 1
-        answer = [[key, articles[key]] for key in answer if articles.has_key(key)]
-        return answer
+    remove_tags_before = dict(name='div', attrs={'class':'newstype'})
+    remove_tags_after = [dict(id='article_text')]
+    feeds = [(u'Startseite', u'http://www.berliner-zeitung.de/home/10808950,10808950,view,asFeed.xml'),
+             (u'Politik', u'http://www.berliner-zeitung.de/home/10808018,10808018,view,asFeed.xml'),
+             (u'Wirtschaft', u'http://www.berliner-zeitung.de/home/10808230,10808230,view,asFeed.xml'),
+             (u'Berlin', u'http://www.berliner-zeitung.de/home/10809148,10809148,view,asFeed.xml'),
+             (u'Brandenburg', u'http://www.berliner-zeitung.de/home/10809312,10809312,view,asFeed.xml'),
+             (u'Wissenschaft', u'http://www.berliner-zeitung.de/home/10808894,10808894,view,asFeed.xml'),
+             (u'Digital', u'http://www.berliner-zeitung.de/home/10808718,10808718,view,asFeed.xml'),
+             (u'Kultur', u'http://www.berliner-zeitung.de/home/10809150,10809150,view,asFeed.xml'),
+             (u'Panorama', u'http://www.berliner-zeitung.de/home/10808334,10808334,view,asFeed.xml'),
+             (u'Sport', u'http://www.berliner-zeitung.de/home/10808794,10808794,view,asFeed.xml'),
+             (u'Hertha', u'http://www.berliner-zeitung.de/home/10808800,10808800,view,asFeed.xml'),
+             (u'Union', u'http://www.berliner-zeitung.de/home/10808802,10808802,view,asFeed.xml'),
+             (u'Verkehr', u'http://www.berliner-zeitung.de/home/10809298,10809298,view,asFeed.xml'),
+             (u'Polizei', u'http://www.berliner-zeitung.de/home/10809296,10809296,view,asFeed.xml'),
+             (u'Meinung', u'http://www.berliner-zeitung.de/home/10808020,10808020,view,asFeed.xml')]
 
     def get_masthead_url(self):
-        return 'http://www.berlinonline.de/.img/berliner-zeitung/blz_logo.gif'
+        return 'http://www.berliner-zeitung.de/image/view/10810244,7040611,data,logo.png'
+
+    def print_version(self, url):
+        return url.replace('.html', ',view,printVersion.html')
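
The new print_version here is a plain suffix substitution. A sketch with a made-up article URL, for illustration only:

def print_version(url):
    # Same one-liner as the recipe method above, module-level for the demo.
    return url.replace('.html', ',view,printVersion.html')

# Hypothetical article URL - not a real berliner-zeitung.de link.
print(print_version('http://www.berliner-zeitung.de/politik/beispiel,123456.html'))
# http://www.berliner-zeitung.de/politik/beispiel,123456,view,printVersion.html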


@@ -1,4 +1,3 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
-
 '''
@@ -18,11 +17,17 @@ class Berlingske_dk(BasicNewsRecipe):
     no_stylesheets = True
     remove_empty_feeds = True
     use_embedded_content = False
-    remove_javascript = True
     publication_type = 'newspaper'
     encoding = 'utf8'
     language = 'da'
-    masthead_url = 'http://www.berlingske.dk/sites/all/themes/bm/img/layout/masthead_bg.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h1,.manchet,.byline{font-family: Cambria,Georgia,Times,"Times New Roman",serif } '
+    auto_cleanup = True
+    extra_css = '''
+        .manchet {color:#888888;}
+        .dateline {font-size: x-small; color:#444444;}
+        .manchet,.dateline { font-family: Cambria,Georgia,Times,"Times New Roman",serif }
+        .body {font-family: Arial,Helvetica,sans-serif }
+    '''
 
     conversion_options = {
         'comment' : description
@@ -32,18 +37,14 @@ class Berlingske_dk(BasicNewsRecipe):
     }
 
     feeds = [
-        (u'Breaking news' , u'http://www.berlingske.dk/breaking/rss' )
-        ,(u'Seneste nyt'  , u'http://www.berlingske.dk/seneste/rss' )
-        ,(u'Topnyheder'   , u'http://www.berlingske.dk/top/rss' )
-        ,(u'Danmark'      , u'http://www.berlingske.dk/danmark/seneste/rss' )
-        ,(u'Verden'       , u'http://www.berlingske.dk/verden/seneste/rss' )
-        ,(u'Klima'        , u'http://www.berlingske.dk/klima/seneste/rss' )
-        ,(u'Debat'        , u'http://www.berlingske.dk/debat/seneste/rss' )
-        ,(u'Koebenhavn'   , u'http://www.berlingske.dk/koebenhavn/seneste/rss')
-        ,(u'Politik'      , u'http://www.berlingske.dk/politik/seneste/rss' )
-        ,(u'Kultur'       , u'http://www.berlingske.dk/kultur/seneste/rss' )
+        (u'Breaking news' , u'http://www.b.dk/breaking/rss' )
+        ,(u'Seneste nyt'  , u'http://www.b.dk/seneste/rss' )
+        ,(u'Topnyheder'   , u'http://www.b.dk/top/rss' )
+        ,(u'Danmark'      , u'http://www.b.dk/danmark/seneste/rss' )
+        ,(u'Verden'       , u'http://www.b.dk/verden/seneste/rss' )
+        ,(u'Klima'        , u'http://www.b.dk/klima/seneste/rss' )
+        ,(u'Debat'        , u'http://www.b.dk/debat/seneste/rss' )
+        ,(u'Koebenhavn'   , u'http://www.b.dk/koebenhavn/seneste/rss')
+        ,(u'Politik'      , u'http://www.b.dk/politik/seneste/rss' )
+        ,(u'Kultur'       , u'http://www.b.dk/kultur/seneste/rss' )
     ]
-    keep_only_tags = [dict(attrs={'class':['first','pt-article']})]
-    remove_tags = [dict(name=['object','link','base','iframe','embed'])]

recipes/biamag.recipe (new file)

@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.com.tr
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    title = 'BiaMag'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News from Turkey'
+    publisher = 'BiaMag'
+    category = 'news, politics, Turkey'
+    oldest_article = 15
+    max_articles_per_feed = 120
+    masthead_url = 'http://bianet.org/images/biamag_logo.gif'
+    language = 'tr'
+    no_stylesheets = True
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+    remove_tags_before = dict(name='div', attrs={'class':'manset'})
+    remove_tags = [ dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}), dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
+    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})
+    feeds = [(u'BiaMag', u'http://www.bianet.org/biamag.rss')]
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

recipes/biamag_en.recipe (new file)

@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.com.tr
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    title = 'Bianet-English'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News Network from Turkey(English)'
+    publisher = 'Bianet'
+    category = 'news, politics, Turkey'
+    oldest_article = 7
+    max_articles_per_feed = 150
+    masthead_url = 'http://bianet.org/images/english_logo.gif'
+    language = 'en_TR'
+    no_stylesheets = True
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+    remove_tags_before = dict(name='div', attrs={'class':'manset'})
+    remove_tags = [ dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}), dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
+    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})
+    feeds = [(u'Bianet-English', u'http://www.bianet.org/english.rss')]
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

recipes/bianet.recipe (new file)

@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.com.tr
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    title = 'Bianet'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News from Turkey'
+    publisher = 'Bianet'
+    category = 'news, politics, Turkey'
+    oldest_article = 7
+    max_articles_per_feed = 120
+    masthead_url = 'http://bianet.org/images/bianet_logo.gif'
+    language = 'tr'
+    no_stylesheets = True
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+    remove_tags_before = dict(name='div', attrs={'class':'manset'})
+    remove_tags = [ dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}), dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
+    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})
+    feeds = [(u'Bianet', u'http://bianet.org/bianet.rss')]
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

recipes/biolog_pl.recipe (new file)

@@ -0,0 +1,19 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Biolog_pl(BasicNewsRecipe):
+    title = u'Biolog.pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds=True
+    __author__ = 'fenuks'
+    description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
+    category = 'biology'
+    language = 'pl'
+    cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
+    no_stylesheets = True
+    #keeps_only_tags=[dict(id='main')]
+    remove_tags_before=dict(id='main')
+    remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
+    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
+    feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]


@@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Birgun (BasicNewsRecipe):
+    title = u'Birgün Gazetesi'
+    __author__ = u'Osman Kaysan'
+    oldest_article = 7
+    max_articles_per_feed =150
+    use_embedded_content = False
+    description = 'Birgun gazatesi haberleri, kose yazarlari'
+    publisher = 'Birgün'
+    category = 'news,haberler,turkce,gazete,birgun'
+    language = 'tr'
+    no_stylesheets = True
+    publication_type = 'newspaper'
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+    cover_img_url = 'http://www.birgun.net/i/birgun.png'
+    masthead_url = 'http://www.birgun.net/i/birgun.png'
+    remove_attributes = ['width','height']
+    remove_tags_before = dict(name='h2', attrs={'class':'storyHeadline'})
+    #remove_tags_after = dict(name='div', attrs={'class':'toollinks'})
+    remove_tags_after = dict(name='tr', attrs={'valign':'top'})
+    remove_tags = [ dict(name='div', attrs={'id':'byLine'}), dict(name='div', attrs={'class':'toollinks'})
+        , dict(name='div', attrs={'class':'main-lead'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})
+        , dict(name='a', attrs={'class':'addthis_button'})]
+    remove_empty_feeds= True
+    feeds = [
+        ( u'Güncel', u'http://www.birgun.net/actuels.xml')
+        ,( u'Köşe Yazarları', u'http://www.birgun.net/writer.xml')
+        ,( u'Politika', u'http://www.birgun.net/politics.xml')
+        ,( u'Ekonomi', u'http://www.birgun.net/economic.xml')
+        ,( u'Çalışma Yaşamı', u'http://www.birgun.net/workers.xml')
+        ,( u'Dünya', u'http://www.birgun.net/worlds.xml')
+        ,( u'Yaşam', u'http://www.birgun.net/lifes.xml')
+    ]

View File

@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'News for Birmingham UK'
timefmt = ''
__author__ = 'Dave Asbury'
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
oldest_article = 1
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
language = 'en_GB'
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
keep_only_tags = [
#dict(name='h1',attrs={'id' : 'article-headline'}),
#dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
#dict(name='p')
#dict(attrs={'id' : 'three-col'})
]
remove_tags = [
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
]
feeds = [
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
(u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
(u'Blogs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
]
extra_css = '''
body {font: medium sans-serif;}
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''

recipes/blues.recipe Normal file
View File

@ -0,0 +1,26 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Oskar Kunicki <rakso at interia.pl>'
'''
Changelog:
2011-11-27
News from BluesRSS.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class BluesRSS(BasicNewsRecipe):
title = 'Blues News'
__author__ = 'Oskar Kunicki'
description ='Blues news from around the world'
publisher = 'BluesRSS.info'
category = 'news, blues, USA,UK'
oldest_article = 5
max_articles_per_feed = 100
language = 'en'
cover_url = 'http://bluesrss.info/cover.jpg'
masthead_url = 'http://bluesrss.info/cover.jpg'
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'wp-pagenavi'})]
feeds = [(u'News', u'http://bluesrss.info/feed/')]

View File

@ -10,49 +10,39 @@ http://www.buffalonews.com/RSS/
 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1298680852(BasicNewsRecipe):
+class BuffaloNews(BasicNewsRecipe):
     title = u'Buffalo News'
     oldest_article = 2
     language = 'en'
-    __author__ = 'ChappyOnIce'
+    __author__ = 'ChappyOnIce, Krittika Goyal'
     max_articles_per_feed = 20
     encoding = 'utf-8'
     masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
-    remove_javascript = True
-    extra_css = 'body {text-align: justify;}\n \
-        p {text-indent: 20px;}'
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['main-content-left']})
-    ]
-    remove_tags = [
-        dict(name='div', attrs={'id':['commentCount']}),
-        dict(name='div', attrs={'class':['story-list-links']})
-    ]
-    remove_tags_after = dict(name='div', attrs={'class':['body storyContent']})
-    feeds = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
+    auto_cleanup = True
+    remove_empty_feeds = True
+
+    feeds = [
+        (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'),
         (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944')
     ]

recipes/catavencii.recipe Normal file
View File

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencii.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencii(BasicNewsRecipe):
title = u'Ca\u0163avencii'
__author__ = u'Silviu Cotoar\u0103'
publisher = u'Ca\u0163avencii'
description = u'Ca\u0163avencii'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Romania'
encoding = 'utf-8'
cover_url = 'http://www.simonatache.ro/wp-content/uploads/2011/06/catavencii-logo.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'content'})
]
remove_tags = [
dict(name='div', attrs={'id':'breadcrumbs'})
, dict(name='span', attrs={'class':'info'})
, dict(name='div', attrs={'id':'social-media-article'})
]
remove_tags_after = [
dict(name='div', attrs={'id':'social-media-article'})
]
feeds = [
(u'\u0218tiri', u'http://www.catavencii.ro/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -4,16 +4,16 @@
 __license__ = 'GPL v3'
 __copyright__ = u'2011, Silviu Cotoar\u0103'
 '''
-catavencu.ro
+academiacatavencu.info
 '''

 from calibre.web.feeds.news import BasicNewsRecipe

-class Catavencu(BasicNewsRecipe):
+class AcademiaCatavencu(BasicNewsRecipe):
     title = u'Academia Ca\u0163avencu'
     __author__ = u'Silviu Cotoar\u0103'
     description = 'Tagma cum laude'
-    publisher = 'Catavencu'
+    publisher = u'Ca\u0163avencu'
     oldest_article = 5
     language = 'ro'
     max_articles_per_feed = 100
@ -21,32 +21,31 @@ class Catavencu(BasicNewsRecipe):
     use_embedded_content = False
     category = 'Ziare'
     encoding = 'utf-8'
-    cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
+    cover_url = 'http://www.academiacatavencu.info/images/logo.png'
     conversion_options = {
         'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
     }
     keep_only_tags = [
-        dict(name='ul', attrs={'class':'articles'})
+        dict(name='h1', attrs={'class':'art_title'}),
+        dict(name='div', attrs={'class':'art_text'})
     ]
     remove_tags = [
-        dict(name='div', attrs={'class':['tools']})
-        , dict(name='div', attrs={'class':['share']})
-        , dict(name='div', attrs={'class':['category']})
-        , dict(name='div', attrs={'id':['comments']})
+        dict(name='div', attrs={'class':['desp_m']})
+        , dict(name='div', attrs={'id':['tags']})
     ]
     remove_tags_after = [
-        dict(name='div', attrs={'id':'comments'})
+        dict(name='div', attrs={'class':['desp_m']})
     ]
     feeds = [
-        (u'Feeds', u'http://catavencu.ro/feed/rss')
+        (u'Feeds', u'http://www.academiacatavencu.info/rss.xml')
     ]

     def preprocess_html(self, soup):

View File

@ -27,7 +27,7 @@ class CGM(BasicNewsRecipe):
             del item['style']
         ad=soup.findAll('a')
         for r in ad:
-            if 'http://www.hustla.pl' in r['href']:
+            if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:
                 r.extract()
         gallery=soup.find('div', attrs={'class':'galleryFlash'})
         if gallery:

View File

@ -23,7 +23,9 @@ class TheCND(BasicNewsRecipe):
     remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
     no_stylesheets = True
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    preprocess_regexps = [ (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
+                           (re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
+                         ]

     def print_version(self, url):
         if url.find('news/article.php') >= 0:
@ -46,13 +48,15 @@ class TheCND(BasicNewsRecipe):
             title = self.tag_to_string(a)
             self.log('\tFound article: ', title, 'at', url)
             date = a.nextSibling
+            if re.search('cm', date):
+                continue
             if (date is not None) and len(date)>2:
                 if not articles.has_key(date):
                     articles[date] = []
                 articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
                 self.log('\t\tAppend to : ', date)
-        self.log('log articles', articles)
+        #self.log('log articles', articles)
         mostCurrent = sorted(articles).pop()
         self.title = 'CND ' + mostCurrent

recipes/cnd_weekly.recipe Normal file
View File

@ -0,0 +1,72 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheCND(BasicNewsRecipe):
title = 'CND Weekly'
__author__ = 'Derek Liang'
description = ''
INDEX = 'http://cnd.org'
language = 'zh'
conversion_options = {'linearize_tables':True}
remove_tags_before = dict(name='div', id='articleHead')
remove_tags_after = dict(id='copyright')
remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
no_stylesheets = True
preprocess_regexps = [ (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
(re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
]
def print_version(self, url):
if url.find('news/article.php') >= 0:
return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
else:
return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
articles = {}
for a in soup.findAll('a', attrs={'target':'_cnd'}):
url = a['href']
if url.find('article.php') < 0 :
continue
if url.startswith('/'):
url = 'http://cnd.org'+url
title = self.tag_to_string(a)
date = a.nextSibling
if not re.search('cm', date):
continue
self.log('\tFound article: ', title, 'at', url, '@', date)
if (date is not None) and len(date)>2:
if not articles.has_key(date):
articles[date] = []
articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
self.log('\t\tAppend to : ', date)
sorted_articles = sorted(articles)
while sorted_articles:
mostCurrent = sorted_articles.pop()
self.title = 'CND ' + mostCurrent
feeds.append((self.title, articles[mostCurrent]))
return feeds
def populate_article_metadata(self, article, soup, first):
header = soup.find('h3')
self.log('header: ' + self.tag_to_string(header))
pass
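The print_version rewrite above relies on these article links differing only before the first '='. A quick trace with a made-up story id (the id and hostname path are illustrative only):

import re

# Hypothetical cnd.org article link; only the digits after '=' are invented.
url = 'http://my.cnd.org/modules/news/article.php?storyid=12345'

# "^[^=]*" is anchored at the start and eats everything before the first '=',
# so the whole article.php prefix is swapped for the print.php one in one pass.
print(re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url))
# -> http://my.cnd.org/modules/news/print.php?storyid=12345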

View File

@ -0,0 +1,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Computerworld_pl(BasicNewsRecipe):
title = u'Computerworld.pl'
__author__ = 'fenuks'
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
category = 'IT'
language = 'pl'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'s'})]
remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
def get_cover_url(self):
soup = self.index_to_soup('http://www.computerworld.pl/')
cover=soup.find(name='img', attrs={'class':'prawo'})
self.cover_url=cover['src']
return getattr(self, 'cover_url', self.cover_url)

View File

@ -0,0 +1,52 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
#from calibre import __appname__
from calibre.utils.magick import Image
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Cosmopolitan UK'
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
__author__ = 'Dave Asbury'
#last update 21/12/11
# greyscale code by Starson
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
preprocess_regexps = [
(re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]
language = 'en_GB'
masthead_url = 'http://www.cosmopolitan.co.uk/cm/cosmopolitanuk/site_images/header/cosmouk_logo_home.gif'
keep_only_tags = [
dict(attrs={'class' : ['dateAuthor', 'publishDate']}),
dict(name='div',attrs ={'id' : ['main_content']})
]
remove_tags = [
dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
dict(name='li',attrs={'class' : 'thumb'})
]
feeds = [
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
def postprocess_html(self, soup, first):
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class DailyWritingTips(BasicNewsRecipe):
title = u'Daily Writing Tips'
language = 'en_GB'
__author__ = 'NotTaken'
oldest_article = 7 #days
max_articles_per_feed = 40
use_embedded_content = True
no_stylesheets = True
auto_cleanup = False
encoding = 'utf-8'
feeds = [
('Latest tips',
'http://feeds2.feedburner.com/DailyWritingTips'),
]

recipes/datasport.recipe Normal file
View File

@ -0,0 +1,15 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian soccer news website - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
title = u'Datasport'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]

View File

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
'''
descopera.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Descopera(BasicNewsRecipe):
title = u'Descoperă.org'
__author__ = 'Marius Ignătescu'
description = 'Descoperă. Placerea de a cunoaște'
publisher = 'descopera.org'
category = 'science, technology, culture, history, earth'
language = 'ro'
oldest_article = 14
max_articles_per_feed = 100
encoding = 'utf8'
no_stylesheets = True
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
keep_only_tags = [dict(name='div', attrs={'class':['post']})]
remove_tags = [dict(name='div', attrs={'class':['topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]
remove_attributes = ['width','height']
cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
feeds = [(u'Articles', u'http://www.descopera.org/feed/')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -46,7 +46,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
             dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
             dict(name = 'div', attrs = {'class' : 'uniBox'}),
             dict(name = 'object', attrs = {}),
-            dict(name = 'h3', attrs = {})
+            dict(name = 'h3', attrs = {}),
+            dict(attrs={'class':'twitter-share-button'})
         ]

     preprocess_regexps = [
@ -58,3 +59,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
             (r'\s*</', lambda match: '</'),
         ]
     ]
+
+    def skip_ad_pages(self, soup):
+        if 'Advertisement' in soup.title:
+            nexturl=soup.find('a')['href']
+            return self.index_to_soup(nexturl, raw=True)

View File

@ -0,0 +1,58 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Dziennik_pl(BasicNewsRecipe):
title = u'Dziennik.pl'
__author__ = 'fenuks'
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
category = 'newspaper'
language = 'pl'
cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
remove_empty_feeds=True
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
keep_only_tags=[dict(id='article')]
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
(u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
(u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
(u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
(u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
(u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
(u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
(u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
def append_page(self, soup, appendtag):
tag=soup.find('a', attrs={'class':'page_next'})
if tag:
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
while tag:
soup2= self.index_to_soup(tag['href'])
tag=soup2.find('a', attrs={'class':'page_next'})
if not tag:
for r in appendtag.findAll('div', attrs={'class':'art_src'}):
r.extract()
pagetext = soup2.find(name='div', attrs={'class':'article_body'})
for dictionary in self.remove_tags:
v=pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
for delete in v:
delete.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
if appendtag.find('div', attrs={'class':'article_paginator'}):
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
'''
Fetch echo-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Echo_Online(BasicNewsRecipe):
title = u' Echo Online'
description = '-Echo Online-'
publisher = 'Echo Online GmbH'
category = 'News, Germany'
__author__ = 'Armin Geller' # 2011-12-17
language = 'de'
lang = 'de-DE'
encoding = 'iso-8859-1'
timefmt = ' [%a, %d %b %Y]'
oldest_article = 7
max_articles_per_feed = 2
no_stylesheets = True
auto_cleanup = True
remove_javascript = True
feeds = [
(u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
(u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
(u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
(u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
(u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
(u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
(u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
(u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
(u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
(u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
(u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
]
def print_version(self, url):
return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-ash2/41801_145340745513489_893927_n.jpg' # 2011-12-16 AGe
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif' # 2011-12-16 AGe
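print_version above first lets open_novisit().geturl() resolve any redirects, then appends the query string that asks the site for its print layout; with a hypothetical article URL the result is:

# The article path here is invented; only the appended query string comes from the recipe.
url = 'http://www.echo-online.de/darmstadt/beispiel-artikel_12345.htm'
print(url + '?_FRAME=33&_FORMAT=PRINT')
# -> http://www.echo-online.de/darmstadt/beispiel-artikel_12345.htm?_FRAME=33&_FORMAT=PRINT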

View File

@ -55,12 +55,17 @@ class Economist(BasicNewsRecipe):
 '''

     def get_cover_url(self):
-        br = self.browser
-        br.open(self.INDEX)
-        issue = br.geturl().split('/')[4]
-        self.log('Fetching cover for issue: %s'%issue)
-        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
-        return cover_url
+        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
+        div = soup.find('div', attrs={'class':lambda x: x and
+                                      'print-cover-links' in x})
+        a = div.find('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            url = 'http://www.economist.com' + url
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'class':'cover-content'})
+        img = div.find('img', src=True)
+        return img.get('src')

     def parse_index(self):
         return self.economist_parse_index()

View File

@ -39,13 +39,17 @@ class Economist(BasicNewsRecipe):
     delay = 1

     def get_cover_url(self):
-        br = self.browser
-        br.open(self.INDEX)
-        issue = br.geturl().split('/')[4]
-        self.log('Fetching cover for issue: %s'%issue)
-        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
-        return cover_url
+        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
+        div = soup.find('div', attrs={'class':lambda x: x and
+                                      'print-cover-links' in x})
+        a = div.find('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            url = 'http://www.economist.com' + url
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'class':'cover-content'})
+        img = div.find('img', src=True)
+        return img.get('src')

     def parse_index(self):
         try:
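Both Economist recipes replace the date-derived cover URL with a two-step scrape of the covers page. A standalone sketch of that lookup, assuming requests and BeautifulSoup purely for illustration (inside the recipes, index_to_soup does the fetching) and valid only for the site's 2011-era markup:

import requests
from bs4 import BeautifulSoup

base = 'http://www.economist.com'
# Step 1: the covers index links to the current issue's cover page.
soup = BeautifulSoup(requests.get(base + '/printedition/covers').text, 'html.parser')
div = soup.find('div', class_=lambda c: c and 'print-cover-links' in c)
href = div.find('a', href=True)['href']
if href.startswith('/'):
    href = base + href
# Step 2: the issue page carries the full-size cover image.
issue = BeautifulSoup(requests.get(href).text, 'html.parser')
print(issue.find('div', class_='cover-content').find('img', src=True)['src'])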

View File

@ -5,12 +5,11 @@
 __license__ = 'GPL v3'
 __copyright__ = '04 December 2010, desUBIKado'
 __author__ = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__ = 'v0.07'
-__date__ = '06, February 2011'
+__version__ = 'v0.08'
+__date__ = '13, November 2011'
 '''
 elperiodicodearagon.com
 '''

-import re
 from calibre.web.feeds.news import BasicNewsRecipe

@ -20,13 +19,13 @@ class elperiodicodearagon(BasicNewsRecipe):
     description = u'Noticias desde Aragon'
     publisher = u'elperiodicodearagon.com'
     category = u'news, politics, Spain, Aragon'
-    oldest_article = 2
+    oldest_article = 1
     delay = 0
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     language = 'es'
-    encoding = 'utf8'
+    encoding = 'iso-8859-1'
     remove_empty_feeds = True
     remove_javascript = True

@ -39,61 +38,30 @@ class elperiodicodearagon(BasicNewsRecipe):
     }

     feeds = [
-        (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
-        (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
-        (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
-        (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
-        (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
-        (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
-        (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
-        (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
-        (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
-        (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
+        (u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
+        (u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
+        (u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
+        (u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
+        (u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
+        (u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
+        (u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
+        (u'CAI Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
+        (u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
+        (u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
+        (u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
+        (u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
+        (u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
+        (u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
+        (u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
+        (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml')
     ]

-    extra_css = '''
-        h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
-        h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
-        h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
-        .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
-        img{margin-bottom: 0.4em}
-    '''
-
     remove_attributes = ['height','width']

-    keep_only_tags = [dict(name='div', attrs={'id':'contenidos'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'Noticia'})]

-    # Strip out all the clutter
-    remove_tags = [dict(name='ul', attrs={'class':'herramientasDeNoticia'}),
-        dict(name='span', attrs={'class':'MasInformacion '}),
-        dict(name='span', attrs={'class':'MasInformacion'}),
-        dict(name='div', attrs={'class':'Middle'}),
-        dict(name='div', attrs={'class':'MenuCabeceraRZaragoza'}),
-        dict(name='div', attrs={'id':'MenuCabeceraRZaragoza'}),
-        dict(name='div', attrs={'class':'MenuEquipo'}),
-        dict(name='div', attrs={'class':'TemasRelacionados'}),
-        dict(name='div', attrs={'class':'GaleriaEnNoticia'}),
-        dict(name='div', attrs={'class':'Recorte'}),
-        dict(name='div', attrs={'id':'NoticiasenRecursos'}),
-        dict(name='div', attrs={'id':'NoticiaEnPapel'}),
-        dict(name='p', attrs={'class':'RecorteEnNoticias'}),
-        dict(name='div', attrs={'id':'Comparte'}),
-        dict(name='div', attrs={'id':'CajaComparte'}),
-        dict(name='a', attrs={'class':'EscribirComentario'}),
-        dict(name='a', attrs={'class':'AvisoComentario'}),
-        dict(name='div', attrs={'class':'CajaAvisoComentario'}),
-        dict(name='div', attrs={'class':'navegaNoticias'}),
-        dict(name='div', attrs={'class':'Mensaje'}),
-        dict(name='div', attrs={'id':'PaginadorDiCom'}),
-        dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
-        dict(name='div', attrs={'id':'CintilloComentario'}),
-        dict(name='div', attrs={'id':'EscribeComentario'}),
-        dict(name='div', attrs={'id':'FormularioComentario'}),
-        dict(name='div', attrs={'id':'FormularioNormas'})]

     # Grab the print edition front page (the format=1 image has the higher resolution)
     def get_cover_url(self):
@ -104,23 +72,7 @@ class elperiodicodearagon(BasicNewsRecipe):
             return image['src'].rstrip('format=2') + 'format=1'
         return None

-    # Remove the blank space between the article and the comments (lines 1 and 2);
-    # the index did not point at the real start of the article (line 3)
-    preprocess_regexps = [
-        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
-    ]
-
-    # Replace each embedded YouTube video with a thumbnail image
-    def preprocess_html(self, soup):
-        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
-            if video_yt:
-                video_yt.name = 'img'
-                fuente = video_yt['src']
-                fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
-                video_yt['src'] = fuente2 + '/0.jpg'
-        return soup
+    # Use the mobile version of the site
+    def print_version(self, url):
+        return url.replace('http://www.elperiodicodearagon.com/', 'http://www.elperiodicodearagon.com/m/')
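The new print_version simply reroutes every article through the site's mobile pages, which are much lighter to clean up. With a hypothetical article URL:

# The article path is invented; the replace() call is the one from the recipe.
url = 'http://www.elperiodicodearagon.com/noticias/aragon/ejemplo_123456.html'
print(url.replace('http://www.elperiodicodearagon.com/',
                  'http://www.elperiodicodearagon.com/m/'))
# -> http://www.elperiodicodearagon.com/m/noticias/aragon/ejemplo_123456.html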

View File

@ -0,0 +1,48 @@
################################################################################
#Description: http://es.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2010.12.01. - V1.0
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class elet_es_irodalom(BasicNewsRecipe):
title = u'Elet es Irodalom'
__author__ = 'Bigpapa'
oldest_article = 7
max_articles_per_feed = 20 # maximum number of articles to keep per feed in the generated e-book
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'iso-8859-2'
category = 'Cikkek'
language = 'hu'
publication_type = 'newsportal'
extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
keep_only_tags = [
dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
]
remove_tags = [
dict(name='a', attrs={'target':['_TOP']}),
dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
]
feeds = [
(u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
(u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
(u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
(u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
(u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
(u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
(u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
(u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
(u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
]

View File

@ -4,7 +4,8 @@ __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 elmundo.es
 '''

+import re
+import time
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElMundo(BasicNewsRecipe):
@ -18,12 +19,15 @@ class ElMundo(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'iso8859_15'
+    remove_javascript = True
+    remove_empty_feeds = True
     language = 'es'
     masthead_url = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
     publication_type = 'newspaper'
     extra_css = """
         body{font-family: Arial,Helvetica,sans-serif}
         .metadata_noticia{font-size: small}
+        .pestana_GDP{font-size: small; font-weight:bold}
         h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
         .hora{color: red}
         .update{color: gray}
@ -41,8 +45,11 @@ class ElMundo(BasicNewsRecipe):
     remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
     remove_attributes = ['lang','border']
     remove_tags = [
-        dict(name='div', attrs={'class':['herramientas','publicidad_google']})
-        ,dict(name='div', attrs={'id':'modulo_multimedia' })
+        dict(name='div', attrs={'class':['herramientas','publicidad_google','comenta','col col-2b','apoyos','no-te-pierdas']})
+        ,dict(name='div', attrs={'class':['publicidad publicidad_cuerpo_noticia','comentarios_nav','mensaje_privado','interact']})
+        ,dict(name='div', attrs={'class':['num_comentarios estirar']})
+        ,dict(name='span', attrs={'class':['links_comentar']})
+        ,dict(name='div', attrs={'id':['comentar']})
         ,dict(name='ul', attrs={'class':'herramientas' })
         ,dict(name=['object','link','embed','iframe','base','meta'])
     ]
@ -50,13 +57,31 @@ class ElMundo(BasicNewsRecipe):
     feeds = [
         (u'Portada' , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' )
         ,(u'Deportes' , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
-        ,(u'Economia' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
-        ,(u'Espana' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
+        ,(u'Econom\xeda' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
+        ,(u'Espa\xf1a' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
         ,(u'Internacional' , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
         ,(u'Cultura' , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml' )
-        ,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
-        ,(u'Comunicacion' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
-        ,(u'Television' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
+        ,(u'Ciencia/Ecolog\xeda', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
+        ,(u'Comunicaci\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
+        ,(u'Televisi\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
+        ,(u'Salud' , u'http://estaticos.elmundo.es/elmundosalud/rss/portada.xml' )
+        ,(u'Solidaridad' , u'http://estaticos.elmundo.es/elmundo/rss/solidaridad.xml' )
+        ,(u'Su vivienda' , u'http://estaticos.elmundo.es/elmundo/rss/suvivienda.xml' )
+        ,(u'Motor' , u'http://estaticos.elmundo.es/elmundomotor/rss/portada.xml' )
+        ,(u'Madrid' , u'http://estaticos.elmundo.es/elmundo/rss/madrid.xml' )
+        ,(u'Barcelona' , u'http://estaticos.elmundo.es/elmundo/rss/barcelona.xml' )
+        ,(u'Pa\xeds Vasco' , u'http://estaticos.elmundo.es/elmundo/rss/paisvasco.xml' )
+        ,(u'Baleares' , u'http://estaticos.elmundo.es/elmundo/rss/baleares.xml' )
+        ,(u'Castilla y Le\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castillayleon.xml' )
+        ,(u'Valladolid' , u'http://estaticos.elmundo.es/elmundo/rss/valladolid.xml' )
+        ,(u'Valencia' , u'http://estaticos.elmundo.es/elmundo/rss/valencia.xml' )
+        ,(u'Alicante' , u'http://estaticos.elmundo.es/elmundo/rss/alicante.xml' )
+        ,(u'Castell\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castellon.xml' )
+        ,(u'Andaluc\xeda' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia.xml' )
+        ,(u'Sevilla' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_sevilla.xml' )
+        ,(u'M\xe1laga' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_malaga.xml' )
     ]

     def preprocess_html(self, soup):
@ -67,3 +92,34 @@ class ElMundo(BasicNewsRecipe):

     def get_article_url(self, article):
         return article.get('guid', None)
+
+    preprocess_regexps = [
+        # Show a still image in place of embedded videos
+        (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+        (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+        (re.compile(r'var video=', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+        # Suppress the comment numbering: 1, 2, 3 ...
+        (re.compile(r'<ol>\n<li style="z-index:', re.DOTALL|re.IGNORECASE), lambda match: '<ul><li style="z-index:'),
+        (re.compile(r'</ol>\n<div class="num_comentarios estirar">', re.DOTALL|re.IGNORECASE), lambda match: '</ul><div class="num_comentarios estirar">'),
+    ]
+
+    # Fetch the cover image
+    def get_cover_url(self):
+        cover = None
+        st = time.localtime()
+        year = str(st.tm_year)
+        month = "%.2d" % st.tm_mon
+        day = "%.2d" % st.tm_mday
+        #http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg
+        cover='http://img.kiosko.net/'+ year + '/' + month + '/' + day +'/es/elmundo.750.jpg'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except:
+            self.log("\nPortada no disponible")
+            cover ='http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
+        return cover
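The cover lookup above keys a kiosko.net image URL off today's date and only falls back to the masthead when the probe fails; the date arithmetic reduces to:

import time

# Builds today's candidate cover URL the same way get_cover_url above does.
st = time.localtime()
print('http://img.kiosko.net/%d/%.2d/%.2d/es/elmundo.750.jpg'
      % (st.tm_year, st.tm_mon, st.tm_mday))
# e.g. http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg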

recipes/emuzica_pl.recipe Normal file
View File

@ -0,0 +1,16 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class eMuzyka(BasicNewsRecipe):
title = u'eMuzyka'
__author__ = 'fenuks'
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music'
language = 'pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

View File

@ -1,35 +1,43 @@
 #!/usr/bin/env python

 __license__ = 'GPL v3'
-__author__ = 'Gerardo Diez'
-__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
-description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
-__docformat__ = 'restructuredtext en'
+__copyright__ = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
+__author__ = 'desUBIKado, based on an earlier version by Gerardo Diez'
+__version__ = 'v1.01'
+__date__ = '13, November 2011'
 '''
-expansion.es
+[url]http://www.expansion.com/[/url]
 '''

+import time
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe

-class Publico(BasicNewsRecipe):
-    title =u'Expansion.com'
-    __author__ ='Gerardo Diez'
-    publisher =u'Unidad Editorial Información Económica, S.L.'
-    category ='finances, catalunya'
-    oldest_article =1
+class expansion_spanish(BasicNewsRecipe):
+    __author__ ='Gerardo Diez & desUBIKado'
+    description ='Financial news from Spain'
+    title =u'Expansion'
+    publisher =u'Unidad Editorial Internet, S.L.'
+    category ='news, finances, Spain'
+    oldest_article = 2
+    simultaneous_downloads = 10
     max_articles_per_feed =100
-    simultaneous_downloads =10
-    cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
-    timefmt ='[%A, %d %B, %Y]'
-    encoding ='latin'
+    timefmt = '[%a, %d %b, %Y]'
+    encoding ='iso-8859-15'
     language ='es'
-    remove_javascript =True
-    no_stylesheets =True
+    use_embedded_content = False
+    remove_javascript = True
+    no_stylesheets = True
+    remove_empty_feeds = True

     keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
     remove_tags =[
-        dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
-        dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
+        dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
+        dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
         dict(name='span', attrs={'class':['comentarios']}),
         dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
-        dict(name='div', attrs={'id':['comentarios_lectores_listado']})
+        dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
     ]
     feeds =[
         (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
         (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
         (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
         (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
         (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
         (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
-        (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
+        (u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
         (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
-        (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
+        (u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
         (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
         (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
         (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
         (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
-        (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
+        (u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
-        (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
+        (u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
         (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
-        (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
+        (u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
-        (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
+        (u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
-        (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
+        (u'Deporte y Negocio', u' [url]http://estaticos.expansion.com/rss/empresasdeporte.xml[/url]'),
         (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
         (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
         (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
-        (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
-        (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
+        (u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
+        (u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
         (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
-        (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
+        (u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
         (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
         (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
-        (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
+        (u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
         (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
-        (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
+        (u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
         (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
         (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
-        (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
+        (u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
-        (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
+        (u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
     ]

+    # Fetch the cover image
+    def get_cover_url(self):
+        cover = None
+        st = time.localtime()
+        year = str(st.tm_year)
+        month = "%.2d" % st.tm_mon
+        day = "%.2d" % st.tm_mday
+        #[url]http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg[/url]
+        cover='http://img5.kiosko.net/'+ year + '/' + month + '/' + day +'/es/expansion.750.jpg'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except:
+            self.log("\nPortada no disponible")
+            cover ='http://www.aproahp.org/enlaces/images/diario_expansion.gif'
+        return cover
+
+    # To keep the ad interstitial from firing when an article is fetched, and
+    # to always get the web page, send the current Unix/epoch time in the "t"
+    # parameter so the site believes the ad has just been shown
+    def print_version(self, url):
+        st = time.time()
+        segundos = str(int(st))
+        parametros = '.html?t=' + segundos
+        return url.replace('.html', parametros)
+
+    _processed_links = []
+    def get_article_url(self, article):
+        # Recover the original article URL from the "feedsportal" wrapper
+        link = article.get('link', None)
+        if link is None:
+            return article
+        if link.split('/')[-1]=="story01.htm":
+            link=link.split('/')[-2]
+            a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
+            b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
+            for i in range(0,len(a)):
+                link=link.replace(a[i],b[i])
+            link="http://"+link
+        # Drop articles duplicated across feeds
+        if not (link in self._processed_links):
+            self._processed_links.append(link)
+        else:
+            link = None
+        return link
+
+    # A little css to improve the presentation of the articles
+    extra_css = '''
+        .entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
+        .fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+    '''
+
+    # Show a still image in place of embedded videos
+    preprocess_regexps = [
+        (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+        (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+        (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+    ]
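The feedsportal decoder in get_article_url above is an ordered series of string substitutions over the next-to-last path segment. Tracing it with a made-up wrapped link (the /s/abcdef segment and the target path are invented):

link = ('http://rss.feedsportal.com/c/32992/f/532693/s/abcdef/l/'
        '0L0Sexpansion0N0Cmercados0C10A0Bhtml/story01.htm')

seg = link.split('/')[-2]                # '0L0Sexpansion0N0Cmercados0C10A0Bhtml'
a = ['0B','0C','0D','0E','0F','0G','0N','0L0S','0A']
b = ['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
for i in range(len(a)):                  # order matters: '0N' before '0L0S', '0A' last
    seg = seg.replace(a[i], b[i])
print('http://' + seg)
# -> http://www.expansion.com/mercados/100.html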

recipes/fisco_oggi.recipe Normal file
View File

@ -0,0 +1,18 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of the Italian Government Income Agency (about revenue, taxation, taxes) - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
title = u'Fisco Oggi'
language = 'it'
__author__ = 'faber1971'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
remove_javascript = True
no_stylesheets = True
feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]

View File

@ -1,57 +1,68 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class FocusRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Focus'
    publisher = u'Gruner + Jahr Polska'
    category = u'News'
    description = u'Newspaper'
    category = 'magazine'
    cover_url = ''
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    # Seems to work best, but YMMV
    simultaneous_downloads = 5

    r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'id': 'cll'}))

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'class': 'ulm noprint'}))
    remove_tags.append(dict(name='div', attrs={'class': 'txb'}))
    remove_tags.append(dict(name='div', attrs={'class': 'h2'}))
    remove_tags.append(dict(name='ul', attrs={'class': 'txu'}))
    remove_tags.append(dict(name='div', attrs={'class': 'ulc'}))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
        h1{text-align: left;}
        h2{font-size: medium; font-weight: bold;}
        p.lead {font-weight: bold; text-align: left;}
        .authordate {font-size: small; color: #696969;}
        .fot{font-size: x-small; color: #666666;}
    '''

    feeds = [
        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
    ]

    def skip_ad_pages(self, soup):
        if ('advertisement' in soup.find('title').string.lower()):
            href = soup.find('a').get('href')
            return self.index_to_soup(href, raw=True)
        else:
            return None
    def get_cover_url(self):
        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
        return getattr(self, 'cover_url', self.cover_url)
    def print_version(self, url):
        if url.count('focus.pl.feedsportal.com'):
            u = url.find('focus0Bpl')
            u = 'http://www.focus.pl/' + url[u + 11:]
            u = u.replace('0C', '/')
            u = u.replace('A', '')
            u = u.replace('0E', '-')
            u = u.replace('/nc/1//story01.htm', '/do-druku/1')
        else:
            u = url.replace('/nc/1', '/do-druku/1')
        return u

View File

@ -8,31 +8,35 @@ class FSP(BasicNewsRecipe):
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
                  u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
    #found this to be the easiest place to find the index page (13-Nov-2011).
    # searching for the "Indice Geral" link
    HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed = 40
    remove_javascript = True
    needs_subscription = True

    remove_tags_before = dict(name='p')
    remove_tags = [dict(name='td', attrs={'align': 'center'})]
    remove_attributes = ['height', 'width']

    # fixes the problem with the section names
    section_dict = {'cotidian': 'cotidiano', 'ilustrad': 'ilustrada',
        'quadrin': 'quadrinhos', 'opiniao': u'opini\xE3o',
        'ciencia': u'ci\xeancia', 'saude': u'sa\xfade',
        'ribeirao': u'ribeir\xE3o', 'equilibrio': u'equil\xedbrio',
        'imoveis': u'im\xf3veis', 'negocios': u'neg\xf3cios',
        'veiculos': u've\xedculos', 'corrida': 'folha corrida'}

    # this solves the problem with truncated content in Kindle
    conversion_options = {'linearize_tables': True}

    # this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
    # Indice e Comunicar Erros
    preprocess_regexps = [(re.compile(r'<!--/NOTICIA-->.*Comunicar Erros</a>',
        re.DOTALL|re.IGNORECASE), lambda match: r'')]
    def get_browser(self):
@ -49,7 +53,25 @@ class FSP(BasicNewsRecipe):
    def parse_index(self):
        #Searching for the index page on the HOMEPAGE
        hpsoup = self.index_to_soup(self.HOMEPAGE)
        indexref = hpsoup.find('a', href=re.compile('^indices.*'))
        self.log('--> tag containing the today s index: ', indexref)
        INDEX = indexref['href']
        INDEX = 'http://www1.folha.uol.com.br/fsp/' + INDEX
        self.log('--> INDEX after extracting href and adding prefix: ', INDEX)

        # ... and taking the opportunity to get the cover image link
        coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href']
        if coverurl:
            self.log('--> tag containing the today s cover: ', coverurl)
            coverurl = coverurl.replace('htm', 'jpg')
            coverurl = 'http://www1.folha.uol.com.br/fsp/images/' + coverurl
            self.log('--> coverurl after extracting href and adding prefix: ', coverurl)
            self.cover_url = coverurl

        #soup = self.index_to_soup(self.INDEX)
        soup = self.index_to_soup(INDEX)

        feeds = []
        articles = []
        section_title = "Preambulo"
@ -68,8 +90,12 @@ class FSP(BasicNewsRecipe):
                    self.log('--> new section title: ', section_title)
                if strpost.startswith('<a href'):
                    url = post['href']
                    #this bit is kept if they ever go back to the old format (pre Nov-2011)
                    if url.startswith('/fsp'):
                        url = 'http://www1.folha.uol.com.br' + url
                    #
                    if url.startswith('http://www1.folha.uol.com.br/fsp'):
                        #url = 'http://www1.folha.uol.com.br'+url
                        title = self.tag_to_string(post)
                        self.log()
                        self.log('--> post: ', post)
@ -82,15 +108,11 @@ class FSP(BasicNewsRecipe):
        # keeping the front page url
        minha_capa = feeds[0][1][1]['url']

        # removing the first section (now called 'top')
        del feeds[0]

        # inserting the cover page as the first article (nicer for kindle users)
        feeds.insert(0, (u'primeira p\xe1gina', [{'title': u'Primeira p\xe1gina', 'url': minha_capa}]))
        return feeds

recipes/formulaas.recipe Normal file
View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
formula-as.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FormulaAS(BasicNewsRecipe):
title = u'Formula AS'
__author__ = u'Silviu Cotoar\u0103'
publisher = u'Formula AS'
description = u'Formula AS'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Romania'
encoding = 'utf-8'
cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'item padded'})
]
remove_tags = [
dict(name='ul', attrs={'class':'subtitle lower'})
]
remove_tags_after = [
dict(name='ul', attrs={'class':'subtitle lower'}),
dict(name='div', attrs={'class':'item-brief-options'})
]
feeds = [
(u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -1,35 +1,61 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Christian Schmitt'
'''
fr-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe

class FROnlineRecipe(BasicNewsRecipe):
    title = 'Frankfurter Rundschau'
    __author__ = 'maccs'
    description = 'Nachrichten aus D und aller Welt'
    encoding = 'utf-8'
    masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
    publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
    category = 'news, germany, world'
    language = 'de'
    publication_type = 'newspaper'
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 1          # Increase this number if you're interested in older articles
    max_articles_per_feed = 50  # Seems a reasonable number to me
    extra_css = '''
        body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
        .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
        .p--heading-1 {font-weight: bold;}
        .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
    '''
    keep_only_tags = [{'class': 'ArticleHeadlineH1'}, {'class': 'article_text'}]
    cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
    cover_margins = (100, 150, '#ffffff')

    feeds = []
    feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
    feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
    feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
    feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
    feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
    feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
    feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
    feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
    feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
    feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
    feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
    feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
    feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
    feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
    feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
    feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
    feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))

    def print_version(self, url):
        return url.replace('index.html', 'view/printVersion/-/index.html')

View File

@ -18,7 +18,7 @@ class FrazPC(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    cover_url = 'http://www.frazpc.pl/images/logo.png'

    feeds = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
@ -33,6 +33,7 @@ class FrazPC(BasicNewsRecipe):
        dict(name='div', attrs={'class':'comments_box'})
    ]
    remove_tags_after = dict(name='div', attrs={'class':'content'})

    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
    remove_attributes = [ 'width', 'height' ]

View File

@ -0,0 +1,35 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
title = u'Gazeta.pl Szczecin'
description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
__author__ = u'Michał Szkutnik'
__license__ = u'GPL v3'
language = 'pl'
publisher = 'Agora S.A.'
category = 'news, szczecin'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]
def get_article_url(self, article):
s = re.search("""/0L(szczecin.*)/story01.htm""", article.link)
s = s.group(1)
replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_"}
for (a, b) in replacements.iteritems():
s = string.replace(s, a, b)
s = string.replace(s, "0A", "0")
return "http://"+s
def print_version(self, url):
s = re.search("""/(\d*),(\d*),(\d*),.*\.html""", url)
no1 = s.group(2)
no2 = s.group(3)
return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)

View File

@ -0,0 +1,90 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GiveMeSomethingToRead(BasicNewsRecipe):
title = u'Give Me Something To Read'
description = 'Curation / aggregation of articles on diverse topics'
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://givemesomethingtoread.com'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('The Arts','arts',25),
('Science','science',30),
('Technology','technology',30),
('Politics','politics',20),
('Media','media',30),
('Crime','crime',15),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/tagged/'+tag
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
headers = soup.findAll('h2')
if len(headers) == 0:
break
for header in headers:
atag = header.find('a')
url = atag['href']
# skip promotionals and duplicates
if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(header)
self.log('\tFound article:', title)
#self.log('\t', url)
desc = header.parent.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = header.parent.previousSibling
# navigate up to find h3, which contains the date
while p:
if hasattr(p,'name') and p.name == 'h3':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
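The CATEGORIES list above is the only part a user needs to edit; for instance, a hypothetical trimmed-down configuration:

CATEGORIES = [
    ('Science', 'science', 30),
    ('Technology', 'technology', 30),
]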

View File

@ -1,4 +1,3 @@
from calibre.web.feeds.news import BasicNewsRecipe

class GlasgowHerald(BasicNewsRecipe):
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
    language = 'en_GB'
    __author__ = 'Kovid Goyal'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    #keep_only_tags = [dict(attrs={'class':'article'})]
    #remove_tags = [
    #    dict(id=['pic-nav']),
    #    dict(attrs={'class':['comments-top']})
    #]

    feeds = [
@ -26,4 +29,3 @@ class GlasgowHerald(BasicNewsRecipe):
        u'http://www.heraldscotland.com/cmlink/1.768',),
        (u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]

View File

@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
        {'class':['articleTools', 'pagination', 'Ads', 'topad',
            'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
picdiv = soup.find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src'])
    #Use the mobile version rather than the web version
    def print_version(self, url):
        return url.rpartition('?')[0] + '?service=mobile'

View File

@ -12,7 +12,6 @@ class GN(BasicNewsRecipe):
    EDITION = 0
    __author__ = 'Piotr Kontek'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
@ -20,6 +19,8 @@ class GN(BasicNewsRecipe):
    remove_javascript = True
    temp_files = []
    simultaneous_downloads = 1
    masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
    title = u'Gość niedzielny'

    articles_are_obfuscated = True
@ -64,7 +65,6 @@ class GN(BasicNewsRecipe):
            if img != None:
                a = img.parent
                self.EDITION = a['href']
                self.cover_url = 'http://www.gosc.pl' + img['src']
                if not first:
                    break

recipes/grantland.recipe Normal file
View File

@ -0,0 +1,96 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GrantLand(BasicNewsRecipe):
title = u"Grantland"
description = 'Writings on Sports & Pop Culture'
language = 'en'
__author__ = 'Barty'
max_articles_per_feed = 100
no_stylesheets = False
# auto_cleanup is too aggressive sometimes and we end up with blank articles
auto_cleanup = False
timefmt = ' [%a, %d %b %Y]'
oldest_article = 365
cover_url = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
masthead_url = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'
INDEX = 'http://www.grantland.com'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, url suffix, max number of articles to load)
('Today in Grantland','',20),
('In Case You Missed It','incaseyoumissedit',35),
]
remove_tags = [
{'name':['head','style','script']},
{'id':['header']},
{'class':re.compile(r'\bside|\bad\b|floatright|tags')}
]
remove_tags_before = {'class':'wrapper'}
remove_tags_after = [{'id':'content'}]
preprocess_regexps = [
# <header> tags with an img inside are just blog banners, don't need them
# note: there are other useful <header> tags so we don't want to just strip all of them
(re.compile(r'<header class.+?<img .+?>.+?</header>', re.DOTALL|re.IGNORECASE),lambda m: ''),
# delete everything between the *last* <hr class="small" /> and </article>
(re.compile(r'<hr class="small"(?:(?!<hr class="small").)+</article>', re.DOTALL|re.IGNORECASE),lambda m: '<hr class="small" /></article>'),
]
extra_css = """cite, time { font-size: 0.8em !important; margin-right: 1em !important; }
img + cite { display:block; text-align:right}"""
def parse_index(self):
feeds = []
seen_urls = set([])
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
self.log('Reading category:', cat_name)
articles = []
page = "%s/%s" % (self.INDEX, tag)
soup = self.index_to_soup(page)
headers = soup.findAll('h2' if tag=='' else 'h3')
for header in headers:
tag = header.find('a')
if tag is None or not hasattr(tag,'href'):
continue
url = tag['href']
if url.startswith('/'):
url = self.INDEX + url
if url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(tag)
if 'Podcast:' in title or 'In Case You Missed It' in title:
continue
desc = dt = ''
par = header.parent
#tag = par.find('cite')
#if tag is not None:
# desc = '['+self.tag_to_string(tag) + '] '
tag = par.find('div')
if tag is not None:
desc = desc + self.tag_to_string(tag)
tag = tag.find('time')
if tag is not None:
dt = self.tag_to_string( tag)
self.log('\tFound article:', title)
self.log('\t', url)
articles.append({'title':title,'url':url,'description':desc,'date':dt})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
def print_version(self, url):
return url+'?view=print'
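A quick standalone check of the first cleanup regex above, on hypothetical blog-banner markup:

import re

pat = re.compile(r'<header class.+?<img .+?>.+?</header>', re.DOTALL|re.IGNORECASE)
html = '<header class="blog"><img src="banner.jpg">The Banner</header><p>story</p>'
print(pat.sub('', html))  # -> <p>story</p>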

recipes/gs24_pl.recipe Normal file
View File

@ -0,0 +1,43 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
title = u'GS24.pl (Głos Szczeciński)'
description = u'Internetowy serwis Głosu Szczecińskiego'
__author__ = u'Michał Szkutnik'
__license__ = u'GPL v3'
language = 'pl'
publisher = 'Media Regionalne sp. z o.o.'
category = 'news, szczecin'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
cover_url = "http://www.gs24.pl/images/top_logo.png"
feeds = [
# (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
(u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
(u'Stargard', u'http://www.gs24.pl/stargard.xml'),
(u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
(u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
(u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
(u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
(u'Police', u'http://www.gs24.pl/police.xml'),
(u'Region', u'http://www.gs24.pl/region.xml'),
(u'Sport', u'http://www.gs24.pl/sport.xml'),
]
def get_article_url(self, article):
s = re.search("""/0L0S(gs24.*)/story01.htm""", article.link)
s = s.group(1)
replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_", "0D" : "?", "0F" : "="}
for (a, b) in replacements.iteritems():
s = string.replace(s, a, b)
s = string.replace(s, "0A", "0")
return "http://"+s
def print_version(self, url):
return url + "&Template=printpicart"

View File

@ -9,6 +9,7 @@ www.guardian.co.uk
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

class Guardian(BasicNewsRecipe):
@ -16,16 +17,19 @@ class Guardian(BasicNewsRecipe):
    if date.today().weekday() == 6:
        base_url = "http://www.guardian.co.uk/theobserver"
        cover_pic = 'Observer digital edition'
        masthead_url = 'http://static.guim.co.uk/sys-images/Guardian/Pix/site_furniture/2010/10/19/1287478087992/The-Observer-001.gif'
    else:
        base_url = "http://www.guardian.co.uk/theguardian"
        cover_pic = 'Guardian digital edition'
        masthead_url = 'http://static.guim.co.uk/static/f76b43f9dcfd761f0ecf7099a127b603b2922118/common/images/logos/the-guardian/titlepiece.gif'

    __author__ = 'Seabound and Sujata Raman'
    language = 'en_GB'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    encoding = 'utf-8'

    # List of section titles to ignore
    # For example: ['Sport']
@ -41,6 +45,16 @@ class Guardian(BasicNewsRecipe):
dict(name='div', attrs={'class':["guardian-tickets promo-component",]}), dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
dict(name='ul', attrs={'class':["pagination"]}), dict(name='ul', attrs={'class':["pagination"]}),
dict(name='ul', attrs={'id':["content-actions"]}), dict(name='ul', attrs={'id':["content-actions"]}),
# article history link
dict(name='a', attrs={'class':["rollover history-link"]}),
# "a version of this article ..." speil
dict(name='div' , attrs = { 'class' : ['section']}),
# "about this article" js dialog
dict(name='div', attrs={'class':["share-top",]}),
# author picture
dict(name='img', attrs={'class':["contributor-pic-small"]}),
# embedded videos/captions
dict(name='span',attrs={'class' : ['inline embed embed-media']}),
#dict(name='img'), #dict(name='img'),
] ]
use_embedded_content = False use_embedded_content = False
@ -65,8 +79,21 @@ class Guardian(BasicNewsRecipe):
            url = None
        return url

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def preprocess_html(self, soup):
        # multiple html sections in soup, useful stuff in the first
        html = soup.find('html')
        soup2 = BeautifulSoup()
        soup2.insert(0, html)
        soup = soup2

        for item in soup.findAll(style=True):
            del item['style']
@ -75,6 +102,17 @@ class Guardian(BasicNewsRecipe):
        for tag in soup.findAll(name=['ul', 'li']):
            tag.name = 'div'

        # removes number next to rating stars
        items_to_remove = []
        rating_container = soup.find('div', attrs={'class': ['rating-container']})
        if rating_container:
            for item in rating_container:
                if isinstance(item, Tag) and str(item.name) == 'span':
                    items_to_remove.append(item)
        for item in items_to_remove:
            item.extract()

        return soup

    def find_sections(self):

View File

@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
import re

class HNWithCommentsLink(BasicNewsRecipe):
    title = 'HN With Comments Link'
    __author__ = 'Tom Scholl & David Kerschner'
    description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
    publisher = 'Y Combinator'
    category = 'news, programming, it, technology'
@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
            body = body + comments
        return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'

    def parse_feeds(self):
        a = super(HNWithCommentsLink, self).parse_feeds()
        self.hn_articles = a[0].articles
        return a

    def get_obfuscated_article(self, url):
        if url.startswith('http://news.ycombinator.com'):
            content = self.get_hn_content(url)
@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
        else:
            content = self.get_readable_content(url)

        article = 0
        for a in self.hn_articles:
            if a.url == url:
                article = a

        content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'

        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
        self.temp_files[-1].write(content)
        self.temp_files[-1].close()

View File

@ -1,11 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe(BasicNewsRecipe):
    title = 'Heise-online'
    description = 'News vom Heise-Verlag'
    __author__ = 'schuster'
    masthead_url = 'http://www.heise.de/icons/ho/heise_online_logo.gif'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co. KG'
    use_embedded_content = False
    language = 'de'
    oldest_article = 2
@ -14,11 +14,10 @@ class AdvancedUserRecipe(BasicNewsRecipe):
    remove_empty_feeds = True
    timeout = 5
    no_stylesheets = True
    encoding = 'utf-8'

    remove_tags_after = dict(name='p', attrs={'class':'editor'})
    remove_tags = [dict(id='navi_top_container'),
        dict(id='navi_bottom'),
        dict(id='mitte_rechts'),
        dict(id='navigation'),
@ -29,27 +28,31 @@ class AdvancedUserRecipe(BasicNewsRecipe):
        dict(id='seiten_navi'),
        dict(id='adbottom'),
        dict(id='sitemap'),
        dict(name='div', attrs={'id':'sitemap'}),
        dict(name='ul', attrs={'class':'erste_zeile'}),
        dict(name='ul', attrs={'class':'zweite_zeile'}),
        dict(name='div', attrs={'class':'navi_top_container'})]
    feeds = [
        ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
        ('iX', 'http://www.heise.de/ix/news/news.rdf'),
        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
        ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
        ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
        ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
        ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')]

    def print_version(self, url):
        return url + '?view=print'

View File

@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
import urllib, re

class HindustanTimes(BasicNewsRecipe):
    title = u'Hindustan Times'
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
            'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
    ]

    def get_article_url(self, article):
        '''
        HT uses a variant of the feedportal RSS ad display mechanism
        '''
        try:
            s = article.summary
            return urllib.unquote(
                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
        except:
            pass
        url = BasicNewsRecipe.get_article_url(self, article)
        res = self.browser.open_novisit(url)
        url = res.geturl().split('/')[-2]
        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
                    'www.'}
        for k, v in encoding.iteritems():
            url = url.replace(k, v)
        return url
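A sketch of the slug decoding above, run on a hypothetical feedsportal slug (Python 2, like the recipe):

encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': 'www.'}
slug = '0L0Shindustantimes0N0Cnews0Eindia0Carticle10Bhtml'  # hypothetical
for k, v in encoding.iteritems():
    slug = slug.replace(k, v)
print(slug)  # -> http://www.hindustantimes.com/news-india/article1.html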

View File

@ -4,56 +4,20 @@ __license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Histmag(BasicNewsRecipe):
    title = u'Histmag'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://histmag.org/grafika/loga/histmag-logo-2-300px.png'
    __author__ = 'matek09'
    description = u"Artykuly historyczne i publicystyczne"
    encoding = 'utf-8'
    #preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    keep_only_tags = [dict(id='article')]
    remove_tags = [dict(name='p', attrs={'class': 'article-tags'})]

    feeds = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'),
        (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'),
        (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'),
        (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'),
        (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]

View File

@ -8,6 +8,15 @@ class Historia_org_pl(BasicNewsRecipe):
    category = 'history'
    language = 'pl'
    oldest_article = 8
    remove_empty_feeds = True
    max_articles_per_feed = 100

    feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'),
        (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'),
        (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'),
        (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'),
        (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'),
        (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'),
        (u'Rekonstrukcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'),
        (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'),
        (u'Konkursy', u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')]

View File

@ -1,44 +1,58 @@
################################################################################
#Description: http://hvg.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.20. - V1.1
################################################################################

from calibre.web.feeds.news import BasicNewsRecipe

class hvg(BasicNewsRecipe):
    title = u'HVG'
    __author__ = 'Bigpapa'
    language = 'hu'
    oldest_article = 5        # How many days back to look for articles.
    max_articles_per_feed = 5 # Maximum number of articles to keep per feed in the e-book.
    no_stylesheets = True
    encoding = 'utf8'
    extra_css = ' h2 { font:bold 28px} '
    remove_javascript = True
    remove_empty_feeds = True

    remove_attributes = ['style', 'font', 'href']

    keep_only_tags = [
        dict(name='div', attrs={'id': ['pg-content']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
        dict(name='table', attrs={'class': ['banner2', 'monocle']}),
        dict(name='div', attrs={'id': ['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
        dict(name='div', attrs={'style': ['float: right; margin-bottom: 5px;', 'display: none;']}),
        dict(name='h3', attrs={'class': ['hthree']}),
        dict(name='ul', attrs={'class': ['defaultul']}),
        dict(name='form', attrs={'id': ['commentForm']}),
        dict(name='h6', attrs={'class': ['hthree']}),
        dict(name='h6', attrs={'class': ['more2']}),
        dict(name='img', attrs={'class': ['framed']}),
        dict(name='td', attrs={'class': ['greyboxbody', 'embedvideobody', 'embedvideofooter', 'embedvideobottom']}),
    ]

    feeds = [
        # (u'\xd6sszes', 'http://hvg.hu/rss'),
        (u'Itthon', 'http://hvg.hu/rss/itthon'),
        (u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
        (u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
        (u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
        (u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
        (u'Karrier', 'http://hvg.hu/rss/karrier'),
        (u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
        (u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
        (u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
        (u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
        (u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
        (u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
        (u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
        (u'Sport', 'http://hvg.hu/rss/sport')
    ]

[Binary icon files changed in this commit; contents not shown. New files include
recipes/icons/biolog_pl.png (1.2 KiB), recipes/icons/blues.png (910 B),
recipes/icons/formulaas.png (687 B), recipes/icons/infra_pl.png (1.5 KiB),
recipes/icons/moneynews.png (914 B), recipes/icons/skylife.png (3.3 KiB),
recipes/icons/zaman.png (999 B), plus several other icons ranging from 323 B to 9.3 KiB.]

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Gambarini, based on Darko Miletic'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
description = 'Italian daily newspaper - 09-11-2011'

'''
http://www.ilgiornale.it/
'''
@ -11,7 +11,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe

class IlGiornale(BasicNewsRecipe):
    __author__ = 'GAMBARINI'
    description = 'Italian daily newspaper'
    cover_url = 'http://www.ilgiornale.it/img_v1/logo.gif'
@ -23,9 +23,8 @@ class IlGiornale(BasicNewsRecipe):
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    conversion_options = {'linearize_tables': True}
@ -38,11 +37,11 @@ class IlGiornale(BasicNewsRecipe):
    def print_version(self, url):
        raw = self.browser.open(url).read()
        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
        all_print_tags = soup.find('div', {'id': 'print_article'})
        print_link = all_print_tags.a
        if print_link is None:
            return url
        return 'http://www.ilgiornale.it' + print_link['href']

    feeds = [

View File

@ -1,33 +1,60 @@
# adapted from old recipe by Darko Miletic <darko.miletic at gmail.com>

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString

class TheIndependentNew(BasicNewsRecipe):

    # flag to enable/disable article graphics on business pages/some others
    # eg http://www.independent.co.uk/news/world/europe/berlusconi-departure-fails-to-calm-the-markets-6259682.html
    # -max dimensions can be altered using the .pictureContainer img selector in the css
    _FETCH_ARTICLE_GRAPHICS = True

    #Flag to enable/disable image fetching (not business)
    _FETCH_IMAGES = True

    #used for converting rating to stars
    _STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
    _NO_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star_grey.png'

    title = u'The Independent'
    __author__ = 'Will'
    description = 'The latest in UK News and World News from The \
        Independent. Wide range of international and local news, sports \
        news, commentary and opinion pieces. Independent News - Breaking news \
        that matters. Your daily comprehensive news source - The \
        Independent Newspaper'
    publisher = 'The Independent'
    category = 'news, UK'
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    language = 'en_GB'
    publication_type = 'newspaper'
    masthead_url = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
    encoding = 'utf-8'
    remove_tags = [
        dict(attrs={'id': ['RelatedArtTag', 'renderBiography']}),
        dict(attrs={'class': ['autoplay', 'openBiogPopup']}),
        dict(name='img', attrs={'alt': ['Get Adobe Flash player']}),
        dict(attrs={'style': re.compile('.*')}),
    ]

    keep_only_tags = [dict(attrs={'id': 'main'})]
    recursions = 0

    # fixes non compliant html nesting and 'marks' article graphics links
    preprocess_regexps = [
        (re.compile('<span class="storyTop ">(?P<nested>.*?)</span>', re.DOTALL),
         lambda match: '<div class="storyTop">' + match.group('nested') + '</div>'),
        (re.compile('(<strong>.*?[Cc]lick.*?<a.*?((HERE)|([Hh]ere)).*?</strong>)', re.DOTALL),
         lambda match: '<div class="article-graphic">' + match.group(0) + '</div>'),
    ]
    conversion_options = {
        'comment' : description
@ -36,51 +63,451 @@ class TheIndependent(BasicNewsRecipe):
        ,'language' : language
    }
    extra_css = """
        h1{font-family: Georgia,serif }
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
        img{margin-bottom: 0.4em; display:block}
        .starRating img {float: left}
        .starRating {margin-top:0.4em; display: block}
        .image {clear:left; font-size: x-small; color:#888888;}
        .articleByTimeLocation {font-size: x-small; color:#888888;
            margin-bottom:0.2em ; margin-top:0.2em ; display:block}
        .subtitle {clear:left}
        .column-1 h1 { color: #191919}
        .column-1 h2 { color: #333333}
        .column-1 h3 { color: #444444}
        .column-1 p { color: #777777}
        .column-1 p,a,h1,h2,h3 { margin: 0; }
        .column-1 div{color:#888888; margin: 0;}
        .articleContent {display: block; clear:left;}
        .storyTop{}
        .pictureContainer img { max-width: 400px; max-height: 400px;}
        """

    oldest_article = 1
    max_articles_per_feed = 100

    _processed_urls = []
    feeds = [
        (u'UK', u'http://www.independent.co.uk/news/uk/rss'),
        (u'World', u'http://www.independent.co.uk/news/world/rss'),
        (u'Business', u'http://www.independent.co.uk/news/business/rss'),
        (u'People', u'http://www.independent.co.uk/news/people/rss'),
        (u'Science', u'http://www.independent.co.uk/news/science/rss'),
        (u'Media', u'http://www.independent.co.uk/news/media/rss'),
        (u'Education', u'http://www.independent.co.uk/news/education/rss'),
        (u'Leading Articles', u'http://www.independent.co.uk/opinion/leading-articles/rss'),
        (u'Commentators', u'http://www.independent.co.uk/opinion/commentators/rss'),
        (u'Columnists', u'http://www.independent.co.uk/opinion/columnists/rss'),
        (u'Letters', u'http://www.independent.co.uk/opinion/letters/rss'),
        (u'Big Question', u'http://www.independent.co.uk/extras/big-question/rss'),
        (u'Sport', u'http://www.independent.co.uk/sport/rss'),
        (u'Life&Style', u'http://www.independent.co.uk/life-style/rss'),
        (u'Arts&Entertainment', u'http://www.independent.co.uk/arts-entertainment/rss'),
        (u'Travel', u'http://www.independent.co.uk/travel/rss'),
        (u'Money', u'http://www.independent.co.uk/money/rss')
    ]
    def get_article_url(self, article):
        url = super(self.__class__, self).get_article_url(article)

        title = article.get('title', None)
        if title and re.search("^Video:", title):
            return None

        #remove duplicates
        if not (url in self._processed_urls):
            self._processed_urls.append(url)
        else:
            url = None
        return url
    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.body.findAll(style=True):
del item['style']
for item in soup.body.findAll(['author','preform']):
item.name='span'
for item in soup.body.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.body.findAll('div', attrs={'class':['clear-o','body','photoCaption']}):
item.name = 'p'
for item in soup.body.findAll('div'):
if not item.attrs and not item.contents:
item.extract()
soup2 = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
soup2.body.replaceWith(soup.body)
return soup2
#remove 'advertorial articles'
strapline = soup.find('div',attrs={'class' : re.compile('.*strapLine.*')})
if strapline:
for para in strapline.findAll('p'):
if len(para.contents) and isinstance(para.contents[0],NavigableString) \
and para.contents[0] == 'ADVERTORIAL FEATURE':
return None
items_to_extract = []
slideshow_elements = []
for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
remove = True
pattern = re.compile('((articleContent)|(title))$')
if (pattern.search(item['class'])) is not None:
remove = False
# corrections
# story content always good
pattern = re.compile('storyContent')
if (pattern.search(item['class'])) is not None:
remove = False
#images
pattern = re.compile('slideshow')
if (pattern.search(item['class'])) is not None:
if self._FETCH_IMAGES:
remove = False
slideshow_elements.append(item)
else:
remove = True
#social widgets always bad
pattern = re.compile('socialwidget')
if (pattern.search(item['class'])) is not None:
remove = True
if remove:
items_to_extract.append(item)
for item in items_to_extract:
item.extract()
items_to_extract = []
if self._FETCH_IMAGES:
for element in slideshow_elements:
for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
if item.img is not None:
#use full size image
img = item.findNext('img')
img['src'] = item['href']
#insert caption if available
if img.get('title') and (len(img['title']) > 1):
tag = Tag(soup,'h3')
text = NavigableString(img['title'])
tag.insert(0,text)
#picture before text
img.extract()
item.insert(0,img)
item.insert(1,tag)
# remove link
item.name = "div"
item["class"]='image'
del item["href"]
#remove empty subtitles
"""
currently the subtitle is located in first paragraph after
sibling <h3 class="subtitle"> tag. This may be 'fixed' at
some point.
"""
subtitle = soup.find('h3',attrs={'class' : 'subtitle'})
if subtitle is not None:
subtitleText = subtitle.findNext('p')
if subtitleText is not None:
if len(subtitleText.contents[0]) <= 1 :
subtitleText.extract()
subtitle.extract()
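# illustrative sketch (assumed markup) of what the block above removes -- a
# subtitle heading whose following paragraph is effectively empty:
#   <h3 class="subtitle">...</h3>
#   <p> </p>   <-- near-empty paragraph, extracted together with the heading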
#replace rating numbers with stars
for item in soup.findAll('div',attrs={ 'class' : 'starRating'}):
if item is not None:
soup2 = self._insertRatingStars(soup,item)
if soup2 is not None:
soup = soup2
#remove empty paragraph tags in storyTop which can leave a space
#between first paragraph and rest of story
nested_content = False
storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
for item in storyTop.findAll('p'):
for nested in item:
if isinstance(nested, Tag):
nested_content = True
break
if not nested_content and item.contents is not None and len(item.contents[0]) <= 1 :
items_to_extract.append(item)
for item in items_to_extract:
item.extract()
items_to_extract = []
#remove line breaks immediately next to tags with default margins
#to prevent double line spacing and narrow columns of text
storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
self._remove_undesired_line_breaks_from_tag(storyTop,soup)
#replace article graphics link with the graphics themselves
if self._FETCH_ARTICLE_GRAPHICS:
items_to_insert = []
for item in soup.findAll('div', attrs={'class' : ['article-graphic']}):
strong = item.find('strong')
if not strong:
continue
for child in strong:
if isinstance(child,Tag):
if str(child.name) == 'a':
items_to_insert.extend(self._get_article_graphic(strong,child['href'],soup))
for item in items_to_insert:
item[0].replaceWith(item[1])
for item in items_to_extract:
item.extract()
return soup
def _get_article_graphic(self,old_item,url,soup):
items_to_insert = []
if re.search('\.jpg$',str(url)):
div = Tag(soup,'div')
div['class'] = 'pictureContainer'
img = Tag(soup,'img')
img['src'] = url
img['alt'] = 'article graphic'
div.insert(0,img)
items_to_insert.append((old_item,div,))
return items_to_insert
soup2 = self.index_to_soup(url)
for item in soup2.findAll('div',attrs={'class' : re.compile("widget picture article.*")}):
items_to_insert.append((old_item,item),)
return items_to_insert
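# _get_article_graphic returns (old_tag, replacement) pairs: a link ending in
# .jpg is turned into an inline <div class="pictureContainer"><img/></div>,
# while any other link is fetched and its picture widgets are inlined instead.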
def _insertRatingStars(self,soup,item):
if item.contents is None or len(item.contents) < 1:
return
rating = item.contents[0]
try:
rating = float(item.contents[0])
except:
print 'Could not convert decimal rating to star: malformed float.'
return
for i in range(1,6):
star = Tag(soup,'img')
if i <= rating:
star['src'] = self._STAR_URL
else:
star['src'] = self._NO_STAR_URL
star['alt'] = 'star number ' + str(i)
item.insert(i,star)
#item.contents[0] = NavigableString('(' + str(rating) + ')')
item.contents[0] = ''
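# worked example: for a rating of 3.5 the loop above inserts full stars for
# i = 1..3 (since i <= 3.5) and empty stars for i = 4 and 5, so a 3.5 rating
# renders as three full and two empty star images.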
def postprocess_html(self,soup, first_fetch):
#find broken images and remove captions
items_to_extract = []
for item in soup.findAll('div', attrs={'class' : 'image'}):
img = item.findNext('img')
if img and img.get('src'):
# broken images still point to remote url
pattern = re.compile('http://www.independent.co.uk.*')
if pattern.match(img["src"]) is not None:
caption = img.findNextSibling('h3')
if caption is not None:
items_to_extract.append(caption)
items_to_extract.append(img)
for item in items_to_extract:
item.extract()
return soup
def _recursively_linearise_tag_tree(
self,
item,
linearised= None,
count=0,
limit = 100
):
linearised = linearised or []
count = count + 1
if count > limit:
return linearised
if not (isinstance(item,Tag)):
return linearised
for nested in item:
linearised.append(nested)
linearised = self._recursively_linearise_tag_tree(nested,linearised, count)
return linearised
def _get_previous_tag(self,current_index, tag_tree):
if current_index == 0:
return None
else:
return tag_tree[current_index - 1]
def _get_next_tag(self,current_index, tag_tree):
if current_index < len(tag_tree) - 1:
return tag_tree[current_index + 1]
else:
return None
def _list_match(self,test_str, list_regex):
for regex in list_regex:
match = re.match(regex, test_str)
if match is not None:
return True
return False
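# re.match() anchors at the start of the string, so a pattern such as r'h\d'
# in the spaced_tags list below matches any of the h1..h6 heading tag names.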
def _remove_undesired_line_breaks_from_tag(self,parent,soup):
if parent is None:
return
tag_tree = self._recursively_linearise_tag_tree(parent)
items_to_remove = []
for item in tag_tree:
if item == u'\n':
items_to_remove.append(item)
continue
for item in items_to_remove:
tag_tree.remove(item)
spaced_tags = [r'p', r'h\d', r'blockquote']
tags_to_extract = []
tags_to_replace = []
for (i, tag) in enumerate(tag_tree):
if isinstance(tag, Tag):
if str(tag) == '<br />':
previous_tag = self._get_previous_tag(i, tag_tree)
if isinstance(previous_tag, Tag):
previous_tag_is_spaced = previous_tag is not None\
and self._list_match(str(previous_tag.name),
spaced_tags)
else:
previous_tag_is_spaced = False
next_tag = self._get_next_tag(i, tag_tree)
if isinstance(next_tag, Tag):
next_tag_is_spaced = next_tag is not None\
and self._list_match(str(next_tag.name), spaced_tags)
else:
next_tag_is_spaced = False
if previous_tag_is_spaced or next_tag_is_spaced or i == 0\
or i == len(tag_tree) - 1:
tags_to_extract.append(tag)
else:
tags_to_replace.append((tag,NavigableString(' '),))
for pair in tags_to_replace:
pair[0].replaceWith(pair[1])
for tag in tags_to_extract:
tag.extract()
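# net effect (sketch): a <br /> adjacent to a <p>, h1..h6 or <blockquote> is
# dropped outright, since those tags already carry default margins, while a
# <br /> between two runs of plain text is replaced by a single space.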
feeds = [
(u'News - UK',
u'http://www.independent.co.uk/news/uk/?service=rss'),
(u'News - World',
u'http://www.independent.co.uk/news/world/?service=rss'),
(u'News - Business',
u'http://www.independent.co.uk/news/business/?service=rss'),
(u'News - People',
u'http://www.independent.co.uk/news/people/?service=rss'),
(u'News - Science',
u'http://www.independent.co.uk/news/science/?service=rss'),
(u'News - Media',
u'http://www.independent.co.uk/news/media/?service=rss'),
(u'News - Education',
u'http://www.independent.co.uk/news/education/?service=rss'),
(u'News - Obituaries',
u'http://www.independent.co.uk/news/obituaries/?service=rss'),
(u'News - Corrections',
u'http://www.independent.co.uk/news/corrections/?service=rss'
),
(u'Opinion',
u'http://www.independent.co.uk/opinion/?service=rss'),
(u'Environment',
u'http://www.independent.co.uk/environment/?service=rss'),
(u'Sport - Athletics',
u'http://www.independent.co.uk/sport/general/athletics/?service=rss'
),
(u'Sport - Cricket',
u'http://www.independent.co.uk/sport/cricket/?service=rss'),
(u'Sport - Football',
u'http://www.independent.co.uk/sport/football/?service=rss'),
(u'Sport - Golf',
u'http://www.independent.co.uk/sport/golf/?service=rss'),
(u'Sport - Motor racing',
u'http://www.independent.co.uk/sport/motor-racing/?service=rss'
),
(u'Sport - Olympics',
u'http://www.independent.co.uk/sport/olympics/?service=rss'),
(u'Sport - Racing',
u'http://www.independent.co.uk/sport/racing/?service=rss'),
(u'Sport - Rugby League',
u'http://www.independent.co.uk/sport/general/rugby-league/?service=rss'),
(u'Sport - Rugby Union',
u'http://www.independent.co.uk/sport/rugby/rugby-union/?service=rss'
),
(u'Sport - Sailing',
u'http://www.independent.co.uk/sport/general/sailing/?service=rss'
),
(u'Sport - Tennis',
u'http://www.independent.co.uk/sport/tennis/?service=rss'),
(u'Sport - Others',
u'http://www.independent.co.uk/sport/general/others/?service=rss'
),
(u'Life & Style - Fashion',
u'http://www.independent.co.uk/life-style/fashion/?service=rss'
),
(u'Life & Style -Food & Drink',
u'http://www.independent.co.uk/life-style/food-and-drink/?service=rss'
),
(u'Life & Style - Health and Families',
u'http://www.independent.co.uk/life-style/health-and-families/?service=rss'
),
(u'Life & Style - House & Home',
u'http://www.independent.co.uk/life-style/house-and-home/'),
(u'Life & Style - History',
u'http://www.independent.co.uk/life-style/history/?service=rss'
),
(u'Life & Style - Gadgets & Tech',
u'http://www.independent.co.uk/life-style/gadgets-and-tech/?service=rss'
),
(u'Life & Style - Motoring',
u'http://www.independent.co.uk/life-style/motoring/?service=rss'
),
(u'Arts & Ents - Art',
u'http://www.independent.co.uk/arts-entertainment/art/?service=rss'
),
(u'Arts & Ents - Architecture',
u'http://www.independent.co.uk/arts-entertainment/architecture/?service=rss'
),
(u'Arts & Ents - Music',
u'http://www.independent.co.uk/arts-entertainment/music/?service=rss'
),
(u'Arts & Ents - Classical',
u'http://www.independent.co.uk/arts-entertainment/classical/?service=rss'
),
(u'Arts & Ents - Films',
u'http://www.independent.co.uk/arts-entertainment/films/?service=rss'
),
(u'Arts & Ents - TV',
u'http://www.independent.co.uk/arts-entertainment/tv/?service=rss'
),
(u'Arts & Ents - Theatre and Dance',
u'http://www.independent.co.uk/arts-entertainment/theatre-dance/?service=rss'
),
(u'Arts & Ents - Comedy',
u'http://www.independent.co.uk/arts-entertainment/comedy/?service=rss'
),
(u'Arts & Ents - Books',
u'http://www.independent.co.uk/arts-entertainment/books/?service=rss'
),
(u'Travel', u'http://www.independent.co.uk/travel/?service=rss'
),
(u'Money', u'http://www.independent.co.uk/money/?service=rss'),
(u'IndyBest',
u'http://www.independent.co.uk/extras/indybest/?service=rss'),
]

recipes/infra_pl.recipe Normal file
View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class INFRA(BasicNewsRecipe):
title = u'INFRA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
category = 'UFO'
language = 'pl'
no_stylesheets = True
remove_tags_before=dict(name='h2', attrs={'class':'contentheading'})
remove_tags_after=dict(attrs={'class':'pagenav'})
remove_tags=[dict(attrs={'class':'pagenav'})]
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]

recipes/japan_news.recipe Normal file
View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsOnJapan(BasicNewsRecipe):
title = u'News On Japan'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [
('News',
'http://newsonjapan.com/rss/top.xml'),
]

View File

@ -0,0 +1,14 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Kosmonauta(BasicNewsRecipe):
title = u'Kosmonauta.net'
__author__ = 'fenuks'
description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
category = 'astronomy'
language = 'pl'
cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]

View File

@ -11,7 +11,7 @@ __description__ = 'Italian weekly magazine'
from calibre.web.feeds.news import BasicNewsRecipe
class Espresso(BasicNewsRecipe):
__author__ = 'Lorenzo Vigentini, Gabriele Marini'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Krittika Goyal'
description = 'Italian weekly magazine'
cover_url = 'http://espresso.repubblica.it/images/logo_espresso.gif'
@ -26,10 +26,9 @@ class Espresso(BasicNewsRecipe):
oldest_article = 16
max_articles_per_feed = 100
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
feeds = [
@ -42,36 +41,3 @@ class Espresso(BasicNewsRecipe):
(u'Chiesa: HomePage', u'http://data.kataweb.it/rss/chiesa/homepage/it'),
(u'Chiesa: Speciali e Focus', u'http://data.kataweb.it/rss/chiesa/speciali_e_focus/it')
]
def print_version(self,url):
print url[7:25]
if url[7:25] == 'temi.repubblica.it':
return url + '/?printpage=undefined'
elif url[7:25] == 'www.chiesa.espress':
return url
return url + '/&print=true'
keep_only_tags = [
dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
dict(name='div', attrs={'id':['content-second-right','content2']})
]
remove_tags = [
dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
dict(name='ul',attrs={'id':'user-utility'}),
dict(name=['script','noscript','iframe'])
]
# extra_css = '''
# h1 {font-family:Times New Roman,"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:24px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
# h2 {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
# h3 {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
# h4 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
# h5 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
# .firma {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
# .testo {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
# '''

View File

@ -1,13 +1,12 @@
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
'''
http://www.repubblica.it/
'''
import re
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
@ -33,12 +32,6 @@ class LaRepubblica(BasicNewsRecipe):
remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
preprocess_regexps = [
(re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE), lambda match: '<head><title>'),
(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda match: '</title></head>')
]
def get_article_url(self, article):
link = BasicNewsRecipe.get_article_url(self, article)
if link and not '.repubblica.it/' in link:
@ -73,15 +66,15 @@ class LaRepubblica(BasicNewsRecipe):
remove_tags = [
dict(name=['object','link','meta','iframe','embed']),
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div', attrs={'class':'bottom-mobile'}),
dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
dict(name='div', attrs={'id':['rssdiv','blocco']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
dict(name='div', attrs={'class':'utility'}),
dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
dict(name='div', attrs={'class':'generalbox'}),
dict(name='ul', attrs={'id':'hystory'})
]
feeds = [
(u'Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
(u'Homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
(u'Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
(u'Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
(u'Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
@ -110,3 +103,5 @@ class LaRepubblica(BasicNewsRecipe):
del item['style']
return soup
def preprocess_raw_html(self, raw, url):
return '<html><head>'+raw[raw.find('</head>'):]
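# the slice above keeps everything from the original </head> onwards and
# prepends a minimal head of its own, i.e. it discards the whole noisy <head>
# section in one step instead of the three regexes removed above.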

View File

@ -0,0 +1,94 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class LetsGetCritical(BasicNewsRecipe):
title = u"Let's Get Critical"
description = 'Curation / aggregation of criticisms of the arts and culture '
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://www.letsgetcritical.org'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('Architecture','architecture',30),
('Art','art',30),
('Books','books',30),
('Design','design',30),
('Digital','digital',30),
('Food','food',30),
('Movies','movies',30),
('Music','music',30),
('Television','television',30),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://www.letsgetcritical.org/wp-content/themes/lets_get_critical/images/lgc.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/category/'+tag.lower()
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
posts = soup.findAll('div',attrs={'class':'post_multi'})
if len(posts) == 0:
break
for post in posts:
dt = post.find('div',attrs={'class':'title'})
atag = dt.find('a')
url = atag['href']
# skip promotionals and duplicate
if url.startswith('http://letsgetcritical') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(atag)
self.log('\tFound article:', title)
self.log('\t', url)
desc = post.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = post.previousSibling
# navigate up sibling to find date
while p:
if hasattr(p,'class') and p['class'] == 'singledate':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
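# pagination note: pages are fetched as <INDEX><tag>/page/2, /page/3, ... until
# a page yields no 'post_multi' divs, the category's max_articles quota is
# filled, or the hard stop of 100 pages is reached.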

View File

@ -1,95 +1,117 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
from BeautifulSoup import BeautifulSoup
try:
from calibre_plugins.drMerry.debug import debuglogger as mlog
print 'drMerry debuglogger found, debug options can be used'
from calibre_plugins.drMerry.stats import statslogger as mstat
print 'drMerry stats tracker found, stat can be tracked'
mlog.setLoglevel(1) #-1 == no log; 0 for normal output
mstat.calculateStats(False) #track stats (to track stats loglevel must be > 0
KEEPSTATS = mstat.keepmystats()
SHOWDEBUG0 = mlog.showdebuglevel(0)
SHOWDEBUG1 = mlog.showdebuglevel(1)
SHOWDEBUG2 = mlog.showdebuglevel(2)
except:
#print 'drMerry debuglogger not found, skipping debug options'
SHOWDEBUG0 = False
SHOWDEBUG1 = False
SHOWDEBUG2 = False
KEEPSTATS = False
#print ('level0: %s\nlevel1: %s\nlevel2: %s' % (SHOWDEBUG0,SHOWDEBUG1,SHOWDEBUG2))
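# fallback: when DrMerry's personal debug/stats plugins are not installed,
# all debug levels and stats collection are simply switched off, so the
# recipe still runs unchanged for everyone else.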
''' Version 1.2, updated cover image to match the changed website.
added info date on title
version 1.4 Updated tags, delay and added autoclean 22-09-2011
version 1.5 Changes due to changes in site
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
Added some processing on pictures
Removed links in html
Removed extra white characters
changed handling of self closing span
'''
Version 1.7 11-11-2011 Changed oldest_article back to 1.5
changed è into &egrave;
updated remove tags
removed keep_only tags
Version 1.8 26-11-2011
added remove tag: article-slideshow
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL'
oldest_article = 2
oldest_article = 10
max_articles_per_feed = 100
max_articles_per_feed = 15
__author__ = u'DrMerry'
description = u'Metro Nederland'
language = u'nl'
simultaneous_downloads = 5
#delay = 1
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
#auto_cleanup = True
timeout = 2
#auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
center_navbar = True
timefmt = ' [%A, %d %b %Y]'
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper'
remove_tags_before = dict(name='div', attrs={'id':'date'})
remove_tags_after = dict(name='div', attrs={'class':'article-body'})
encoding = 'utf-8'
remove_attributes = ['style', 'font', 'width', 'height']
use_embedded_content = False
conversion_options = {
'authors' : 'Metro Nederland & calibre & DrMerry',
'author_sort' : 'Metro Nederland & calibre & DrMerry',
'publisher' : 'DrMerry/Metro Nederland'
}
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
.article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
dict(name='div', attrs={'id':['date']}),
dict(name='h1', attrs={'class':['title']}),
dict(name='h2', attrs={'class':['subtitle']})]
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
#date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear: both;margin-bottom: 10px;font-size:0.5em; color: #616262;}\
.article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
div.column-1-2 {display: inline;padding-right: 7px;}\
p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
img {border:0px; padding:2px;} hr.merryhr {width:30%; border-width:0px; color:green; margin-left:5px; background-color: green} div.column-3 {background-color:#eee; width:50%; margin:2px; float:right; padding:2px;} div.column-3 module-title {border: 1px solid #aaa} div.article-box-fact div.subtitle {font-weight:bold; color:green;}'
preprocess_regexps = [
(re.compile(r'<img[^>]+top-line[^>]+>', re.DOTALL|re.IGNORECASE),
lambda match: '<hr class="merryhr" />'),
(re.compile(r'(<img[^>]+metronieuws\.nl/[^>]+/templates/[^>]+jpe?g[^>]+>|metronieuws\.nl/internal\-roxen\-unit\.gif)', re.DOTALL|re.IGNORECASE),
lambda match: ''),
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
dict(name='iframe')]
preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
(re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
]
def preprocess_html(self, soup):
if SHOWDEBUG0 == True:
mlog.setdefaults()
mlog.addTextAndTag(['Show debug = on with level'], [str(mlog.debuglevel)])
if KEEPSTATS == True:
mlog.addDebug('Stats will be calculated')
else:
mlog.addTextAndTag(['Stats won\'t be calculated\nTo be enabled, stats must be true, currently','and debug level must be 1 or higher, currently'],[mstat.dokeepmystats, mlog.debuglevel])
mlog.showDebug()
myProcess = MerryProcess()
myProcess.removeUnwantedTags(soup)
return soup
def postprocess_html(self, soup, first):
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
#width, height = img.size
#print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
img.trim(0)
img.save(iurl)
'''
#width, height = img.size
#print '***TRIMMED img width is: ', width, 'height is: ', height
left=0
top=0
border_color='#ffffff'
width, height = img.size
#print '***retrieved img width is: ', width, 'height is: ', height
height_correction = 1.17
canvas = create_canvas(width, height*height_correction,border_color)
canvas.compose(img, left, top)
#img = canvas
canvas.save(iurl)
#width, height = canvas.size
#print '***NEW img width is: ', width, 'height is: ', height
'''
return soup
myProcess = MerryProcess()
myProcess.optimizeLayout(soup)
if SHOWDEBUG0 == True:
if KEEPSTATS == True:
statinfo = 'generated stats:'
statinfo += str(mstat.stats(mstat.statslist))
print statinfo
statinfo = 'generated stats (for removed tags):'
statinfo += str(mstat.stats(mstat.removedtagslist))
print statinfo
#show all Debug info we forgot to report
#Using print to be sure that this text will not be added at the end of the log.
print '\n!!!!!unreported messages:\n(should be empty)\n'
mlog.showDebug()
return soup
feeds = [
@ -105,6 +127,291 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
(u'Familie', u'http://www.metronieuws.nl/rss.xml?c=1283166782-9'),
(u'Blogs', u'http://www.metronieuws.nl/rss.xml?c=1295586825-6'),
(u'Reizen', u'http://www.metronieuws.nl/rss.xml?c=1277377288-13'),
(u'Carrière', u'http://www.metronieuws.nl/rss.xml?c=1278070988-1'),
(u'Carri&egrave;re', u'http://www.metronieuws.nl/rss.xml?c=1278070988-1'),
(u'Sport', u'http://www.metronieuws.nl/rss.xml?c=1277377288-12')
]
class MerryPreProcess():
def replacePictures(self, soup):
#to be implemented
return soup
def optimizePicture(self,soup):
if SHOWDEBUG0 == True:
mlog.addDebug('start image optimize')
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
img.trim(0)
img.save(iurl)
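# note: img.trim(0) crops the uniform-colour border around each downloaded
# image in place (fuzz factor 0), which is presumably why no extra
# canvas/resize pass is needed here any more.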
if SHOWDEBUG0 == True:
mlog.addDebug('Images optimized')
mlog.showDebug()
return soup
class MerryExtract():
def safeRemovePart(self, killingSoup, soupIsArray):
if killingSoup and not killingSoup == None:
if SHOWDEBUG2 == True:
mlog.addTextAndTag(['items to remove'],[killingSoup])
try:
if soupIsArray == True:
for killer in killingSoup:
killer.extract()
else:
killingSoup.extract()
if SHOWDEBUG1 == True:
mlog.addDebug('tag extracted')
mlog.showDebug()
if KEEPSTATS == True:
try:
mstat.addstat(mstat.removedtagslist,str(killingSoup.name))
except:
mstat.addstat(mstat.removedtagslist,'unknown')
except:
if SHOWDEBUG1 == True:
mlog.addDebug('tag extraction failed')
mlog.showDebug()
if KEEPSTATS == True:
mstat.addstat(mstat.removedtagslist,'exception')
return False
else:
return False
return killingSoup
class MerryReplace():
myKiller = MerryExtract()
def replaceATag(self, soup):
anchors = []
anchors = soup.findAll('a')
if anchors and not (anchors == None or anchors == []):
try:
for link in anchors:
# print str(link)
if link and not link == None:
# print ('type: %s'%(str(type(link))))
# print ('link: %s' % (link))
myParent = link.parent
# print str('parent: %s'%(myParent))
try:
myIndex = link.parent.index(link)
hasIndex = True
except:
myIndex = 0
hasIndex = False
# print str('index %s'%(myIndex))
if not link.string == None:
# print 'link=notnone'
if hasIndex == True:
myParent.insert(myIndex, link.string)
else:
myParent.append(link.string)
else:
# print 'link=none'
myParent.insert(myIndex, link.contents)
self.myKiller.safeRemovePart(link, False)
else:
notshown = 'tag received is empty' # print
except:
notshown = 'tag received is empty' # print
notshown
return soup
class MerryProcess(BeautifulSoup):
myKiller = MerryExtract()
myReplacer = MerryReplace()
myPrepare = MerryPreProcess()
def optimizeLayout(self,soup):
self.myPrepare.optimizePicture(soup)
if SHOWDEBUG0 == True:
mlog.addDebug('End of Optimize Layout')
mlog.showDebug()
return soup
def insertFacts(self, soup):
allfacts = soup.findAll('div', {'class':re.compile('^article-box-fact.*$')})
if SHOWDEBUG0 == True:
mlog.addTextAndTag(['allfacts'],[allfacts])
mlog.showDebug()
if allfacts and not allfacts == None:
allfactsparent = soup.find('div', {'class':re.compile('^article-box-fact.*$')}).parent
if SHOWDEBUG0 == True:
mlog.addTextAndTag(['allfactsparent'],[allfactsparent])
mlog.showDebug()
for part in allfactsparent:
if not part in allfacts:
if SHOWDEBUG0 == True:
mlog.addTextAndTag(['FOUND A non-fact'],[part])
mlog.showDebug()
self.myKiller.safeRemovePart(part, True)
if SHOWDEBUG1 == True:
mlog.addTextAndTag(['New All Facts'],[allfacts])
mlog.showDebug()
articlefacts = soup.find('div', {'class':'article-box-fact column'})
errorOccured=False
if (articlefacts and not articlefacts==None):
try:
contenttag = soup.find('div', {'class':'article-body'})
if SHOWDEBUG0 == True:
mlog.addTextAndTag(['curcontag'],[contenttag])
mlog.showDebug()
foundrighttag = False
if contenttag and not contenttag == None:
foundrighttag = True
if SHOWDEBUG0 == True:
if errorOccured == False:
mlog.addTextAndTag(['type','curcontag (in while)'],[type(contenttag),contenttag])
else:
mlog.addDebug('Could not find right parent tag. Error Occured')
mlog.showDebug()
if foundrighttag == True:
contenttag.insert(0, allfactsparent)
if SHOWDEBUG2 == True:
mlog.addTextAndTag(['added parent'],[soup.prettify()])
mlog.showDebug()
except:
errorOccured=True
mlog.addTrace()
else:
errorOccured=True
if SHOWDEBUG0 == True and errorOccured == True:
mlog.addTextAndTag(['no articlefacts'],[articlefacts])
mlog.showDebug()
return soup
def previousNextSibRemover(self, soup, previous=True, soupIsArray=False):
findsibsof = soup
firstpart = previous
if findsibsof and not findsibsof == None:
if soupIsArray == True:
for foundsib in findsibsof:
self.previousNextSibRemover(foundsib, firstpart, soupIsArray=False)
else:
if firstpart == True and soupIsArray == False:
sibs = findsibsof.previousSiblingGenerator()
else:
sibs = findsibsof.nextSiblingGenerator()
for sib in sibs:
self.myKiller.safeRemovePart(sib, True)
else:
if SHOWDEBUG1 == True:
mlog.addDebug('Not any sib found')
return
def removeUnwantedTags(self,soup):
if SHOWDEBUG1 == True:
mlog.addTextAndTag(['Len of Soup before RemoveTagsByName'],[len(str(soup))])
mlog.showDebug()
self.removeTagsByName(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before firstandlastpart: %s' % len(str(soup)))
mlog.showDebug()
self.insertFacts(soup)
self.removeFirstAndLastPart(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before unwantedpart: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedParts(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before EmptyParts: %s' % len(str(soup)))
mlog.showDebug()
self.removeEmptyTags(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup after EmptyParts: %s' % len(str(soup)))
mlog.showDebug()
self.myReplacer.replaceATag(soup)
return soup
def removeUnwantedParts(self, soup):
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before UnwantedID: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedTagsByID(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before Class: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedTagsByClass(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before Style: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedTagsByStyle(soup)
return soup
def removeUnwantedTagsByStyle(self,soup):
self.removeArrayOfTags(soup.findAll(attrs={'style' : re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
if SHOWDEBUG0 == True:
mlog.addDebug('end remove by style')
return soup
def removeArrayOfTags(self,souparray):
return self.myKiller.safeRemovePart(souparray, True)
def removeUnwantedTagsByClass(self,soup):
if SHOWDEBUG0 == True:
mlog.addDebug('start remove by class')
self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15)$')}))
return soup
def removeUnwantedTagsByID(self,soup):
defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer']
for removeid in defaultids:
if SHOWDEBUG1 == True:
mlog.addDebug('RemoveTagByID, tag: %s, Len of Soup: %s' % (str(removeid), len(str(soup))))
mlog.showDebug()
self.removeArrayOfTags(soup.findAll(id=removeid))
return soup
# def safeRemoveTag(self, subtree):
# return self.myKiller.safeRemovePart(subtree, True)
def removeTagsByName(self, soup):
self.myKiller.safeRemovePart(soup.script, True)
self.myKiller.safeRemovePart(soup.iframe, True)
self.myKiller.safeRemovePart(soup.style, True)
self.myKiller.safeRemovePart(soup.noscript, True)
return soup
def removeEmptyTags(self,soup,run=0):
if SHOWDEBUG0 == True:
mlog.addDebug('starting removeEmptyTags')
if SHOWDEBUG1 == True:
run += 1
mlog.addDebug(run)
if SHOWDEBUG2 == True:
mlog.addDebug(str(soup.prettify()))
mlog.showDebug()
emptymatches = re.compile('^(&nbsp;|\s|\n|\r|\t)*$')
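# the findAll below selects tags that contain no child tags and whose text is
# empty or only whitespace/&nbsp;, while skipping self-closing tags like <img/>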
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or tag.string.strip()=="" or emptymatches.match(tag.string) is not None) and not tag.isSelfClosing)
if emptytags and not (emptytags == None or emptytags == []):
if SHOWDEBUG1 == True:
mlog.addDebug('tags found')
mlog.addDebug(str(emptytags))
self.removeArrayOfTags(emptytags)
#recursive in case removing empty tag creates new empty tag
self.removeEmptyTags(soup, run=run)
else:
if SHOWDEBUG1 == True:
mlog.addDebug('no empty tags found')
mlog.showDebug()
if SHOWDEBUG0 == True:
if SHOWDEBUG2 == True:
mlog.addDebug('new soup:')
mlog.addDebug(str(soup.prettify()))
mlog.addDebug('RemoveEmptyTags Completed')
mlog.showDebug()
return soup
def removeFirstAndLastPart(self,soup):
def findparenttag(lookuptag):
if lookuptag and not lookuptag == None:
return lookuptag.findParents()
findtag = soup.find(id="date")
self.previousNextSibRemover(findtag, previous=True, soupIsArray=False)
self.previousNextSibRemover(findparenttag(findtag), previous=True, soupIsArray=True)
for endtag in [soup.find(id="share-and-byline"), soup.find("div", { "class" : "gallery-text" })]:
self.previousNextSibRemover(endtag, previous=False, soupIsArray=False)
self.previousNextSibRemover(findparenttag(endtag), previous=False, soupIsArray=True)
return soup
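# in short: everything before the id="date" element and everything after the
# share-and-byline block (or the gallery text) is stripped, at every nesting
# level, leaving only the article body between those two anchors.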

View File

@ -5,8 +5,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
description = 'News as provided by The Metro - UK'
__author__ = 'Dave Asbury'
#last update 3/12/11
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 20
@ -26,15 +26,17 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
keep_only_tags = [
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
dict(attrs={'class':['img-cnt figure']}),
dict(attrs={'class':['art-img']}),
dict(name='div', attrs={'class':'art-lft'}),
dict(name='p')
]
remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
remove_tags = [
dict(name = 'div',attrs={'id' : ['comments-news','formSubmission']}),
dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r','username','clrd' ]}),
dict(attrs={'class':['username', 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime','addYourComment','displayName']})
,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
]
feeds = [ feeds = [
@ -42,9 +44,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
extra_css = '''
body {font: sans-serif medium;}'
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''

View File

@ -10,6 +10,10 @@ __MakePeriodical__ = True
__UseChineseTitle__ = False
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) It is to disable premium content (Default: False)
@ -24,6 +28,10 @@ __Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
@ -52,6 +60,7 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
@ -59,11 +68,15 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
@ -108,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>") lambda match: "</b>")
] ]
elif __Region__ == 'Vancouver': elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver' if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -126,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -160,9 +179,9 @@ class MPRecipe(BasicNewsRecipe):
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
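# worked example for the new HK offsets (+8h, -4.5h): 20:30 UTC maps to
# 00:00 on the next local fetch-day, i.e. the download date only rolls over
# to the new day once it is 4:30am in Hong Kong.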
@ -185,6 +204,18 @@ class MPRecipe(BasicNewsRecipe):
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
if __Date__ <> '':
return __Date__[6:8]
@ -533,12 +564,22 @@ class MPRecipe(BasicNewsRecipe):
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
if item.startswith(u'\u3010'):
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
@ -643,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
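# fallback behaviour: when no usable first paragraph is found, the Kindle
# article view gets a character count (the u'\uff08' + count + u'\u5b57\uff09'
# string above, i.e. "(N characters)") instead of a real text summary.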
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use customed date instead of current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
        def feed_index(num, parent):
            f = feeds[num]
@@ -728,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
-                   parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                       play_order=po, author=auth, description=desc)
                    parent.add_item('%sindex.html'%adir, None,
                        a.title if a.title else _('Untitled Article'),
                        play_order=po, author=auth,
                        description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
@@ -751,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                    prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                    templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
-                                   a.orig_url, self.publisher, prefix=prefix,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                    elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                    body.insert(len(body.contents), elem)
@@ -774,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
@@ -787,3 +907,5 @@ class MPRecipe(BasicNewsRecipe):
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
@@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
-# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
-# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) Set it to True to include premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish to use hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_opf() routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
            from create_opf(). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
            download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
2011/12/01: take care of the situation that, in txt source parsing, the article content does not start with the special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
            provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
-import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
    if __Region__ == 'Hong Kong':
-       title = 'Ming Pao - Hong Kong'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = 'Ming Pao - Hong Kong'
        description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
        category = 'Chinese, News, Hong Kong'
-       extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
        masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
        keep_only_tags = [dict(name='h1'),
                          dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                          dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                          dict(attrs={'class':['heading']}), # for heading from txt
                          dict(attrs={'id':['newscontent']}), # entertainment and column page content
                          dict(attrs={'id':['newscontent01','newscontent02']}),
                          dict(attrs={'class':['content']}), # for content from txt
                          dict(attrs={'class':['photo']}),
                          dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
-                         dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
                          dict(attrs={'class':['images']}) # for images from txt
                         ]
        if __KeepImages__:
            remove_tags = [dict(name='style'),
@@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                          ]
    elif __Region__ == 'Vancouver':
-       title = 'Ming Pao - Vancouver'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
        else:
            title = 'Ming Pao - Vancouver'
        description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
        category = 'Chinese, News, Vancouver'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
                           lambda match: ''),
                          ]
    elif __Region__ == 'Toronto':
-       title = 'Ming Pao - Toronto'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = 'Ming Pao - Toronto'
        description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
        category = 'Chinese, News, Toronto'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
    conversion_options = {'linearize_tables':True}
    timefmt = ''

-   def image_url_processor(cls, baseurl, url):
-       # trick: break the url at the first occurance of digit, add an additional
-       # '_' at the front
-       # not working, may need to move this to preprocess_html() method
-       # minIdx = 10000
-       # i0 = url.find('0')
-       # if i0 >= 0 and i0 < minIdx:
-       #     minIdx = i0
-       # i1 = url.find('1')
-       # if i1 >= 0 and i1 < minIdx:
-       #     minIdx = i1
-       # i2 = url.find('2')
-       # if i2 >= 0 and i2 < minIdx:
-       #     minIdx = i2
-       # i3 = url.find('3')
-       # if i3 >= 0 and i0 < minIdx:
-       #     minIdx = i3
-       # i4 = url.find('4')
-       # if i4 >= 0 and i4 < minIdx:
-       #     minIdx = i4
-       # i5 = url.find('5')
-       # if i5 >= 0 and i5 < minIdx:
-       #     minIdx = i5
-       # i6 = url.find('6')
-       # if i6 >= 0 and i6 < minIdx:
-       #     minIdx = i6
-       # i7 = url.find('7')
-       # if i7 >= 0 and i7 < minIdx:
-       #     minIdx = i7
-       # i8 = url.find('8')
-       # if i8 >= 0 and i8 < minIdx:
-       #     minIdx = i8
-       # i9 = url.find('9')
-       # if i9 >= 0 and i9 < minIdx:
-       #     minIdx = i9
-       return url
    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        if __Region__ == 'Hong Kong':
-           # convert UTC to local hk time - at HKT 5.30am, all news are available
-           dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
-           # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
            # convert UTC to local hk time - at HKT 4.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
        elif __Region__ == 'Vancouver':
            # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
        return dt_local
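The timedelta arithmetic in get_dtlocal() encodes both the UTC offset and the hour at which a new issue becomes available: for Hong Kong, subtracting a further 4.5 hours means the date only rolls over at 4:30am HKT. A standalone illustration of the same arithmetic:

import datetime

# Hong Kong is UTC+8; the extra 4.5-hour subtraction delays the rollover.
dt_utc = datetime.datetime.utcnow()
dt_hk = dt_utc + datetime.timedelta(hours=8) - datetime.timedelta(hours=4.5)
print(dt_hk.strftime('%Y%m%d'))
# e.g. at 2011-12-19 03:00 HKT (18:00 UTC previous day) this still prints
# 20111218, so the previous day's paper is fetched.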
    def get_fetchdate(self):
-       return self.get_dtlocal().strftime("%Y%m%d")
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
-       return self.get_dtlocal().strftime("%Y-%m-%d")
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
-       return self.get_dtlocal().strftime("%d")
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")
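The __Date__ override lets all of these helpers serve a fixed YYYYMMDD string instead of consulting the clock. Roughly equivalent standalone logic (the function name is hypothetical, for illustration only):

import datetime

def fetch_parts(date_override, dt):
    # slice the override if given, else format the supplied datetime
    if date_override != '':
        return date_override[0:4], date_override[4:6], date_override[6:8]
    return dt.strftime('%Y'), dt.strftime('%m'), dt.strftime('%d')

print(fetch_parts('20111218', datetime.datetime.utcnow()))  # ('2011', '12', '18')
print(fetch_parts('', datetime.datetime(2011, 12, 19)))     # ('2011', '12', '19')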
    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
@@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
                              (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                              (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                              (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
-                             (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
-                             (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
-               articles = self.parse_section2(url, keystr)
                              (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                             ]:
                if __InclPremium__ == True:
                    articles = self.parse_section2_txt(url, keystr)
                else:
                    articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
@@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
        else:
            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                              (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - editorial
-           ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-           if ed_articles:
-               feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
            #if ed_articles:
            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-           fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-           if fin_articles:
-               feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            #if fin_articles:
            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

-           for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                              (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-               articles = self.parse_section(url)
            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
            #    articles = self.parse_section(url)
            #    if articles:
            #        feeds.append((title, articles))

            # special - entertainment
-           ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-           if ent_articles:
-               feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            #if ent_articles:
            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                      ]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
                if articles:
                    feeds.append((title, articles))

-           # special- columns
-           col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-           if col_articles:
-               feeds.append((u'\u5c08\u6b04 Columns', col_articles))
        elif __Region__ == 'Vancouver':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
                    title = self.tag_to_string(a)
                    url = a.get('href', False)
                    url = 'http://news.mingpao.com/' + dateStr + '/' +url
                    # replace the url to the print-friendly version
                    if __ParsePFF__ == True:
                        if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                            url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                            url = re.sub('%2F.*%2F', '/', url)
                            title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
                            url = url.replace('%2Etxt', '_print.htm')
                            url = url.replace('%5F', '_')
                        else:
                            url = url.replace('.htm', '_print.htm')
                    if url not in included_urls and url.rfind('Redirect') == -1:
                        current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                        included_urls.append(url)
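The Redirect branch above rewrites a premium article URL into its printer-friendly form. A standalone trace of those substitutions (the sample URL is invented for illustration; real ones carry the date twice plus %2F/%5F escapes):

import re

url = 'http://news.mingpao.com/20111218/Redirect20111218%2Fsub%2Fgaa1%5F1%2Etxt'
dateStr = '20111218'
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)  # drop the Redirect segment
url = re.sub('%2F.*%2F', '/', url)                    # collapse the escaped path
url = url.replace('%2Etxt', '_print.htm').replace('%5F', '_')
print(url)  # http://news.mingpao.com/20111218/gaa1_1_print.htm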
@@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
@@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
-               url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
-               current_articles.append({'title': title, 'url': url, 'description': ''})
-               included_urls.append(url)
                try:
                    br.open_novisit(url)
                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                    current_articles.append({'title': title, 'url': url, 'description': ''})
                    included_urls.append(url)
                except:
                    print 'skipping a premium article'
        current_articles.reverse()
        return current_articles

    # parse from text file of life.mingpao.com
    def parse_section2_txt(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles
@@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
        current_articles.reverse()
        return current_articles
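On the premium check in parse_section2() above: with redirect handling switched off, a premium link (which redirects away from the article) raises instead of opening, so the recipe simply skips it. A sketch of the same idea with plain mechanize; the recipe uses calibre's open_novisit(), but stock open() fails the same way for this purpose:

import mechanize

br = mechanize.Browser()
br.set_handle_redirect(False)

def is_fetchable(url):
    # a 3xx response raises once redirect handling is disabled
    try:
        br.open(url)
        return True
    except Exception:
        return False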
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
new_html = raw_html
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
if url.rfind('_print.htm') <> -1:
# javascript based file
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
# .txt based file
                splitter = re.compile(r'\n')  # split the text file at line breaks
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if next_is_img_txt == False and met_article_start_char == False:
if item <> '':
if title_started == False:
#print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
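The hi-res handling above tries the article image's .gif variant first and, if that request fails, falls back to prefixing the file name with '_' to reach the larger .jpg (a naming convention inferred from this code; the example name is made up):

def hi_res_candidates(src):
    # src like 'src="photos/abc.jpg"'
    gif = src.replace('jpg"', 'gif"')          # preferred .gif variant
    pos = src.rfind('/')
    underscored = src[:pos + 1] + '_' + src[pos + 1:]  # '_'-prefixed hi-res .jpg
    return [gif, underscored]

print(hi_res_candidates('src="photos/abc.jpg"'))
# ['src="photos/abc.gif"', 'src="photos/_abc.jpg"']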
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
@@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
                del item['absmiddle']
        return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
-       if __UseChineseTitle__ == True:
-           if __Region__ == 'Hong Kong':
-               title = u'\u660e\u5831 (\u9999\u6e2f)'
-           elif __Region__ == 'Vancouver':
-               title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
-           elif __Region__ == 'Toronto':
-               title = u'\u660e\u5831 (\u591a\u502b\u591a)'
-       else:
-           title = self.short_title()
-       # if not generating a periodical, force date to apply in title
-       if __MakePeriodical__ == False:
        title = self.short_title()
        # change 1: allow our own flag to tell if a periodical is to be generated
        # also use custom date instead of current time
        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
            title = title + ' ' + self.get_fetchformatteddate()
        # end of change 1
-       if True:
-           mi = MetaInformation(title, [self.publisher])
-           mi.publisher = self.publisher
-           mi.author_sort = self.publisher
-           if __MakePeriodical__ == True:
-               mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
-           else:
-               mi.publication_type = self.publication_type+':'+self.short_title()
-           #mi.timestamp = nowf()
-           mi.timestamp = self.get_dtlocal()
-           mi.comments = self.description
-           if not isinstance(mi.comments, unicode):
-               mi.comments = mi.comments.decode('utf-8', 'replace')
-           #mi.pubdate = nowf()
-           mi.pubdate = self.get_dtlocal()
        # change 2: __appname__ replaced by newspaper publisher
        __appname__ = self.publisher
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
        if __MakePeriodical__ == True:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        else:
            mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        # change 4: in the following, all the nowf() are changed to adjusted time
        # This one doesn't matter
        mi.timestamp = nowf()
        # change 5: skip listing the articles
        #article_titles, aseen = [], set()
        #for f in feeds:
        #    for a in f:
        #        if a.title and a.title not in aseen:
        #            aseen.add(a.title)
        #            article_titles.append(force_unicode(a.title, 'utf-8'))
        #mi.comments = self.description
        #if not isinstance(mi.comments, unicode):
        #    mi.comments = mi.comments.decode('utf-8', 'replace')
        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
        #    '\n\n'.join(article_titles))
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)
        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))
        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)
        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)
        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}
        def feed_index(num, parent):
            f = feeds[num]
@@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
-                   parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                       play_order=po, author=auth, description=desc)
                    parent.add_item('%sindex.html'%adir, None,
                        a.title if a.title else _('Untitled Article'),
                        play_order=po, author=auth,
                        description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
@@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                    prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                    templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
-                                   a.orig_url, self.publisher, prefix=prefix,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                    elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                    body.insert(len(body.contents), elem)
@@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
@@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
@@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support
-# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
-# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) Set it to True to include premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish to use hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_opf() routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
            from create_opf(). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
            download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
2011/12/01: take care of the situation that, in txt source parsing, the article content does not start with the special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
            provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
-import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
    if __Region__ == 'Hong Kong':
-       title = 'Ming Pao - Hong Kong'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = 'Ming Pao - Hong Kong'
        description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
        category = 'Chinese, News, Hong Kong'
-       extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
        masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
        keep_only_tags = [dict(name='h1'),
                          dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                          dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                          dict(attrs={'class':['heading']}), # for heading from txt
                          dict(attrs={'id':['newscontent']}), # entertainment and column page content
                          dict(attrs={'id':['newscontent01','newscontent02']}),
                          dict(attrs={'class':['content']}), # for content from txt
                          dict(attrs={'class':['photo']}),
                          dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
-                         dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
                          dict(attrs={'class':['images']}) # for images from txt
                         ]
        if __KeepImages__:
            remove_tags = [dict(name='style'),
@@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                          ]
    elif __Region__ == 'Vancouver':
-       title = 'Ming Pao - Vancouver'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
        else:
            title = 'Ming Pao - Vancouver'
        description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
        category = 'Chinese, News, Vancouver'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
                           lambda match: ''),
                          ]
    elif __Region__ == 'Toronto':
-       title = 'Ming Pao - Toronto'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = 'Ming Pao - Toronto'
        description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
        category = 'Chinese, News, Toronto'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
    conversion_options = {'linearize_tables':True}
    timefmt = ''

-   def image_url_processor(cls, baseurl, url):
-       # trick: break the url at the first occurance of digit, add an additional
-       # '_' at the front
-       # not working, may need to move this to preprocess_html() method
-       # minIdx = 10000
-       # i0 = url.find('0')
-       # if i0 >= 0 and i0 < minIdx:
-       #     minIdx = i0
-       # i1 = url.find('1')
-       # if i1 >= 0 and i1 < minIdx:
-       #     minIdx = i1
-       # i2 = url.find('2')
-       # if i2 >= 0 and i2 < minIdx:
-       #     minIdx = i2
-       # i3 = url.find('3')
-       # if i3 >= 0 and i0 < minIdx:
-       #     minIdx = i3
-       # i4 = url.find('4')
-       # if i4 >= 0 and i4 < minIdx:
-       #     minIdx = i4
-       # i5 = url.find('5')
-       # if i5 >= 0 and i5 < minIdx:
-       #     minIdx = i5
-       # i6 = url.find('6')
-       # if i6 >= 0 and i6 < minIdx:
-       #     minIdx = i6
-       # i7 = url.find('7')
-       # if i7 >= 0 and i7 < minIdx:
-       #     minIdx = i7
-       # i8 = url.find('8')
-       # if i8 >= 0 and i8 < minIdx:
-       #     minIdx = i8
-       # i9 = url.find('9')
-       # if i9 >= 0 and i9 < minIdx:
-       #     minIdx = i9
-       return url
    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        if __Region__ == 'Hong Kong':
-           # convert UTC to local hk time - at HKT 5.30am, all news are available
-           dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
-           # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
            # convert UTC to local hk time - at HKT 4.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
        elif __Region__ == 'Vancouver':
            # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
        return dt_local

    def get_fetchdate(self):
-       return self.get_dtlocal().strftime("%Y%m%d")
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
-       return self.get_dtlocal().strftime("%Y-%m-%d")
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
-       return self.get_dtlocal().strftime("%d")
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")
    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
@@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: ]:
articles = self.parse_section2(url, keystr) if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url) articles = self.parse_section(url)
@@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
        else:
            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - editorial
            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr')
            #if ed_articles:
            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            #if fin_articles:
            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
            #    articles = self.parse_section(url)
            #    if articles:
            #        feeds.append((title, articles))

            # special - entertainment
            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            #if ent_articles:
            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                       ]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn', 'ncl')
                                           ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
                if articles:
                    feeds.append((title, articles))

        elif __Region__ == 'Vancouver':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
                title = self.tag_to_string(a)
                url = a.get('href', False)
                url = 'http://news.mingpao.com/' + dateStr + '/' + url
                # replace the url with the print-friendly version
                if __ParsePFF__ == True:
                    if url.rfind('Redirect') != -1 and __InclPremium__ == True:
                        url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                        url = re.sub('%2F.*%2F', '/', url)
                        title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
                        url = url.replace('%2Etxt', '_print.htm')
                        url = url.replace('%5F', '_')
                    else:
                        url = url.replace('.htm', '_print.htm')
                if url not in included_urls and url.rfind('Redirect') == -1:
                    current_articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
                    included_urls.append(url)
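                # Traced on a hypothetical premium link (illustration only,
                # not a real URL): an encoded redirect such as
                #   .../20111226/Redirect...20111226%2F...%2Fgaa1%2Etxt
                # comes out as .../20111226/gaa1_print.htm after collapsing
                # the doubled date, decoding %2F to '/', and mapping
                # %2Etxt to '_print.htm' and %5F to '_'.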
@@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)

@@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                try:
                    br.open_novisit(url)
                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
                    current_articles.append({'title': title, 'url': url, 'description': ''})
                    included_urls.append(url)
                except:
                    print 'skipping a premium article'
        current_articles.reverse()
        return current_articles
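    # The try/except above doubles as a paywall probe: with redirects
    # disabled, open_novisit() raises on the HTTP redirect that premium links
    # return, so those articles are skipped. The same pattern in isolation
    # (a sketch with a placeholder URL):
    # br = mechanize.Browser()
    # br.set_handle_redirect(False)
    # try:
    #     br.open_novisit(some_url)   # raises if the server redirects
    #     reachable = True
    # except Exception:
    #     reachable = False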
    # parse from text file of life.mingpao.com
    def parse_section2_txt(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/')  # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

@@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
        current_articles.reverse()
        return current_articles
    # preprocess those .txt and javascript based files
    def preprocess_raw_html(self, raw_html, url):
        new_html = raw_html
        if url.rfind('ftp') != -1 or url.rfind('_print.htm') != -1:
            if url.rfind('_print.htm') != -1:
                # javascript based file
                splitter = re.compile(r'\n')
                new_raw_html = '<html><head><title>Untitled</title></head>'
                new_raw_html = new_raw_html + '<body>'
                for item in splitter.split(raw_html):
                    if item.startswith('var heading1 ='):
                        heading = item.replace('var heading1 = \'', '')
                        heading = heading.replace('\'', '')
                        heading = heading.replace(';', '')
                        new_raw_html = new_raw_html + '<div class="heading">' + heading
                    if item.startswith('var heading2 ='):
                        heading = item.replace('var heading2 = \'', '')
                        heading = heading.replace('\'', '')
                        heading = heading.replace(';', '')
                        if heading != '':
                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
                        else:
                            new_raw_html = new_raw_html + '</div>'
                    if item.startswith('var content ='):
                        content = item.replace("var content = ", '')
                        content = content.replace('\'', '')
                        content = content.replace(';', '')
                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
                    if item.startswith('var photocontent ='):
                        photo = item.replace('var photocontent = \'', '')
                        photo = photo.replace('\'', '')
                        photo = photo.replace(';', '')
                        photo = photo.replace('<tr>', '')
                        photo = photo.replace('<td>', '')
                        photo = photo.replace('</tr>', '')
                        photo = photo.replace('</td>', '<br>')
                        photo = photo.replace('class="photo"', '')
                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                new_html = new_raw_html + '</body></html>'
            else:
                # .txt based file
                splitter = re.compile(r'\n')  # split on newlines
                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                next_is_img_txt = False
                title_started = False
                title_break_reached = False
                met_article_start_char = False
                for item in splitter.split(raw_html):
                    item = item.strip()
                    # if title already reached but break between title and content not yet found, record title_break_reached
                    if title_started == True and title_break_reached == False and item == '':
                        title_break_reached = True
                    # if title reached and title_break_reached and met_article_start_char == False and item is not empty,
                    # start content
                    elif title_started == True and title_break_reached == True and met_article_start_char == False:
                        if item != '':
                            met_article_start_char = True
                            new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                            #if item.startswith(u'\u3010'):
                            #    met_article_start_char = True
                            #    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
                        if next_is_img_txt == False:
                            if item.startswith("=@"):
                                print 'skip movie link'
                            elif item.startswith("=?"):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
                            elif item.startswith('=='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[2:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
                            elif item.startswith('='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[1:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
                                if next_is_img_txt == False and met_article_start_char == False:
                                    if item != '':
                                        if title_started == False:
                                            #print 'Title started at ', item
                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                            title_started = True
                                        else:
                                            new_raw_html = new_raw_html + item + '\n'
                                    else:
                                        new_raw_html = new_raw_html + item + '<p>\n'
                        else:
                            next_is_img_txt = False
                            new_raw_html = new_raw_html + item + '\n'
                new_html = new_raw_html + '</div></body></html>'
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            new_html = new_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            new_html = new_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        new_html = new_html.replace(img, newimg)
                # repeat with src quoted by double quotes, for text parsed from src txt
                imglist = re.findall('src="?.*?jpg"', new_html)
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        #print 'url', url
                        pos = url.rfind('/')
                        gifurl = url[:pos+1]
                        #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.find('"')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'Use hi-res img', newimg
                        new_html = new_html.replace(img, newimg)
        return new_html
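    # The line-by-line 'var x = ...' scanning above could also be done with
    # one regex per variable; a compact sketch (the pattern is an assumption
    # about the site's javascript layout, not part of the recipe):
    # def _js_var(raw, name):
    #     m = re.search(r"var %s = '(.*?)';" % name, raw)
    #     return m.group(1) if m is not None else ''
    # heading = _js_var(raw_html, 'heading1')
    # content = _js_var(raw_html, 'content')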
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']

@@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
            del item['absmiddle']
        return soup
    def populate_article_metadata(self, article, soup, first):
        # thumbnails shouldn't be available if using hi-res images
        if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
            img = soup.find('img')
            if img is not None:
                self.add_toc_thumbnail(article, img['src'])

        try:
            if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
                # look for content
                articlebodies = soup.findAll('div', attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        if articlebody:
                            # the text may or may not be enclosed in <p></p> tag
                            paras = articlebody.findAll('p')
                            if not paras:
                                paras = articlebody
                            textFound = False
                            for p in paras:
                                if not textFound:
                                    summary_candidate = self.tag_to_string(p).strip()
                                    summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
                                    if len(summary_candidate) > 0:
                                        article.summary = article.text_summary = summary_candidate
                                        textFound = True
            else:
                # display a simple text
                #article.summary = article.text_summary = u'\u66f4\u591a......'
                # display word counts
                counts = 0
                articlebodies = soup.findAll('div', attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        # the text may or may not be enclosed in <p></p> tag
                        paras = articlebody.findAll('p')
                        if not paras:
                            paras = articlebody
                        for p in paras:
                            summary_candidate = self.tag_to_string(p).strip()
                            counts += len(summary_candidate)
                    article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
        except:
            self.log("Error creating article descriptions")
            return
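    # When no summary is wanted, the fallback above publishes a character
    # count wrapped in full-width parentheses; for example (illustrative
    # value only):
    # u'\uff08' + str(1250) + u'\u5b57\uff09'  ->  （1250字）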
    # override from the one in version 0.8.31
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        # change 1: allow our own flag to tell if a periodical is to be generated
        # also use custom date instead of current time
        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
            title = title + ' ' + self.get_fetchformatteddate()
        # end of change 1
        # change 2: __appname__ replaced by newspaper publisher
        __appname__ = self.publisher
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
        if __MakePeriodical__ == True:
            mi.publication_type = 'periodical:' + self.publication_type + ':' + self.short_title()
        else:
            mi.publication_type = self.publication_type + ':' + self.short_title()
        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        # change 4: in the following, all the nowf() are changed to adjusted time
        # This one doesn't matter
        mi.timestamp = nowf()
        # change 5: skip listing the articles
        #article_titles, aseen = [], set()
        #for f in feeds:
        #    for a in f:
        #        if a.title and a.title not in aseen:
        #            aseen.add(a.title)
        #            article_titles.append(force_unicode(a.title, 'utf-8'))

        #mi.comments = self.description
        #if not isinstance(mi.comments, unicode):
        #    mi.comments = mi.comments.decode('utf-8', 'replace')
        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
        #    '\n\n'.join(article_titles))

        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}
        def feed_index(num, parent):
            f = feeds[num]

@@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])

@@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, __appname__, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)

@@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)

@@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)

View File

@@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class Mlody_technik(BasicNewsRecipe):
    title = u'Mlody technik'
    __author__ = 'fenuks'
    description = u'Młody technik'
    category = 'science'
    language = 'pl'
    cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    #keep_only_tags = [dict(id='container')]
    feeds = [(u'Artyku\u0142y', u'http://www.mt.com.pl/feed')]

View File

@@ -1,9 +1,7 @@
__license__   = 'GPL v3'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.moneynews.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

@@ -12,40 +10,40 @@ class MoneyNews(BasicNewsRecipe):
    title = 'Moneynews.com'
    __author__ = 'Darko Miletic'
    description = 'Financial news worldwide'
    publisher = 'Newsmax.com'
    language = 'en'
    category = 'news, finances, USA, business'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'

    conversion_options = {
          'comment'          : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    feeds = [
         (u'Street Talk'          , u'http://www.moneynews.com/rss/StreetTalk/8.xml')
        ,(u'Finance News'         , u'http://www.moneynews.com/rss/FinanceNews/4.xml')
        ,(u'Economy'              , u'http://www.moneynews.com/rss/Economy/2.xml')
        ,(u'Companies'            , u'http://www.moneynews.com/rss/Companies/6.xml')
        ,(u'Markets'              , u'http://www.moneynews.com/rss/Markets/7.xml')
        ,(u'Investing & Analysis' , u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml')
    ]

    keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
    remove_tags = [
        dict(attrs={'class':['MsoNormal', 'MsoNoSpacing']}),
        dict(name=['object','link','embed','form','meta'])
    ]

    def print_version(self, url):
        nodeid = url.rpartition('/')[2]
        return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid
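    # For example (hypothetical URL), rpartition('/') splits at the last '/',
    # so only the trailing node id is kept:
    # 'http://www.moneynews.com/x/y/12345'.rpartition('/')[2]  ->  '12345'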

View File

@@ -7,6 +7,7 @@ class naczytniki(BasicNewsRecipe):
    language = 'pl'
    description = 'everything about e-readers'
    category = 'readers'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_tags_after = dict(name='div', attrs={'class':'sociable'})

View File

@@ -6,11 +6,7 @@ www.nin.co.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Nin(BasicNewsRecipe):
    title = 'NIN online'

@@ -80,59 +76,11 @@ class Nin(BasicNewsRecipe):
                return self.PREFIX + item.img['src']
        return cover_url

    feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]

    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        return url.replace('.co.yu', '.co.rs')

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):

recipes/nol.recipe
View File

@@ -0,0 +1,54 @@
################################################################################
# Description: http://nol.hu/ RSS channel
# Author: Bigpapa (bigpapabig@hotmail.com)
# Date: 2011.12.18. - V1.1
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe

class NOL(BasicNewsRecipe):
    title = u'NOL'
    __author__ = 'Bigpapa'
    oldest_article = 5
    max_articles_per_feed = 5  # maximum number of articles kept per feed in the generated e-book
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    language = 'hu'
    publication_type = 'newsportal'

    conversion_options = {
        'linearize_tables': True,
    }

    keep_only_tags = [
        dict(name='table', attrs={'class':['article-box']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['h','ad-container-outer','tags noborder','ad-container-inner','image-container-lead','tags','related-container']}),
        dict(name='h4'),
        dict(name='tfoot'),
        dict(name='td', attrs={'class':['foot']}),
        dict(name='span', attrs={'class':['image-container-caption']}),
    ]

    feeds = [
        # (u'V\xe1logat\xe1s', 'http://nol.hu/feed/valogatas.rss'),
        (u'Belf\xf6ld', 'http://nol.hu/feed/belfold.rss'),
        (u'K\xfclf\xf6ld', 'http://nol.hu/feed/kulfold.rss'),
        (u'Gazdas\xe1g', 'http://nol.hu/feed/gazdasag.rss'),
        (u'V\xe9lem\xe9ny', 'http://nol.hu/feed/velemeny.rss'),
        (u'Kult\xfara', 'http://nol.hu/feed/kult.rss'),
        (u'Tud/Tech', 'http://nol.hu/feed/tud-tech.rss'),
        (u'Sport', 'http://nol.hu/feed/sport.rss'),
        (u'Noller', 'http://nol.hu/feed/noller.rss'),
        (u'Mozaik', 'http://nol.hu/feed/mozaik.rss'),
        (u'Utaz\xe1s', 'http://nol.hu/feed/utazas.rss'),
        (u'Aut\xf3', 'http://nol.hu/feed/auto.rss'),
        (u'Voks', 'http://nol.hu/feed/voks.rss'),
    ]

View File

@@ -1,20 +1,21 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Nowa_Fantastyka(BasicNewsRecipe):
    title = u'Nowa Fantastyka'
    oldest_article = 7
    __author__ = 'fenuks'
    language = 'pl'
    encoding = 'latin2'
    description = 'site for fantasy readers'
    category = 'fantasy'
    max_articles_per_feed = 100
    INDEX = 'http://www.fantastyka.pl/'
    no_stylesheets = True
    needs_subscription = 'optional'
    remove_tags_before = dict(attrs={'class':'belka1-tlo-md'})
    #remove_tags_after = dict(name='span', attrs={'class':'naglowek-oceny'})
    remove_tags_after = dict(name='td', attrs={'class':'belka1-bot'})
    remove_tags = [dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'), dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
    feeds = []

    def find_articles(self, url):
        articles = []

@@ -45,3 +46,13 @@ class Nowa_Fantastyka(BasicNewsRecipe):
        cover = soup.find(name='img', attrs={'class':'okladka'})
        self.cover_url = self.INDEX + cover['src']
        return getattr(self, 'cover_url', self.cover_url)

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.fantastyka.pl/')
            br.select_form(nr=0)
            br['login'] = self.username
            br['pass'] = self.password
            br.submit()
        return br
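    # Note: select_form(nr=0) binds the first <form> on the page; the 'login'
    # and 'pass' keys must match the site's input field names.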

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''

@@ -707,6 +707,16 @@ class NYTimes(BasicNewsRecipe):
        return soup

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            idxdiv = soup.find('div', attrs={'class':'articleSpanImage'})
            if idxdiv is not None:
                if idxdiv.img:
                    self.add_toc_thumbnail(article, idxdiv.img['src'])
            else:
                img = soup.find('img')
                if img is not None:
                    self.add_toc_thumbnail(article, img['src'])
        shortparagraph = ""
        try:
            if len(article.text_summary.strip()) == 0:

View File

@@ -855,6 +855,16 @@ class NYTimes(BasicNewsRecipe):
        return soup

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            idxdiv = soup.find('div', attrs={'class':'articleSpanImage'})
            if idxdiv is not None:
                if idxdiv.img:
                    self.add_toc_thumbnail(article, idxdiv.img['src'])
            else:
                img = soup.find('img')
                if img is not None:
                    self.add_toc_thumbnail(article, img['src'])
        shortparagraph = ""
        try:
            if len(article.text_summary.strip()) == 0:

View File

@@ -23,7 +23,7 @@ class OSNewsRecipe(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://osnews.pl/wp-content/themes/osnews/img/logo.png'

    extra_css = '''
        .news-heading {font-size:150%}
        .newsinformations li {display:inline;}

@@ -44,7 +44,9 @@ class OSNewsRecipe(BasicNewsRecipe):
        dict(name='div', attrs={'class': 'sociable'}),
        dict(name='div', attrs={'class': 'post_prev'}),
        dict(name='div', attrs={'class': 'post_next'}),
        dict(name='div', attrs={'class': 'clr'}),
        dict(name='div', attrs={'class': 'tw_button'}),
        dict(name='div', attrs={'style': 'width:56px;height:60px;float:left;margin-right:10px'})
    ]

    preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')]

View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__license__   = 'GPL v3'
'''
calibre recipe for prospectmagazine.co.uk (subscription)
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class ProspectMagUK(BasicNewsRecipe):
    title = u'Prospect Magazine'
    description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
    __author__ = 'barty, duluoz'
    timefmt = ' [%d %B %Y]'
    no_stylesheets = True
    publication_type = 'magazine'
    masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
    category = 'news, UK'
    language = 'en_GB'
    max_articles_per_feed = 100
    auto_cleanup = True
    needs_subscription = True

    auto_cleanup_keep = '//div[@class="lead_image"]'
    remove_tags = [{'class':['shareinpost','postutils','postinfo']}]

    INDEX = 'http://www.prospectmagazine.co.uk/current-issue'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.prospectmagazine.co.uk/wp-login.php')
            br.select_form(name='loginform')
            br['log'] = self.username
            br['pwd'] = self.password
            br.submit()
        return br

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)
        #div = soup.find('h1', text=re.compile(r'Issue \d+'))
        #fname = self.tag_to_string(div) if div is not None else 'Current Issue'
        div = soup.find('div', id='cover_image')
        if div is not None:
            img = div.find('img', src=True)
            if img is not None:
                src = img['src']
                if src.startswith('/'):
                    src = 'http://www.prospectmagazine.co.uk' + src
                self.cover_url = src
        feeds = []
        # loop through sections
        for sect in soup.findAll('div', attrs={'class':'sectionheading'}):
            fname = self.tag_to_string(sect).replace('>', '').strip()
            self.log('Found section', fname)
            articles = []
            # note: can't just find siblings with class='post' because that will also
            # grab all the articles belonging to the sections that follow.
            for item in sect.findNextSiblings('div', attrs={'class':True}):
                if not 'post' in item['class']: break
                a = item.find('a', href=True)
                if a is None: continue
                url = a['href']
                title = self.tag_to_string(a)
                p = item.find('p')
                desc = self.tag_to_string(p) if p is not None else ''
                art = {'title':title, 'description':desc, 'date':' ', 'url':url}
                p = item.find(attrs={'class':re.compile('author')})
                self.log('\tFound article:', title, '::', url)
                if p is not None:
                    art['author'] = self.tag_to_string(p).strip()
                articles.append(art)
            feeds.append((fname, articles))
        return feeds

View File

@@ -42,6 +42,9 @@ class Radikal_tr(BasicNewsRecipe):
        ,(u'Politika'      , u'http://www.radikal.com.tr/d/rss/Rss_98.xml')
        ,(u'Dis Haberler'  , u'http://www.radikal.com.tr/d/rss/Rss_100.xml')
        ,(u'Ekonomi'       , u'http://www.radikal.com.tr/d/rss/Rss_101.xml')
        ,(u'Radikal Iki'   , u'http://www.radikal.com.tr/d/rss/Rss_42.xml')
        ,(u'Radikal Hayat' , u'http://www.radikal.com.tr/d/rss/Rss_41.xml')
        ,(u'Radikal Kitap' , u'http://www.radikal.com.tr/d/rss/Rss_40.xml')
    ]

    def print_version(self, url):

View File

@@ -29,22 +29,7 @@ class RollingStones(BasicNewsRecipe):
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    remove_javascript = True

    feeds = [
        (u'News', u'http://www.rollingstone.com/siteServices/rss/allNews'),

@@ -58,25 +43,7 @@ class RollingStones(BasicNewsRecipe):

    def print_version(self, url):
        return url + '?print=true'

View File

@@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe

class rynekzdrowia(BasicNewsRecipe):
    title = u'Rynek Zdrowia'
    __author__ = u'spi630'
    language = 'pl'
    masthead_url = 'http://k.rynekzdrowia.pl/images/headerLogo.png'
    cover_url = 'http://k.rynekzdrowia.pl/images/headerLogo.png'
    oldest_article = 3
    max_articles_per_feed = 25
    no_stylesheets = True
    auto_cleanup = True
    remove_empty_feeds = True
    remove_tags_before = dict(name='h3')

    feeds = [
        (u'Finanse i Zarz\u0105dzanie', u'http://www.rynekzdrowia.pl/Kanal/finanse.html'),
        (u'Inwestycje', u'http://www.rynekzdrowia.pl/Kanal/inwestycje.html'),
        (u'Aparatura i wyposa\u017cenie', u'http://www.rynekzdrowia.pl/Kanal/aparatura.html'),
        (u'Informatyka', u'http://www.rynekzdrowia.pl/Kanal/informatyka.html'),
        (u'Prawo', u'http://www.rynekzdrowia.pl/Kanal/prawo.html'),
        (u'Polityka zdrowotna', u'http://www.rynekzdrowia.pl/Kanal/polityka_zdrowotna.html'),
        (u'Ubezpieczenia Zdrowotne', u'http://www.rynekzdrowia.pl/Kanal/ubezpieczenia.html'),
        (u'Farmacja', u'http://www.rynekzdrowia.pl/Kanal/farmacja.html'),
        (u'Badania i rozw\xf3j', u'http://www.rynekzdrowia.pl/Kanal/badania.html'),
        (u'Nauka', u'http://www.rynekzdrowia.pl/Kanal/nauka.html'),
        (u'Po godzinach', u'http://www.rynekzdrowia.pl/Kanal/godziny.html'),
        (u'Us\u0142ugi medyczne', u'http://www.rynekzdrowia.pl/Kanal/uslugi.html'),
    ]

    def print_version(self, url):
        url = url.replace('.html', ',drukuj.html')
        return url

View File

@@ -11,17 +11,16 @@ from calibre.web.feeds.news import BasicNewsRecipe

class Salon_com(BasicNewsRecipe):
    title = 'Salon.com'
    __author__ = 'Kovid Goyal'
    description = 'Salon.com - Breaking news, opinion, politics, entertainment, sports and culture.'
    timefmt = ' [%b %d, %Y]'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    auto_cleanup_keep = '//div[@class="art"]'
    remove_empty_feeds = True
    remove_tags_before = dict(name='h2')

    feeds = [
        ('News & Politics', 'http://feeds.salon.com/salon/news'),

@@ -40,5 +39,5 @@ class Salon_com(BasicNewsRecipe):
    ]

    def print_version(self, url):
        return url + '/print/'

View File

@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class spn(BasicNewsRecipe):
    title = u'Salonica Press News'
    __author__ = "SteliosGero"
    oldest_article = 3
    max_articles_per_feed = 100
    auto_cleanup = True
    category = 'news, GR'
    language = 'el'

    feeds = [
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&amp;type=rss'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&amp;type=rss'),
        (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&amp;type=rss'),
        (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&amp;type=rss'),
        (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&amp;type=rss'),
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&amp;type=rss'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&amp;type=rss'),
        (u'Media', u'http://www.spnews.gr/media-news?format=feed&amp;type=rss'),
        (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&amp;type=rss'),
        (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&amp;type=rss'),
        (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&amp;type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&amp;type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&amp;type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&amp;type=rss'),
        (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&amp;type=rss'),
        (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&amp;type=rss'),
        (u'Alert!', u'http://spnews.gr/alert?format=feed&amp;type=rss')
    ]

    def print_version(self, url):
        return url + '?tmpl=component&print=1&layout=default&page='

Some files were not shown because too many files have changed in this diff.