commit 1028383a93
Author: Sengian
Date:   2012-01-25 22:34:50 +01:00

724 changed files with 304073 additions and 156411 deletions


@@ -2,6 +2,7 @@
 .check-cache.pickle
 src/calibre/plugins
 resources/images.qrc
+src/calibre/ebooks/oeb/display/test/*.js
 src/calibre/manual/.build/
 src/calibre/manual/cli/
 src/calibre/manual/template_ref.rst
@@ -15,6 +16,7 @@ resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
 resources/builtin_recipes.zip
 resources/template-functions.json
+resources/display/*.js
 setup/installer/windows/calibre/build.log
 src/calibre/translations/.errors
 src/cssutils/.svn/

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)


@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Dean Cording'
+__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
 '''
 abc.net.au/news
 '''
@@ -8,7 +8,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class ABCNews(BasicNewsRecipe):
     title = 'ABC News'
-    __author__ = 'Dean Cording'
+    __author__ = 'Pat Stapleton, Dean Cording'
     description = 'News from Australia'
     masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
     cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
@@ -23,7 +23,9 @@ class ABCNews(BasicNewsRecipe):
     category = 'News, Australia, World'
     language = 'en_AU'
     publication_type = 'newsportal'
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
+    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
     conversion_options = {
         'comments'  : description
         ,'tags'     : category
@@ -32,23 +34,23 @@ class ABCNews(BasicNewsRecipe):
         ,'linearize_tables': False
     }
 
-    keep_only_tags = dict(id='article')
-    remove_tags = [dict(attrs={'class':['related', 'tags']}),
-                   dict(id='statepromo')
-                  ]
+    keep_only_tags = [dict(attrs={'class':['article section']})]
+    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
+                   'inline-content story left', 'inline-content map left contracted', 'published',
+                   'story-map', 'statepromo', 'topics', ]})]
     remove_attributes = ['width','height']
 
     feeds = [
-        ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
-        ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
-        ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
-        ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
-        ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
-        ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
-        ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
-        ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
-        ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
-        ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
+        ('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
+        ('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
+        ('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
+        ('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
+        ('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
+        ('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
+        ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
+        ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
+        ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
+        ('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
    ]
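
A note on the preprocess_regexps change above: calibre applies each (regex, substitution) pair to the raw HTML of every article before it is parsed. A minimal standalone sketch of what the new rule does; the sample HTML here is invented for illustration:

import re

# The (pattern, substitution) pair this commit adds, tried outside calibre.
pattern = re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL)

# Invented sample: a caption-styled link pointing at maps.google.com.
sample = ('<p>Flood levels peaked overnight.</p>'
          '<a class="inline-caption" href="http://maps.google.com/?q=-27.5,153.0">Map</a>')

cleaned = pattern.sub('', sample)
print(cleaned)  # -> '<p>Flood levels peaked overnight.</p>'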


@@ -1,19 +1,38 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 
 class Adventure_zone(BasicNewsRecipe):
     title = u'Adventure Zone'
     __author__ = 'fenuks'
     description = 'Adventure zone - adventure games from A to Z'
     category = 'games'
     language = 'pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
     no_stylesheets = True
+    oldest_article = 20
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
     remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
-    remove_tags_after = dict(name='td', attrs={'class':'main-body middle-border'})
+    remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
+    remove_tags_after = dict(id='comments')
     extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
     feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
 
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        soup = self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
+        tag = soup.find(name='channel')
+        titles = []
+        for r in tag.findAll(name='image'):
+            r.extract()
+        art = tag.findAll(name='item')
+        for i in art:
+            titles.append(i.title.string)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                article.title = titles[feed.articles.index(article)]
+        return feeds
+
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
         cover = soup.find(id='box_OstatninumerAZ')
@@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
     def skip_ad_pages(self, soup):
-        skip_tag = soup.body.findAll(name='a')
-        if skip_tag is not None:
-            for r in skip_tag:
-                if 'articles.php?' in r['href']:
-                    if r.strong is not None:
-                        word = r.strong.string
-                        if ('zapowied' or 'recenzj') in word:
-                            return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
-                        else:
-                            None
-
-    def print_version(self, url):
-        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
+        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
+        skip_tag = skip_tag.findAll(name='a')
+        for r in skip_tag:
+            if r.strong:
+                word = r.strong.string
+                if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
+                    return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
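
The parse_feeds override added above re-reads the raw RSS and copies the <item> titles onto the parsed articles by position. A standalone sketch of that pairing logic, with plain objects standing in for calibre's Feed/Article classes (all names here are illustrative only):

# Titles scraped from the raw RSS <item> elements, in document order.
rss_titles = ['Zapowiedz: Nowa przygodowka', 'Recenzja: Stary klasyk']

class Article(object):
    """Stand-in for calibre's article object; only 'title' matters here."""
    def __init__(self, title):
        self.title = title

articles = [Article('truncated title 1'), Article('truncated title 2')]

# Positional pairing, as in the recipe: article i gets RSS title i.
# Note this silently mis-titles articles if the two lists ever diverge.
for i, article in enumerate(articles):
    article.title = rss_titles[i]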


@@ -0,0 +1,50 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
+'''
+abc.net.au/news
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class TheDailyNewsEG(BasicNewsRecipe):
+    title = u'al-masry al-youm'
+    __author__ = 'Omm Mishmishah'
+    description = 'Independent News from Egypt'
+    masthead_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
+    cover_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
+
+    auto_cleanup = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = False
+    #delay = 1
+    use_embedded_content = False
+    encoding = 'utf8'
+    publisher = 'Independent News Egypt'
+    category = 'News, Egypt, World'
+    language = 'en_EG'
+    publication_type = 'newsportal'
+    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
+    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': False
+    }
+
+    keep_only_tags = [dict(attrs={'class':['article section']})]
+    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
+                   'inline-content story left', 'inline-content map left contracted', 'published',
+                   'story-map', 'statepromo', 'topics', ]})]
+    remove_attributes = ['width','height']
+
+    feeds = [(u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
+             (u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
+             (u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
+             (u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml')
+    ]


@@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
         self.temp_files[-1].write(html)
         self.temp_files[-1].close()
         return self.temp_files[-1].name
+
+    conversion_options = {'linearize_tables': True}


@@ -11,7 +11,6 @@ class AssociatedPress(BasicNewsRecipe):
     language = 'en'
     no_stylesheets = True
     max_articles_per_feed = 15
-    html2lrf_options = ['--force-page-break-before-tag="chapter"']
 
     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in


@@ -1,5 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
 class AstroNEWS(BasicNewsRecipe):
     title = u'AstroNEWS'
     __author__ = 'fenuks'
@@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
     language = 'pl'
     oldest_article = 8
     max_articles_per_feed = 100
-    auto_cleanup = True
+    #extra_css = 'table {text-align: left;}'
+    no_stylesheets = True
     cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
-    # no_stylesheets = True
+    remove_tags = [dict(name='hr')]
     feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
 
     def print_version(self, url):
         return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(align=True):
+            del item['align']
+        return soup
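
The preprocess_html hook added here strips align attributes from the parsed document before conversion. The same idea as a standalone BeautifulSoup snippet; the sample markup is invented, and in a recipe calibre hands you the soup instead of you parsing it:

from bs4 import BeautifulSoup

html = '<table align="center"><tr><td align="right">42</td></tr></table>'
soup = BeautifulSoup(html, 'html.parser')

# findAll(align=True) selects every tag that has an 'align' attribute.
for item in soup.findAll(align=True):
    del item['align']

print(soup)  # no align attributes remain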


@@ -1,61 +1,648 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+##
+## Title:       BBC News, Sport, and Blog Calibre Recipe
+## Contact:     mattst - jmstanfield@gmail.com
+##
+## License:     GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
+## Copyright:   mattst - jmstanfield@gmail.com
+##
+## Written:     November 2011
+## Last Edited: 2011-11-19
+##
+
+__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
+__copyright__ = 'mattst - jmstanfield@gmail.com'
+
 '''
-news.bbc.co.uk
+BBC News, Sport, and Blog Calibre Recipe
 '''
+
+# Import the regular expressions module.
 import re
+
+# Import the BasicNewsRecipe class which this class extends.
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
-class BBC(BasicNewsRecipe):
+class BBCNewsSportBlog(BasicNewsRecipe):
-    title = 'BBC News'
-    __author__ = 'Darko Miletic, Starson17'
-    description = 'News from UK. '
-    oldest_article = 2
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    #delay = 1
-    use_embedded_content = False
-    encoding = 'utf8'
-    publisher = 'BBC'
-    category = 'news, UK, world'
-    language = 'en_GB'
-    publication_type = 'newsportal'
-    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    conversion_options = {
-        'comments'  : description
-        ,'tags'     : category
-        ,'language' : language
-        ,'publisher': publisher
-        ,'linearize_tables': True
-    }
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['layout-block-a layout-block']})
-        ,dict(attrs={'class':['story-body','storybody']})
-    ]
-    remove_tags = [
-        dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
-            'story-feature wide ', 'story-feature narrow']}),
-        dict(id=['hypertab', 'comment-form']),
-    ]
-    remove_attributes = ['width','height']
+
+    #
+    #    **** IMPORTANT USERS READ ME ****
+    #
+    #  First select the feeds you want then scroll down below the feeds list
+    #  and select the values you want for the other user preferences, like
+    #  oldest_article and such like.
+    #
+    #
+    #  Select the BBC rss feeds which you want in your ebook.
+    #  Selected feeds have NO '#' at their start, de-selected feeds begin with a '#'.
+    #
+    #  Eg.  ("News Home", "http://feeds.bbci.co.uk/... - include feed.
+    #  Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
+    #
+    # There are 68 feeds below which constitute the bulk of the available rss
+    # feeds on the BBC web site. These include 5 blogs by editors and
+    # correspondents, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
+    # Wales, Scotland Business), and 7 Welsh language feeds.
+    #
+    # Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
+    # so if "oldest_article = 1.5" (only articles published in the last 36 hours)
+    # you may get some 'empty feeds' which will not then be included in the ebook.
+    #
+    # The 15 feeds currently selected below are simply my default ones.
+    #
+    # Note: With all 68 feeds selected, oldest_article set to 2,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
+    # the ebook creation took 29 minutes on my speedy 100 mbps net connection
+    # and fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
+    # More realistically with 15 feeds selected, oldest_article set to 1.5,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
+    # it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
+    #
+    # Select / de-select the feeds you want in your ebook.
+    #
     feeds = [
-        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
-        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
-        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
-        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
-        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
-        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
-        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
-        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
-        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
-        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
-        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
-        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
-        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
+        ("News Home", "http://feeds.bbci.co.uk/news/rss.xml"),
+        ("UK", "http://feeds.bbci.co.uk/news/uk/rss.xml"),
+        ("World", "http://feeds.bbci.co.uk/news/world/rss.xml"),
+        #("England", "http://feeds.bbci.co.uk/news/england/rss.xml"),
+        #("Scotland", "http://feeds.bbci.co.uk/news/scotland/rss.xml"),
+        #("Wales", "http://feeds.bbci.co.uk/news/wales/rss.xml"),
+        #("N. Ireland", "http://feeds.bbci.co.uk/news/northern_ireland/rss.xml"),
+        #("Africa", "http://feeds.bbci.co.uk/news/world/africa/rss.xml"),
+        #("Asia", "http://feeds.bbci.co.uk/news/world/asia/rss.xml"),
+        #("Europe", "http://feeds.bbci.co.uk/news/world/europe/rss.xml"),
+        #("Latin America", "http://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
+        #("Middle East", "http://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
+        ("US & Canada", "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
+        ("Politics", "http://feeds.bbci.co.uk/news/politics/rss.xml"),
+        ("Science/Environment", "http://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
+        ("Technology", "http://feeds.bbci.co.uk/news/technology/rss.xml"),
+        ("Magazine", "http://feeds.bbci.co.uk/news/magazine/rss.xml"),
+        ("Entertainment/Arts", "http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
+        #("Health", "http://feeds.bbci.co.uk/news/health/rss.xml"),
+        #("Education/Family", "http://feeds.bbci.co.uk/news/education/rss.xml"),
+        ("Business", "http://feeds.bbci.co.uk/news/business/rss.xml"),
+        ("Special Reports", "http://feeds.bbci.co.uk/news/special_reports/rss.xml"),
+        ("Also in the News", "http://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
+        #("Newsbeat", "http://www.bbc.co.uk/newsbeat/rss.xml"),
+        #("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
+        ("Blog: Nick Robinson (Political Editor)", "http://feeds.bbci.co.uk/news/correspondents/nickrobinson/rss.sxml"),
+        #("Blog: Mark D'Arcy (Parliamentary Correspondent)", "http://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
+        #("Blog: Robert Peston (Business Editor)", "http://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
+        #("Blog: Stephanie Flanders (Economics Editor)", "http://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
+        ("Blog: Rory Cellan-Jones (Technology correspondent)", "http://feeds.bbci.co.uk/news/correspondents/rorycellanjones/rss.sxml"),
+        ("Sport Front Page", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
+        #("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
+        #("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
+        #("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
+        #("Rugby League", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml"),
+        #("Tennis", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml"),
+        #("Golf", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml"),
+        #("Motorsport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml"),
+        #("Boxing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml"),
+        #("Athletics", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml"),
+        #("Snooker", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml"),
+        #("Horse Racing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml"),
+        #("Cycling", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml"),
+        #("Disability Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml"),
+        #("Other Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml"),
+        #("Olympics 2012", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/olympics_2012/rss.xml"),
+        #("N. Ireland Politics", "http://feeds.bbci.co.uk/news/northern_ireland/northern_ireland_politics/rss.xml"),
+        #("Scotland Politics", "http://feeds.bbci.co.uk/news/scotland/scotland_politics/rss.xml"),
+        #("Scotland Business", "http://feeds.bbci.co.uk/news/scotland/scotland_business/rss.xml"),
+        #("E. Scotland, Edinburgh & Fife", "http://feeds.bbci.co.uk/news/scotland/edinburgh_east_and_fife/rss.xml"),
+        #("W. Scotland & Glasgow", "http://feeds.bbci.co.uk/news/scotland/glasgow_and_west/rss.xml"),
+        #("Highlands & Islands", "http://feeds.bbci.co.uk/news/scotland/highlands_and_islands/rss.xml"),
+        #("NE. Scotland, Orkney & Shetland", "http://feeds.bbci.co.uk/news/scotland/north_east_orkney_and_shetland/rss.xml"),
+        #("South Scotland", "http://feeds.bbci.co.uk/news/scotland/south_scotland/rss.xml"),
+        #("Central Scotland & Tayside", "http://feeds.bbci.co.uk/news/scotland/tayside_and_central/rss.xml"),
+        #("Wales Politics", "http://feeds.bbci.co.uk/news/wales/wales_politics/rss.xml"),
+        #("NW. Wales", "http://feeds.bbci.co.uk/news/wales/north_west_wales/rss.xml"),
+        #("NE. Wales", "http://feeds.bbci.co.uk/news/wales/north_east_wales/rss.xml"),
+        #("Mid. Wales", "http://feeds.bbci.co.uk/news/wales/mid_wales/rss.xml"),
+        #("SW. Wales", "http://feeds.bbci.co.uk/news/wales/south_west_wales/rss.xml"),
+        #("SE. Wales", "http://feeds.bbci.co.uk/news/wales/south_east_wales/rss.xml"),
+        #("Newyddion - News in Welsh", "http://feeds.bbci.co.uk/newyddion/rss.xml"),
+        #("Gwleidyddiaeth", "http://feeds.bbci.co.uk/newyddion/gwleidyddiaeth/rss.xml"),
+        #("Gogledd-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/gogledd-ddwyrain/rss.xml"),
+        #("Gogledd-Orllewin", "http://feeds.bbci.co.uk/newyddion/gogledd-orllewin/rss.xml"),
+        #("Canolbarth", "http://feeds.bbci.co.uk/newyddion/canolbarth/rss.xml"),
+        #("De-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/de-ddwyrain/rss.xml"),
+        #("De-Orllewin", "http://feeds.bbci.co.uk/newyddion/de-orllewin/rss.xml"),
     ]
+
+    # **** SELECT YOUR USER PREFERENCES ****
+
+    # Title to use for the ebook.
+    #
+    title = 'BBC News'
+
+    # A brief description for the ebook.
+    #
+    description = u'BBC web site ebook created using rss feeds.'
+
+    # The max number of articles which may be downloaded from each feed.
+    # I've never seen more than about 70 articles in a single feed in the
+    # BBC feeds.
+    #
+    max_articles_per_feed = 100
+
+    # The max age of articles which may be downloaded from each feed. This is
+    # specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
+    # half days). My default of 1.5 days is the last 36 hours, the point at
+    # which I've decided 'news' becomes 'old news', but be warned this is not
+    # so good for the blogs, technology, magazine, etc., and sports feeds.
+    # You may wish to extend this to 2-5 but watch out, ebook creation time will
+    # increase as well. Setting this to 30 will get everything (AFAICT) as long
+    # as max_articles_per_feed remains set high (except for 'Click' which is
+    # v. low volume and its currently oldest article is 4th Feb 2011).
+    #
+    oldest_article = 1.5
+
+    # Number of simultaneous downloads. 20 is consistently working fine on the
+    # BBC News feeds with no problems. Speeds things up from the default of 5.
+    # If you have a lot of feeds and/or have increased oldest_article above 2
+    # then you may wish to try increasing simultaneous_downloads to 25-30,
+    # Or, of course, if you are in a hurry. [I've not tried beyond 20.]
+    #
+    simultaneous_downloads = 20
+
+    # Timeout for fetching files from the server in seconds. The default of
+    # 120 seconds seems somewhat excessive.
+    #
+    timeout = 30
+
+    # The format string for the date shown on the ebook's first page.
+    # List of all values: http://docs.python.org/library/time.html
+    # Default in news.py has a leading space so that's mirrored here.
+    # As with 'feeds' select/de-select by adding/removing the initial '#',
+    # only one timefmt should be selected, here's a few to choose from.
+    #
+    timefmt = ' [%a, %d %b %Y]'            # [Fri, 14 Nov 2011] (Calibre default)
+    #timefmt = ' [%a, %d %b %Y %H:%M]'     # [Fri, 14 Nov 2011 18:30]
+    #timefmt = ' [%a, %d %b %Y %I:%M %p]'  # [Fri, 14 Nov 2011 06:30 PM]
+    #timefmt = ' [%d %b %Y]'               # [14 Nov 2011]
+    #timefmt = ' [%d %b %Y %H:%M]'         # [14 Nov 2011 18.30]
+    #timefmt = ' [%Y-%m-%d]'               # [2011-11-14]
+    #timefmt = ' [%Y-%m-%d-%H-%M]'         # [2011-11-14-18-30]
+
+    #
+    # **** IMPORTANT ****
+    #
+    # DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    # DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    # I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :)
+    #
+    # **** IMPORTANT ****
+    #
+
+    # Author of this recipe.
+    __author__ = 'mattst'
+
+    # Specify English as the language of the RSS feeds (ISO-639 code).
+    language = 'en_GB'
+
+    # Set tags.
+    tags = 'news, sport, blog'
+
+    # Set publisher and publication type.
+    publisher = 'BBC'
+    publication_type = 'newspaper'
+
+    # Disable stylesheets from site.
+    no_stylesheets = True
+
+    # Specifies an override encoding for sites that have an incorrect charset
+    # specified. Default of 'None' says to auto-detect. Some other BBC recipes
+    # use 'utf8', which works fine (so use that if necessary) but auto-detecting
+    # with None is working fine, so stick with that for robustness.
+    encoding = None
+
+    # Sets whether a feed has full articles embedded in it. The BBC feeds do not.
+    use_embedded_content = False
+
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # Create a custom title which fits nicely in the Kindle title list.
+    # Requires "import time" above class declaration, and replacing
+    # title with custom_title in conversion_options (right column only).
+    # Example of string below: "BBC News - 14 Nov 2011"
+    #
+    # custom_title = "BBC News - " + time.strftime('%d %b %Y')
+
+    '''
+    # Conversion options for advanced users, but don't forget to comment out the
+    # current conversion_options below. Avoid setting 'linearize_tables' as that
+    # plays havoc with the 'old style' table based pages.
+    #
+    conversion_options = { 'title'       : title,
+                           'comments'    : description,
+                           'tags'        : tags,
+                           'language'    : language,
+                           'publisher'   : publisher,
+                           'authors'     : publisher,
+                           'smarten_punctuation' : True
+                         }
+    '''
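
To use the custom_title option described in the comments above, the recipe would change along these lines. This is a sketch of the author's own suggestion, not part of the committed code:

import time  # placed above the class declaration, as the comment says

custom_title = 'BBC News - ' + time.strftime('%d %b %Y')

# ...and in conversion_options, 'title' maps to custom_title instead:
conversion_options = { 'title'               : custom_title,
                       'smarten_punctuation' : True }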
+
+    conversion_options = { 'smarten_punctuation' : True }
+
+    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
+    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
+                 .introduction, .first { font-weight: bold; } \
+                 .cross-head { font-weight: bold; font-size: 125%; } \
+                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
+                 .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
+                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
+                 .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
+                 text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
+                 .story-date, .published { font-size: 80%; } \
+                 table { width: 100%; } \
+                 td img { display: block; margin: 5px auto; } \
+                 ul { padding-top: 10px; } \
+                 ol { padding-top: 10px; } \
+                 li { padding-top: 5px; padding-bottom: 5px; } \
+                 h1 { text-align: center; font-size: 175%; font-weight: bold; } \
+                 h2 { text-align: center; font-size: 150%; font-weight: bold; } \
+                 h3 { text-align: center; font-size: 125%; font-weight: bold; } \
+                 h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
+
+    # Remove various tag attributes to improve the look of the ebook pages.
+    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
+                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
+
+    # Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
+    # cause a section of the ebook to start in an unsightly fashion or, more
+    # frequently, a "<br />" will muck up the formatting of a correspondent's byline.
+    # "<br />" and "<br clear/>" are far more frequently used on the table formatted
+    # style of pages, and really spoil the look of the ebook pages.
+    preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
+                          (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
+
+    # Create regular expressions for tag keeping and removal to make the matches more
+    # robust against minor changes and errors in the HTML, Eg. double spaces, leading
+    # and trailing spaces, missing hyphens, and such like.
+    # Python regular expression ('re' class) page: http://docs.python.org/library/re.html
+
+    # ***************************************
+    # Regular expressions for keep_only_tags:
+    # ***************************************
+
+    # The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
+    # page which contains the main text of the article. Match storybody variants: 'storybody',
+    # 'story-body', 'story body', 'storybody ', etc.
+    storybody_reg_exp = '^.*story[_ -]*body.*$'
+
+    # The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
+    # and published date. This is one level above the usual news pages which have the title
+    # and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
+    # resulting in a lot of extra things to be removed by remove_tags.
+    blq_content_reg_exp = '^.*blq[_ -]*content.*$'
+
+    # The BBC has an alternative page design structure, which I suspect is an out-of-date
+    # design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
+    # (travel), and in some sport pages. These alternative pages are table based (which is
+    # why I think they are an out-of-date design) and account for - I'm guesstimating - less
+    # than 1% of all articles. They use a table class 'storycontent' to hold the article
+    # and like blq_content (above) have required lots of extra removal by remove_tags.
+    story_content_reg_exp = '^.*story[_ -]*content.*$'
+
+    # Keep the sections of the HTML which match the list below. The HTML page created by
+    # Calibre will fill <body> with those sections which are matched. Note that the
+    # blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
+    # it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
+    # will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
+    # all). If they are the other way around in keep_only_tags then blq_content_reg_exp
+    # will end up being discarded.
+    keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
+
+    # ************************************
+    # Regular expressions for remove_tags:
+    # ************************************
+
+    # Regular expression to remove share-help and variant tags. The share-help class
+    # is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
+    # twitter, email. Removed to avoid page clutter.
+    share_help_reg_exp = '^.*share[_ -]*help.*$'
+
+    # Regular expression to remove embedded-hyper and variant tags. This class is used to
+    # display links to other BBC News articles on the same/similar subject.
+    embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
+
+    # Regular expression to remove hypertabs and variant tags. This class is used to
+    # display a tab bar at the top of an article which allows the user to switch to
+    # an article (viewed on the same page) providing further info., 'in depth' analysis,
+    # an editorial, a correspondent's blog entry, and such like. The ability to handle
+    # a tab bar of this nature is currently beyond the scope of this recipe and
+    # possibly of Calibre itself (not sure about that - TO DO - check!).
+    hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
+
+    # Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
+    # 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
+    # This class is used to add additional info. boxes, or small lists, outside of
+    # the main story. TO DO: Work out a way to incorporate these neatly.
+    story_feature_reg_exp = '^.*story[_ -]*feature.*$'
+
+    # Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
+    # 'videoInStoryC'. This class is used to embed video.
+    video_reg_exp = '^.*video.*$'
+
+    # Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
+    # This class is used to embed audio.
+    audio_reg_exp = '^.*audio.*$'
+
+    # Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
+    # This class is used to embed a photo slideshow. See also 'slideshow' below.
+    picture_gallery_reg_exp = '^.*picture.*$'
+
+    # Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
+    # This class is used to embed a slideshow (not necessarily photo) but both
+    # 'slideshow' and 'pictureGallery' are used for slideshows.
+    slideshow_reg_exp = '^.*slide[_ -]*show.*$'
+
+    # Regular expression to remove social-links and variant tags. This class is used to
+    # display links to a BBC bloggers main page, used in various columnists' blogs
+    # (Eg. Nick Robinson, Robert Peston).
+    social_links_reg_exp = '^.*social[_ -]*links.*$'
+
+    # Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
+    # 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
+    # removed by 'story-feature' removal (as they are usually within them), but
+    # not always. The quotation removed is always (AFAICT) in the article text
+    # as well but a 2nd copy is placed in a quote tag to draw attention to it.
+    # The quote class tags may or may not appear in div's.
+    quote_reg_exp = '^.*quote.*$'
+
+    # Regular expression to remove hidden and variant tags, Eg. 'hidden'.
+    # The purpose of these is unclear, they seem to be an internal link to a
+    # section within the article, but the text of the link (Eg. 'Continue reading
+    # the main story') never seems to be displayed anyway. Removed to avoid clutter.
+    # The hidden class tags may or may not appear in div's.
+    hidden_reg_exp = '^.*hidden.*$'
+
+    # Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
+    # Used on the site to display text about registered users entering comments.
+    comment_reg_exp = '^.*comment.*$'
+
+    # Regular expression to remove form and variant tags, Eg. 'comment-form'.
+    # Used on the site to allow registered BBC users to fill in forms, typically
+    # for entering comments about an article.
+    form_reg_exp = '^.*form.*$'
+
+    # Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
+
+    # <div class="story-actions"> Used on sports pages for 'email' and 'print'.
+    story_actions_reg_exp = '^.*story[_ -]*actions.*$'
+
+    # <div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
+    # social networking links).
+    bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
+
+    # <div id="secondary-content" class="content-group">
+    # NOTE: Don't remove class="content-group" - that is needed.
+    # Used on sports pages to link to 'similar stories'.
+    secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
+
+    # <div id="featured-content" class="content-group">
+    # NOTE: Don't remove class="content-group" - that is needed.
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    featured_content_reg_exp = '^.*featured[_ -]*content.*$'
+
+    # <div id="navigation">
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    # Used sometimes instead of "featured-content" above.
+    navigation_reg_exp = '^.*navigation.*$'
+
+    # <a class="skip" href="#blq-container-inner">Skip to top</a>
+    # Used on sports pages to link to the top of the page.
+    skip_reg_exp = '^.*skip.*$'
+
+    # Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
+    # which are the alternative table design based pages. The purpose of some of these
+    # is not entirely clear from the pages (which are a total mess!).
+
+    # Remove mapping based tags, Eg. <map id="world_map">
+    # The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
+    map_reg_exp = '^.*map.*$'
+
+    # Remove social bookmarking variation, called 'socialBookMarks'.
+    social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
+
+    # Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
+    blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
+
+    # Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
+    # alongside 'socialBookMarks' whenever that appears. I am removing it as well
+    # under the assumption that it can appear alone as well.
+    sharesb_reg_exp = '^.*sharesb.*$'
+
+    # Remove class 'o'. The worst named user created css class of all time. The creator
+    # should immediately be fired. I've seen it used to hold nothing at all but with
+    # 20 or so empty lines in it. Also to hold a single link to another article.
+    # Whatever it was designed to do it is not wanted by this recipe. Exact match only.
+    o_reg_exp = '^o$'
+
+    # Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    promo_top_reg_exp = '^.*promotopbg.*$'
+    promo_bottom_reg_exp = '^.*promobottombg.*$'
+
+    # Remove 'nlp', provides heading for link lists. Requires an exact match due to
+    # risk of matching those letters in something needed, unless I see a variation
+    # of 'nlp' used at a later date.
+    nlp_reg_exp = '^nlp$'
+
+    # Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
+    # has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
+    # matching those letters in something needed.
+    mva_or_mvb_reg_exp = '^mv[ab]$'
+
+    # Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
+    mvtb_reg_exp = '^mvtb$'
+
+    # Remove 'blq-toplink', class to provide a link to the top of the page.
+    blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
+
+    # Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
+    # Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    prods_services_01_reg_exp = '^.*servicev4.*$'
+    prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
+
+    # Remove - what I think is - some kind of navigation tools helper class, though I am
+    # not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
+    # frequently and it is not wanted. Have decided to use two reg expressions to make
+    # removing this (and variants) robust.
+    blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
+    blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
+
+    # Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
+    # need removing - I have no clue what it does other than it contains links.
+    # Whatever it is - it is not part of the article and is not wanted.
+    puffbox_reg_exp = '^.*puffbox.*$'
+
+    # Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
+    sibtbg_reg_exp = '^.*sibtbg.*$'
+
+    # Remove 'storyextra' - links to relevant articles and external sites.
+    storyextra_reg_exp = '^.*story[_ -]*extra.*$'
+
+    remove_tags = [ dict(name='div', attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
+                    dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)})
+                  ]
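
All of the keep/remove rules above rely on BeautifulSoup accepting a compiled regular expression wherever an attribute value is expected. A minimal demonstration of that matching behaviour, with invented markup:

import re
from bs4 import BeautifulSoup

html = ('<div class="story-body">kept</div>'
        '<div class="share-help tools">removed</div>')
soup = BeautifulSoup(html, 'html.parser')

# The same pattern style the recipe uses for 'storybody' variants.
storybody = re.compile('^.*story[_ -]*body.*$', re.IGNORECASE)

print(soup.find('div', attrs={'class': storybody}).string)  # -> kept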
+
+    # Uses url to create and return the 'printer friendly' version of the url.
+    # In other words the 'print this page' address of the page.
+    #
+    # There are 3 types of urls used in the BBC site's rss feeds. There is just
+    # 1 type for the standard news while there are 2 used for sports feed urls.
+    # Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
+    # there is a major story of interest to 'everyone'. So even if no BBC sports
+    # feeds are added to 'feeds' the logic of this method is still needed to avoid
+    # blank / missing / empty articles which have an index title and then no body.
+
+    def print_version(self, url):
+
+        # Handle sports page urls type 01:
+        if (url.find("go/rss/-/sport1/") != -1):
+            temp_url = url.replace("go/rss/-/", "")
+
+        # Handle sports page urls type 02:
+        elif (url.find("go/rss/int/news/-/sport1/") != -1):
+            temp_url = url.replace("go/rss/int/news/-/", "")
+
+        # Handle regular news page urls:
+        else:
+            temp_url = url.replace("go/rss/int/news/-/", "")
+
+        # Always add "?print=true" to the end of the url.
+        print_url = temp_url + "?print=true"
+
+        return print_url
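
For illustration, the URL rewriting in print_version behaves like this. The example URL below is made up, shaped the way the comments describe a sports feed link:

url = 'http://newsrss.bbc.co.uk/go/rss/-/sport1/hi/football/0000000.stm'

# Sports type 01: drop the rss redirect prefix, then request the print view.
print_url = url.replace('go/rss/-/', '') + '?print=true'
# -> 'http://newsrss.bbc.co.uk/sport1/hi/football/0000000.stm?print=true'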
+
+    # Remove articles in feeds based on a string in the article title or url.
+    #
+    # Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
+    # thread, in post with title: "Remove articles from feed", see url:
+    # http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
+    # Many thanks and all credit to Starson17.
+    #
+    # Starson17's code has obviously been altered to suit my requirements.
+
+    def parse_feeds(self):
+
+        # Call parent's method.
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        # Loop through all feeds.
+        for feed in feeds:
+
+            # Loop through all articles in feed.
+            for article in feed.articles[:]:
+
+                # Match key words and remove article if there's a match.
+
+                # Most BBC rss feed video only 'articles' use upper case 'VIDEO'
+                # as a title prefix. Just match upper case 'VIDEO', so that
+                # articles like 'Video game banned' won't be matched and removed.
+                if 'VIDEO' in article.title:
+                    feed.articles.remove(article)
+
+                # Most BBC rss feed audio only 'articles' use upper case 'AUDIO'
+                # as a title prefix. Just match upper case 'AUDIO', so that
+                # articles like 'Hi-Def audio...' won't be matched and removed.
+                elif 'AUDIO' in article.title:
+                    feed.articles.remove(article)
+
+                # Most BBC rss feed photo slideshow 'articles' use 'In Pictures',
+                # 'In pictures', and 'in pictures', somewhere in their title.
+                # Match any case of that phrase.
+                elif 'IN PICTURES' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # As above, but user contributed pictures. Match any case.
+                elif 'YOUR PICTURES' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # 'Sportsday Live' are articles which contain a constantly and
+                # dynamically updated 'running commentary' during a live sporting
+                # event. Match any case.
+                elif 'SPORTSDAY LIVE' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # Sometimes 'Sportsday Live' (above) becomes 'Live - Sport Name'.
+                # These are being matched below using 'Live - ' because removing all
+                # articles with 'live' in their titles would remove some articles
+                # that are in fact not live sports pages. Match any case.
+                elif 'LIVE - ' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # 'Quiz of the week' is a Flash player weekly news quiz. Match only
+                # the 'Quiz of the' part in anticipation of monthly and yearly
+                # variants. Match any case.
+                elif 'QUIZ OF THE' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # Remove articles with 'scorecards' in the url. These are BBC sports
+                # pages which just display a cricket scorecard. The pages have a mass
+                # of table and css entries to display the scorecards nicely. Probably
+                # could make them work with this recipe, but might take a whole day
+                # of work to sort out all the css - basically a formatting nightmare.
+                elif 'scorecards' in article.url:
+                    feed.articles.remove(article)
+
+        return feeds
+
+# End of class and file.
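
An aside on the title filtering above: the first two membership tests are deliberately case-sensitive, as the comments explain. A quick illustration outside calibre, with invented sample titles:

titles = ['VIDEO: Flooding hits Brisbane', 'Video game banned in court row']

# Upper-case 'VIDEO' matches only the video-only item, not the headline.
kept = [t for t in titles if 'VIDEO' not in t]
print(kept)  # -> ['Video game banned in court row']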


@@ -1,61 +1,44 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-import re
+
+'''Calibre recipe to convert the RSS feeds of the Berliner Zeitung to an ebook.'''
 
 class SportsIllustratedRecipe(BasicNewsRecipe) :
-    __author__ = 'ape'
-    __copyright__ = 'ape'
+    __author__ = 'a.peter'
+    __copyright__ = 'a.peter'
     __license__ = 'GPL v3'
     language = 'de'
-    description = 'Berliner Zeitung'
-    version = 2
+    description = 'Berliner Zeitung RSS'
+    version = 4
     title = u'Berliner Zeitung'
     timefmt = ' [%d.%m.%Y]'
+    #oldest_article = 7.0
     no_stylesheets = True
     remove_javascript = True
     use_embedded_content = False
     publication_type = 'newspaper'
-    keep_only_tags = [dict(name='div', attrs={'class':'teaser t_split t_artikel'})]
-
-    INDEX = 'http://www.berlinonline.de/berliner-zeitung/'
-
-    def parse_index(self):
-        base = 'http://www.berlinonline.de'
-        answer = []
-        articles = {}
-        more = 1
-
-        soup = self.index_to_soup(self.INDEX)
-
-        # Get list of links to ressorts from index page
-        ressort_list = soup.findAll('ul', attrs={'class': re.compile('ressortlist')})
-        for ressort in ressort_list[0].findAll('a'):
-            feed_title = ressort.string
-            print 'Analyzing', feed_title
-            if not articles.has_key(feed_title):
-                articles[feed_title] = []
-                answer.append(feed_title)
-            # Load ressort page.
-            feed = self.index_to_soup('http://www.berlinonline.de' + ressort['href'])
-            # find mainbar div which contains the list of all articles
-            for article_container in feed.findAll('div', attrs={'class': re.compile('mainbar')}):
-                # iterate over all articles
-                for article_teaser in article_container.findAll('div', attrs={'class': re.compile('teaser')}):
-                    # extract title of article
-                    if article_teaser.h3 != None:
-                        article = {'title' : article_teaser.h3.a.string, 'date' : u'', 'url' : base + article_teaser.h3.a['href'], 'description' : u''}
-                        articles[feed_title].append(article)
-                    else:
-                        # Skip teasers for missing photos
-                        if article_teaser.div.p.contents[0].find('Foto:') > -1:
-                            continue
-                        article = {'title': 'Weitere Artikel ' + str(more), 'date': u'', 'url': base + article_teaser.div.p.a['href'], 'description': u''}
-                        articles[feed_title].append(article)
-                        more += 1
-        answer = [[key, articles[key]] for key in answer if articles.has_key(key)]
-        return answer
+    remove_tags_before = dict(name='div', attrs={'class':'newstype'})
+    remove_tags_after = [dict(id='article_text')]
+
+    feeds = [(u'Startseite', u'http://www.berliner-zeitung.de/home/10808950,10808950,view,asFeed.xml'),
+             (u'Politik', u'http://www.berliner-zeitung.de/home/10808018,10808018,view,asFeed.xml'),
+             (u'Wirtschaft', u'http://www.berliner-zeitung.de/home/10808230,10808230,view,asFeed.xml'),
+             (u'Berlin', u'http://www.berliner-zeitung.de/home/10809148,10809148,view,asFeed.xml'),
+             (u'Brandenburg', u'http://www.berliner-zeitung.de/home/10809312,10809312,view,asFeed.xml'),
+             (u'Wissenschaft', u'http://www.berliner-zeitung.de/home/10808894,10808894,view,asFeed.xml'),
+             (u'Digital', u'http://www.berliner-zeitung.de/home/10808718,10808718,view,asFeed.xml'),
+             (u'Kultur', u'http://www.berliner-zeitung.de/home/10809150,10809150,view,asFeed.xml'),
+             (u'Panorama', u'http://www.berliner-zeitung.de/home/10808334,10808334,view,asFeed.xml'),
+             (u'Sport', u'http://www.berliner-zeitung.de/home/10808794,10808794,view,asFeed.xml'),
+             (u'Hertha', u'http://www.berliner-zeitung.de/home/10808800,10808800,view,asFeed.xml'),
+             (u'Union', u'http://www.berliner-zeitung.de/home/10808802,10808802,view,asFeed.xml'),
+             (u'Verkehr', u'http://www.berliner-zeitung.de/home/10809298,10809298,view,asFeed.xml'),
+             (u'Polizei', u'http://www.berliner-zeitung.de/home/10809296,10809296,view,asFeed.xml'),
+             (u'Meinung', u'http://www.berliner-zeitung.de/home/10808020,10808020,view,asFeed.xml')]
 
     def get_masthead_url(self):
-        return 'http://www.berlinonline.de/.img/berliner-zeitung/blz_logo.gif'
+        return 'http://www.berliner-zeitung.de/image/view/10810244,7040611,data,logo.png'
+
+    def print_version(self, url):
+        return url.replace('.html', ',view,printVersion.html')
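
The parse_index method removed here followed calibre's standard shape for index-driven recipes: return a list of (section, articles) pairs, where each article is a dict with title/url/date/description keys. A sketch of that return shape, with invented values, which the removed method built by scraping the berlinonline.de index pages:

def parse_index(self):
    # Minimal shape of what parse_index must return; values are placeholders.
    articles = [{'title': 'Beispielartikel', 'date': u'',
                 'url': 'http://www.example.com/artikel.html',
                 'description': u''}]
    return [('Politik', articles)]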

recipes/biamag.recipe (new file)

@@ -0,0 +1,38 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.com.tr
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    title = 'BiaMag'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News from Turkey'
+    publisher = 'BiaMag'
+    category = 'news, politics, Turkey'
+    oldest_article = 15
+    max_articles_per_feed = 120
+    masthead_url = 'http://bianet.org/images/biamag_logo.gif'
+    language = 'tr'
+    no_stylesheets = True
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+
+    remove_tags_before = dict(name='div', attrs={'class':'manset'})
+    remove_tags = [ dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}), dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
+    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})
+
+    feeds = [(u'BiaMag', u'http://www.bianet.org/biamag.rss')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

recipes/biamag_en.recipe (new file)

@@ -0,0 +1,38 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.com.tr
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    title = 'Bianet-English'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News Network from Turkey (English)'
+    publisher = 'Bianet'
+    category = 'news, politics, Turkey'
+    oldest_article = 7
+    max_articles_per_feed = 150
+    masthead_url = 'http://bianet.org/images/english_logo.gif'
+    language = 'en_TR'
+    no_stylesheets = True
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+
+    remove_tags_before = dict(name='div', attrs={'class':'manset'})
+    remove_tags = [ dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}), dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
+    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})
+
+    feeds = [(u'Bianet-English', u'http://www.bianet.org/english.rss')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

recipes/bianet.recipe (new file)

@@ -0,0 +1,38 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.com.tr
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    title = 'Bianet'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News from Turkey'
+    publisher = 'Bianet'
+    category = 'news, politics, Turkey'
+    oldest_article = 7
+    max_articles_per_feed = 120
+    masthead_url = 'http://bianet.org/images/bianet_logo.gif'
+    language = 'tr'
+    no_stylesheets = True
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+
+    remove_tags_before = dict(name='div', attrs={'class':'manset'})
+    remove_tags = [ dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}), dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
+    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})
+
+    feeds = [(u'Bianet', u'http://bianet.org/bianet.rss')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

recipes/biolog_pl.recipe (new file)

@@ -0,0 +1,19 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Biolog_pl(BasicNewsRecipe):
+    title = u'Biolog.pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    __author__ = 'fenuks'
+    description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
+    category = 'biology'
+    language = 'pl'
+    cover_url = 'http://www.biolog.pl/naukowy,portal,biolog.png'
+    no_stylesheets = True
+    #keeps_only_tags = [dict(id='main')]
+    remove_tags_before = dict(id='main')
+    remove_tags_after = dict(name='a', attrs={'name':'komentarze'})
+    remove_tags = [dict(name='img', attrs={'alt':'Komentarze'})]
+    feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]


@@ -0,0 +1,50 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Birgun(BasicNewsRecipe):
+    title = u'Birgün Gazetesi'
+    __author__ = u'Osman Kaysan'
+    oldest_article = 7
+    max_articles_per_feed = 150
+    use_embedded_content = False
+    description = 'Birgun gazatesi haberleri, kose yazarlari'
+    publisher = 'Birgün'
+    category = 'news,haberler,turkce,gazete,birgun'
+    language = 'tr'
+    no_stylesheets = True
+    publication_type = 'newspaper'
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'     : category
+        ,'language' : language
+        ,'publisher': publisher
+        ,'linearize_tables': True
+        ,'remove_paragraph_spacing': True,
+    }
+
+    cover_img_url = 'http://www.birgun.net/i/birgun.png'
+    masthead_url = 'http://www.birgun.net/i/birgun.png'
+
+    remove_attributes = ['width','height']
+    remove_tags_before = dict(name='h2', attrs={'class':'storyHeadline'})
+    #remove_tags_after = dict(name='div', attrs={'class':'toollinks'})
+    remove_tags_after = dict(name='tr', attrs={'valign':'top'})
+    remove_tags = [ dict(name='div', attrs={'id':'byLine'}), dict(name='div', attrs={'class':'toollinks'})
+                  , dict(name='div', attrs={'class':'main-lead'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})
+                  , dict(name='a', attrs={'class':'addthis_button'})]
+    remove_empty_feeds = True
+
+    feeds = [
+              ( u'Güncel', u'http://www.birgun.net/actuels.xml')
+             ,( u'Köşe Yazarları', u'http://www.birgun.net/writer.xml')
+             ,( u'Politika', u'http://www.birgun.net/politics.xml')
+             ,( u'Ekonomi', u'http://www.birgun.net/economic.xml')
+             ,( u'Çalışma Yaşamı', u'http://www.birgun.net/workers.xml')
+             ,( u'Dünya', u'http://www.birgun.net/worlds.xml')
+             ,( u'Yaşam', u'http://www.birgun.net/lifes.xml')
+            ]


@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'News for Birmingham UK'
timefmt = ''
__author__ = 'Dave Asbury'
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
oldest_article = 1
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
language = 'en_GB'
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
keep_only_tags = [
#dict(name='h1',attrs={'id' : 'article-headline'}),
#dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
#dict(name='p')
#dict(attrs={'id' : 'three-col'})
]
remove_tags = [
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
]
feeds = [
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
(u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
]
extra_css = '''
body {font: medium sans-serif;}
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
    def print_version(self, url):
        return url + '/print'

-   def preprocess_html(self, soup):
-       for item in soup.findAll(style=True):
-           del item['style']
-       return soup
+   def get_cover_url(self):
+       soup = self.index_to_soup('http://www.blic.rs/')
+       alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
+       if alink:
+           return 'http://www.blic.rs' + alink['href']
+       return None

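The get_cover_url() added above replaces a hardcoded cover with a scrape of the front page for the day's print-edition link. A hedged, generic sketch of that idiom (the site, anchor id, and fallback URL are placeholders):

def get_cover_url(self):
    # Look for the link whose id marks today's printed cover;
    # 'cover_link' and the URLs below are placeholders.
    soup = self.index_to_soup('http://example.com/')
    alink = soup.find('a', attrs={'id': 'cover_link'})
    if alink and alink.get('href'):
        return 'http://example.com' + alink['href']
    return 'http://example.com/static/masthead.png'  # fallback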
26
recipes/blues.recipe Normal file
View File

@ -0,0 +1,26 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Oskar Kunicki <rakso at interia.pl>'
'''
Changelog:
2011-11-27
News from BluesRSS.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class BluesRSS(BasicNewsRecipe):
title = 'Blues News'
__author__ = 'Oskar Kunicki'
description ='Blues news from around the world'
publisher = 'BluesRSS.info'
category = 'news, blues, USA,UK'
oldest_article = 5
max_articles_per_feed = 100
language = 'en'
cover_url = 'http://bluesrss.info/cover.jpg'
masthead_url = 'http://bluesrss.info/cover.jpg'
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'wp-pagenavi'})]
feeds = [(u'News', u'http://bluesrss.info/feed/')]

View File

@ -10,30 +10,19 @@ http://www.buffalonews.com/RSS/
from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1298680852(BasicNewsRecipe):
+class BuffaloNews(BasicNewsRecipe):
    title = u'Buffalo News'
    oldest_article = 2
    language = 'en'
-   __author__ = 'ChappyOnIce'
+   __author__ = 'ChappyOnIce, Krittika Goyal'
    max_articles_per_feed = 20
    encoding = 'utf-8'
    masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
-   remove_javascript = True
-   extra_css = 'body {text-align: justify;}\n \
-       p {text-indent: 20px;}'
-   keep_only_tags = [
-       dict(name='div', attrs={'class':['main-content-left']})
-   ]
-   remove_tags = [
-       dict(name='div', attrs={'id':['commentCount']}),
-       dict(name='div', attrs={'class':['story-list-links']})
-   ]
-   remove_tags_after = dict(name='div', attrs={'class':['body storyContent']})
-   feeds = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
+   auto_cleanup = True
+   remove_empty_feeds = True
+
+   feeds = [
+       (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'),
@ -56,3 +45,4 @@ class AdvancedUserRecipe1298680852(BasicNewsRecipe):
        (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944')
    ]

View File

@ -4,16 +4,16 @@
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
-catavencu.ro
+academiacatavencu.info
'''
from calibre.web.feeds.news import BasicNewsRecipe

-class Catavencu(BasicNewsRecipe):
+class AcademiaCatavencu(BasicNewsRecipe):
    title = u'Academia Ca\u0163avencu'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Tagma cum laude'
-   publisher = 'Catavencu'
+   publisher = u'Ca\u0163avencu'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
@ -21,7 +21,7 @@ class Catavencu(BasicNewsRecipe):
    use_embedded_content = False
    category = 'Ziare'
    encoding = 'utf-8'
-   cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
+   cover_url = 'http://www.academiacatavencu.info/images/logo.png'

    conversion_options = {
        'comments' : description
@ -31,22 +31,21 @@ class Catavencu(BasicNewsRecipe):
    }

    keep_only_tags = [
-       dict(name='ul', attrs={'class':'articles'})
+       dict(name='h1', attrs={'class':'art_title'}),
+       dict(name='div', attrs={'class':'art_text'})
    ]

    remove_tags = [
-       dict(name='div', attrs={'class':['tools']})
-       , dict(name='div', attrs={'class':['share']})
-       , dict(name='div', attrs={'class':['category']})
-       , dict(name='div', attrs={'id':['comments']})
+       dict(name='div', attrs={'class':['desp_m']})
+       , dict(name='div', attrs={'id':['tags']})
    ]

    remove_tags_after = [
-       dict(name='div', attrs={'id':'comments'})
+       dict(name='div', attrs={'class':['desp_m']})
    ]

    feeds = [
-       (u'Feeds', u'http://catavencu.ro/feed/rss')
+       (u'Feeds', u'http://www.academiacatavencu.info/rss.xml')
    ]

    def preprocess_html(self, soup):

View File

@ -27,7 +27,7 @@ class CGM(BasicNewsRecipe):
            del item['style']
        ad=soup.findAll('a')
        for r in ad:
-           if 'http://www.hustla.pl' in r['href']:
+           if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:
                r.extract()
        gallery=soup.find('div', attrs={'class':'galleryFlash'})
        if gallery:

View File

@ -23,7 +23,9 @@ class TheCND(BasicNewsRecipe):
    remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True
-   preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+   preprocess_regexps = [ (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
+                          (re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
+                        ]

    def print_version(self, url):
        if url.find('news/article.php') >= 0:
@ -46,13 +48,15 @@ class TheCND(BasicNewsRecipe):
                title = self.tag_to_string(a)
                self.log('\tFound article: ', title, 'at', url)
                date = a.nextSibling
+               if re.search('cm', date):
+                   continue
                if (date is not None) and len(date)>2:
                    if not articles.has_key(date):
                        articles[date] = []
                    articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
                    self.log('\t\tAppend to : ', date)

-       self.log('log articles', articles)
+       #self.log('log articles', articles)
        mostCurrent = sorted(articles).pop()
        self.title = 'CND ' + mostCurrent

72
recipes/cnd_weekly.recipe Normal file
View File

@ -0,0 +1,72 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheCND(BasicNewsRecipe):
title = 'CND Weekly'
__author__ = 'Derek Liang'
description = ''
INDEX = 'http://cnd.org'
language = 'zh'
conversion_options = {'linearize_tables':True}
remove_tags_before = dict(name='div', id='articleHead')
remove_tags_after = dict(id='copyright')
remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
no_stylesheets = True
preprocess_regexps = [ (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
(re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
]
def print_version(self, url):
    if url.find('news/article.php') >= 0:
        return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
    else:
        return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

def parse_index(self):
    soup = self.index_to_soup(self.INDEX)
    feeds = []
    articles = {}
    for a in soup.findAll('a', attrs={'target':'_cnd'}):
        url = a['href']
        if url.find('article.php') < 0 :
            continue
        if url.startswith('/'):
            url = 'http://cnd.org'+url
        title = self.tag_to_string(a)
        date = a.nextSibling
        if not re.search('cm', date):
            continue
        self.log('\tFound article: ', title, 'at', url, '@', date)
        if (date is not None) and len(date)>2:
            if not articles.has_key(date):
                articles[date] = []
            articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
            self.log('\t\tAppend to : ', date)

    sorted_articles = sorted(articles)
    while sorted_articles:
        mostCurrent = sorted_articles.pop()
        self.title = 'CND ' + mostCurrent
        feeds.append((self.title, articles[mostCurrent]))
    return feeds

def populate_article_metadata(self, article, soup, first):
    header = soup.find('h3')
    self.log('header: ' + self.tag_to_string(header))
    pass

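The parse_index() above builds the structure calibre expects from an index-driven recipe: a list of (section_title, articles) tuples, where each article is a dict with at least 'title' and 'url' keys ('description' and 'date' may be empty strings). The recipe buckets links by the date string that follows them, then pops the lexically largest keys so the newest issues come first. A minimal sketch of the required return value (the story URL and date are placeholders):

def parse_index(self):
    articles = [{'title': 'Sample story',
                 'url': 'http://cnd.org/sample',  # placeholder
                 'description': '', 'date': ''}]
    return [('CND 12/01/20', articles)]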
View File

@ -0,0 +1,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Computerworld_pl(BasicNewsRecipe):
title = u'Computerworld.pl'
__author__ = 'fenuks'
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
category = 'IT'
language = 'pl'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'s'})]
remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
def get_cover_url(self):
    soup = self.index_to_soup('http://www.computerworld.pl/')
    cover=soup.find(name='img', attrs={'class':'prawo'})
    self.cover_url=cover['src']
    return getattr(self, 'cover_url', self.cover_url)

View File

@ -0,0 +1,52 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
#from calibre import __appname__
from calibre.utils.magick import Image
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Cosmopolitan UK'
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
__author__ = 'Dave Asbury'
#last update 21/12/11
# greyscale code by Starson
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
preprocess_regexps = [
(re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]
language = 'en_GB'
masthead_url = 'http://www.cosmopolitan.co.uk/cm/cosmopolitanuk/site_images/header/cosmouk_logo_home.gif'
keep_only_tags = [
dict(attrs={'class' : ['dateAuthor', 'publishDate']}),
dict(name='div',attrs ={'id' : ['main_content']})
]
remove_tags = [
dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
dict(name='li',attrs={'class' : 'thumb'})
]
feeds = [
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
def postprocess_html(self, soup, first):
    #process all the images
    for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
        iurl = tag['src']
        img = Image()
        img.open(iurl)
        if img < 0:
            raise RuntimeError('Out of memory')
        img.type = "GrayscaleType"
        img.save(iurl)
    return soup

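postprocess_html() runs after calibre has downloaded the article images locally, so each tag['src'] is a local file path that the ImageMagick wrapper can open and rewrite in place as grayscale. A hedged variant of the same loop that skips unreadable images rather than aborting the whole download (the try/except guard is an addition, not part of the recipe above):

def postprocess_html(self, soup, first):
    from calibre.utils.magick import Image
    for tag in soup.findAll('img', src=True):
        try:
            img = Image()
            img.open(tag['src'])        # local path at postprocess time
            img.type = 'GrayscaleType'  # convert in place to grayscale
            img.save(tag['src'])
        except Exception:
            continue  # leave the image untouched instead of failing
    return soup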
View File

@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    description = 'News as provide by The Daily Mirror -UK'

    __author__ = 'Dave Asbury'
-   # last updated 30/10/11
+   # last updated 26/12/11
    language = 'en_GB'

    cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@ -13,30 +13,22 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

-   oldest_article = 2
-   max_articles_per_feed = 30
+   oldest_article = 1
+   max_articles_per_feed = 20
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
-   extra_css = '''
-       body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
-   '''
-   keep_only_tags = [
-       dict(name='div',attrs={'id' : 'body-content'})
-   ]
-   remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
+   auto_cleanup = True

    remove_tags = [
-       dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
-       dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
-       dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
-       dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
+       dict(name='title'),
+       dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
    ]

    preprocess_regexps = [
-       (re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
-   preprocess_regexps = [
-       (re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
+       (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]

    feeds = [
@ -53,5 +45,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
        ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
        # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
    ]
+   extra_css = '''
+       body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+       h1{ font-size:18px;}
+       img { display:block}
+   '''

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class DailyWritingTips(BasicNewsRecipe):
title = u'Daily Writing Tips'
language = 'en_GB'
__author__ = 'NotTaken'
oldest_article = 7 #days
max_articles_per_feed = 40
use_embedded_content = True
no_stylesheets = True
auto_cleanup = False
encoding = 'utf-8'
feeds = [
('Latest tips',
'http://feeds2.feedburner.com/DailyWritingTips'),
]

15
recipes/datasport.recipe Normal file
View File

@ -0,0 +1,15 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian soccer news website - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
title = u'Datasport'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]

View File

@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1324913694(BasicNewsRecipe):
title = u'Derin Dusunce'
language = 'tr'
__author__ = 'asalet_r'
oldest_article = 7
max_articles_per_feed = 20
auto_cleanup = True
feeds = [(u'Derin D\xfc\u015f\xfcnce', u'http://www.derindusunce.org/feed/')]

View File

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
'''
descopera.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Descopera(BasicNewsRecipe):
title = u'Descoperă.org'
__author__ = 'Marius Ignătescu'
description = 'Descoperă. Placerea de a cunoaște'
publisher = 'descopera.org'
category = 'science, technology, culture, history, earth'
language = 'ro'
oldest_article = 14
max_articles_per_feed = 100
encoding = 'utf8'
no_stylesheets = True
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
keep_only_tags = [dict(name='div', attrs={'class':['post']})]
remove_tags = [dict(name='div', attrs={'class':['topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]
remove_attributes = ['width','height']
cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
feeds = [(u'Articles', u'http://www.descopera.org/feed/')]
def preprocess_html(self, soup):
    return self.adeify_images(soup)

View File

@ -46,7 +46,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
            dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
            dict(name = 'div', attrs = {'class' : 'uniBox'}),
            dict(name = 'object', attrs = {}),
-           dict(name = 'h3', attrs = {})
+           dict(name = 'h3', attrs = {}),
+           dict(attrs={'class':'twitter-share-button'})
        ]

    preprocess_regexps = [
@ -58,3 +59,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
            (r'\s*</', lambda match: '</'),
        ]
    ]
+
+   def skip_ad_pages(self, soup):
+       if 'Advertisement' in soup.title:
+           nexturl = soup.find('a')['href']
+           return self.index_to_soup(nexturl, raw=True)

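The skip_ad_pages() hook added above handles interstitial ad pages: when the downloaded page's <title> contains 'Advertisement', the recipe follows the page's first link and returns its raw bytes, which calibre uses in place of the ad page; returning None keeps the page as downloaded. A generic sketch of the hook (the marker string and first-<a> heuristic are site-specific placeholders):

def skip_ad_pages(self, soup):
    title = soup.find('title')
    if title and 'Advertisement' in self.tag_to_string(title):
        # Follow the real article link instead of the interstitial.
        nexturl = soup.find('a')['href']
        return self.index_to_soup(nexturl, raw=True)
    # Falling through (returning None) keeps the original page.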
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1324736687(BasicNewsRecipe):
title = u'D\xfcnya Bizim'
language = 'tr'
__author__ = 'asalet_r'
oldest_article = 7
max_articles_per_feed = 10
auto_cleanup = True
feeds = [(u'Aktif \u0130mamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=31'), (u'Ayr\u0131nt\u0131 Defteri', u'http://dunyabizim.com/servisler/rss.php?kategoriID=58'), (u'Baba Kitaplar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=4'), (u'Bu da Oldu', u'http://dunyabizim.com/servisler/rss.php?kategoriID=32'), (u'\xc7-al\u0131nt\u0131 Yaz\u0131lar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=33'), (u'Dar\xfclmedya', u'http://dunyabizim.com/servisler/rss.php?kategoriID=49'), (u'Gidenler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=59'), (u'G\xfczel Mekanlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=43'), (u'\u0130yi Haberler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=18'), (u'\u0130yi M\xfczikler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=2'), (u'Kalite Dergiler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=3'), (u'Konu\u015fa Konu\u015fa', u'http://dunyabizim.com/servisler/rss.php?kategoriID=24'), (u'M\xfcstesta G\xfczeller', u'http://dunyabizim.com/servisler/rss.php?kategoriID=65'), (u'O \u015eimdi Nerede?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=52'), (u'Olsa Ke\u015fke', u'http://dunyabizim.com/servisler/rss.php?kategoriID=34'), (u'Orada Ne Oldu?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=38'), (u'\xd6nemli Adamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=1'), (u'Polemik', u'http://dunyabizim.com/servisler/rss.php?kategoriID=39'), (u'Sinema', u'http://dunyabizim.com/servisler/rss.php?kategoriID=23'), (u'Yalan Haber', u'http://dunyabizim.com/servisler/rss.php?kategoriID=40'), (u'Yeni \u015eeyler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=57'), (u'Zekeriya Sofras\u0131', u'http://dunyabizim.com/servisler/rss.php?kategoriID=60')]

View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1321194347(BasicNewsRecipe):
title = u'D\xfcnya B\xfclteni'
language = 'tr'
__author__ = 'asalet_r'
oldest_article = 7
max_articles_per_feed = 50
auto_cleanup = True
feeds = [(u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=157'), (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=153'), (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=174'), (u'K\xfclt\xfcr-Sanat', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=66'), (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=200'), (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=123'), (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=90'), (u'Aile Sa\u011fl\u0131k E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=75')]

View File

@ -0,0 +1,58 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Dziennik_pl(BasicNewsRecipe):
title = u'Dziennik.pl'
__author__ = 'fenuks'
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
category = 'newspaper'
language = 'pl'
cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
remove_empty_feeds=True
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
keep_only_tags=[dict(id='article')]
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
(u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
(u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
(u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
(u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
(u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
(u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
(u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
def append_page(self, soup, appendtag):
    tag=soup.find('a', attrs={'class':'page_next'})
    if tag:
        appendtag.find('div', attrs={'class':'article_paginator'}).extract()
        while tag:
            soup2= self.index_to_soup(tag['href'])
            tag=soup2.find('a', attrs={'class':'page_next'})
            if not tag:
                for r in appendtag.findAll('div', attrs={'class':'art_src'}):
                    r.extract()
            pagetext = soup2.find(name='div', attrs={'class':'article_body'})
            for dictionary in self.remove_tags:
                v=pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
                for delete in v:
                    delete.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        if appendtag.find('div', attrs={'class':'article_paginator'}):
            appendtag.find('div', attrs={'class':'article_paginator'}).extract()

def preprocess_html(self, soup):
    self.append_page(soup, soup.body)
    return soup

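append_page() above stitches a multi-page article into a single document: it follows each 'page_next' link, re-applies the recipe's remove_tags filters to the fetched page's article_body div, appends it to the first page, and finally deletes the paginator block. The core of the pattern in a minimal, hedged form (the class names are placeholders, not Dziennik's markup):

def append_page(self, soup, appendtag):
    # Follow 'next' links and merge each page's body into the first page.
    tag = soup.find('a', attrs={'class': 'next'})        # placeholder class
    while tag:
        soup2 = self.index_to_soup(tag['href'])
        pagetext = soup2.find('div', attrs={'class': 'article-body'})  # placeholder
        if pagetext is not None:
            appendtag.insert(len(appendtag.contents), pagetext)
        tag = soup2.find('a', attrs={'class': 'next'})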
View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
'''
Fetch echo-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Echo_Online(BasicNewsRecipe):
title = u'Echo Online' # 2011-12-28 AGe
description = '-Echo Online-'
publisher = 'Echo Online GmbH'
category = 'News, Germany'
__author__ = 'Armin Geller' # 2011-12-28 AGe
language = 'de'
lang = 'de-DE'
encoding = 'iso-8859-1'
timefmt = ' [%a, %d %b %Y]'
oldest_article = 7
max_articles_per_feed = 50 # 2011-12-28 AGe
no_stylesheets = True
auto_cleanup = True
remove_javascript = True
feeds = [
(u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
(u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
(u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
(u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
(u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
(u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
(u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
(u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
(u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
(u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
(u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
]
def print_version(self, url):
    return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif'

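The print_version() above first opens the article with open_novisit() and takes geturl(), resolving any server-side redirects before appending the site's print-layout query string; the cost is one extra HTTP request per article. The resolve-then-rewrite step in isolation (query string as used above):

def print_version(self, url):
    # Resolve redirects, then request the print layout of the final URL.
    final = self.browser.open_novisit(url).geturl()
    return final + '?_FRAME=33&_FORMAT=PRINT'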
View File

@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
'''
Fetch Edge.org conversations
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EdgeConversationRSS(BasicNewsRecipe):
title = u'Edge.org Conversations'
__author__ = 'levien'
language = 'en'
description = '''Edge.org offers "open-minded, free ranging, intellectually
playful ... an unadorned pleasure in curiosity, a collective expression of
wonder at the living and inanimate world ... an ongoing and thrilling
colloquium.'''
oldest_article = 60
max_articles_per_feed = 100
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'class':'HomeLeftPannel IMGCTRL'}) ]
remove_tags = [
dict(name='div',attrs={'class':'Logo'})
]
feeds = [(u'Edge RSS', u'http://edge.org/feeds/')]
def print_version(self, url):
    return url.replace('conversation/', 'conversation.php?cid=')

def parse_feeds(self):
    # Call parent's method.
    feeds = BasicNewsRecipe.parse_feeds(self)
    # Loop through all feeds.
    for feed in feeds:
        # Loop through all articles in feed.
        for article in feed.articles[:]:
            # Remove anything that is not a conversation, and remove PDF files as well...
            if not ('CONVERSATION' in article.title):
                feed.articles.remove(article)
            elif 'pdf' in article.url:
                feed.articles.remove(article)
    return feeds

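parse_feeds() above is a post-filter: it lets BasicNewsRecipe fetch and parse the feeds normally, then drops every article that is not a conversation or that points at a PDF. Note the feed.articles[:] slice, which iterates over a copy so that remove() on the live list cannot skip entries. Condensed, the same filter reads:

def parse_feeds(self):
    feeds = BasicNewsRecipe.parse_feeds(self)
    for feed in feeds:
        for article in feed.articles[:]:   # iterate over a copy
            if 'CONVERSATION' not in article.title or 'pdf' in article.url:
                feed.articles.remove(article)
    return feeds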
View File

@ -5,12 +5,11 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
-__version__ = 'v0.07'
-__date__ = '06, February 2011'
+__version__ = 'v0.08'
+__date__ = '13, November 2011'
'''
elperiodicodearagon.com
'''

-import re
from calibre.web.feeds.news import BasicNewsRecipe

@ -20,13 +19,13 @@ class elperiodicodearagon(BasicNewsRecipe):
    description = u'Noticias desde Aragon'
    publisher = u'elperiodicodearagon.com'
    category = u'news, politics, Spain, Aragon'
-   oldest_article = 2
+   oldest_article = 1
    delay = 0
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'es'
-   encoding = 'utf8'
+   encoding = 'iso-8859-1'
    remove_empty_feeds = True
    remove_javascript = True

@ -39,61 +38,30 @@ class elperiodicodearagon(BasicNewsRecipe):
    }

    feeds = [
-       (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
-       (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
-       (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
-       (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
-       (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
-       (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
-       (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
-       (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
-       (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
-       (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
+       (u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
+       (u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
+       (u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
+       (u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
+       (u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
+       (u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
+       (u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
+       (u'CAI Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
+       (u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
+       (u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
+       (u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
+       (u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
+       (u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
+       (u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
+       (u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
+       (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml')
    ]

-   extra_css = '''
-       h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
-       h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
-       h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
-       .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
-       img{margin-bottom: 0.4em}
-   '''
    remove_attributes = ['height','width']

-   keep_only_tags = [dict(name='div', attrs={'id':'contenidos'})]
+   keep_only_tags = [dict(name='div', attrs={'id':'Noticia'})]

-   # Remove all the clutter
-   remove_tags = [dict(name='ul', attrs={'class':'herramientasDeNoticia'}),
-       dict(name='span', attrs={'class':'MasInformacion '}),
-       dict(name='span', attrs={'class':'MasInformacion'}),
-       dict(name='div', attrs={'class':'Middle'}),
-       dict(name='div', attrs={'class':'MenuCabeceraRZaragoza'}),
-       dict(name='div', attrs={'id':'MenuCabeceraRZaragoza'}),
-       dict(name='div', attrs={'class':'MenuEquipo'}),
-       dict(name='div', attrs={'class':'TemasRelacionados'}),
-       dict(name='div', attrs={'class':'GaleriaEnNoticia'}),
-       dict(name='div', attrs={'class':'Recorte'}),
-       dict(name='div', attrs={'id':'NoticiasenRecursos'}),
-       dict(name='div', attrs={'id':'NoticiaEnPapel'}),
-       dict(name='p', attrs={'class':'RecorteEnNoticias'}),
-       dict(name='div', attrs={'id':'Comparte'}),
-       dict(name='div', attrs={'id':'CajaComparte'}),
-       dict(name='a', attrs={'class':'EscribirComentario'}),
-       dict(name='a', attrs={'class':'AvisoComentario'}),
-       dict(name='div', attrs={'class':'CajaAvisoComentario'}),
-       dict(name='div', attrs={'class':'navegaNoticias'}),
-       dict(name='div', attrs={'class':'Mensaje'}),
-       dict(name='div', attrs={'id':'PaginadorDiCom'}),
-       dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
-       dict(name='div', attrs={'id':'CintilloComentario'}),
-       dict(name='div', attrs={'id':'EscribeComentario'}),
-       dict(name='div', attrs={'id':'FormularioComentario'}),
-       dict(name='div', attrs={'id':'FormularioNormas'})]

    # Fetch the print-edition front page (the format=1 image has higher resolution)
    def get_cover_url(self):
@ -104,23 +72,7 @@ class elperiodicodearagon(BasicNewsRecipe):
            return image['src'].rstrip('format=2') + 'format=1'
        return None

-   # Remove the whitespace between the story and the comments (lines 1 and 2)
-   # The index did not point correctly at the start of the story (line 3)
-   preprocess_regexps = [
-       (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-       (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-       (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
-   ]
-
-   # Replace the embedded YouTube video with an image
-   def preprocess_html(self, soup):
-       for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
-           if video_yt:
-               video_yt.name = 'img'
-               fuente = video_yt['src']
-               fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
-               video_yt['src'] = fuente2 + '/0.jpg'
-       return soup
+   # Use the mobile version of the site
+   def print_version(self, url):
+       return url.replace('http://www.elperiodicodearagon.com/', 'http://www.elperiodicodearagon.com/m/')

View File

@ -0,0 +1,58 @@
################################################################################
#Description: http://es.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2012.01.20. - V1.2
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class elet_es_irodalom(BasicNewsRecipe):
title = u'\u00c9let \u00e9s Irodalom'
__author__ = 'Bigpapa'
oldest_article = 7
max_articles_per_feed = 30 # Maximum number of articles per feed to keep in the generated e-book.
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'iso-8859-2'
category = 'Cikkek'
language = 'hu'
publication_type = 'newsportal'
extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
needs_subscription = 'optional'
masthead_url = 'http://www.es.hu/images/logo.jpg'
timefmt = ' [%Y %b %d, %a]'
# Do not hard-code your credentials here; you supply them when you download the issue!
def get_browser(self):
    br = BasicNewsRecipe.get_browser()
    if self.username is not None and self.password is not None:
        br.open('http://www.es.hu/')
        br.select_form(name='userfrmlogin')
        br['cusername'] = self.username
        br['cpassword'] = self.password
        br.submit()
    return br
keep_only_tags = [
dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
]
remove_tags = [
dict(name='a', attrs={'target':['_TOP']}),
dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
]
feeds = [
(u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
(u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
(u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
(u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
(u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
(u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
(u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
(u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
(u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
]

View File

@ -4,7 +4,8 @@ __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
elmundo.es
'''

+import re
+import time
from calibre.web.feeds.news import BasicNewsRecipe

class ElMundo(BasicNewsRecipe):
@ -18,12 +19,15 @@ class ElMundo(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'iso8859_15'
+   remove_javascript = True
+   remove_empty_feeds = True
    language = 'es'
    masthead_url = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
    publication_type = 'newspaper'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .metadata_noticia{font-size: small}
+       .pestana_GDP{font-size: small; font-weight:bold}
        h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
        .hora{color: red}
        .update{color: gray}
@ -41,8 +45,11 @@ class ElMundo(BasicNewsRecipe):
    remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
    remove_attributes = ['lang','border']
    remove_tags = [
-       dict(name='div', attrs={'class':['herramientas','publicidad_google']})
-       ,dict(name='div', attrs={'id':'modulo_multimedia' })
+       dict(name='div', attrs={'class':['herramientas','publicidad_google','comenta','col col-2b','apoyos','no-te-pierdas']})
+       ,dict(name='div', attrs={'class':['publicidad publicidad_cuerpo_noticia','comentarios_nav','mensaje_privado','interact']})
+       ,dict(name='div', attrs={'class':['num_comentarios estirar']})
+       ,dict(name='span', attrs={'class':['links_comentar']})
+       ,dict(name='div', attrs={'id':['comentar']})
        ,dict(name='ul', attrs={'class':'herramientas' })
        ,dict(name=['object','link','embed','iframe','base','meta'])
    ]
@ -50,13 +57,31 @@ class ElMundo(BasicNewsRecipe):
    feeds = [
        (u'Portada' , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' )
        ,(u'Deportes' , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
-       ,(u'Economia' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
-       ,(u'Espana' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
+       ,(u'Econom\xeda' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
+       ,(u'Espa\xf1a' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
        ,(u'Internacional' , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
        ,(u'Cultura' , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml' )
-       ,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
-       ,(u'Comunicacion' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
-       ,(u'Television' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
+       ,(u'Ciencia/Ecolog\xeda', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
+       ,(u'Comunicaci\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
+       ,(u'Televisi\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
+       ,(u'Salud' , u'http://estaticos.elmundo.es/elmundosalud/rss/portada.xml' )
+       ,(u'Solidaridad' , u'http://estaticos.elmundo.es/elmundo/rss/solidaridad.xml' )
+       ,(u'Su vivienda' , u'http://estaticos.elmundo.es/elmundo/rss/suvivienda.xml' )
+       ,(u'Motor' , u'http://estaticos.elmundo.es/elmundomotor/rss/portada.xml' )
+       ,(u'Madrid' , u'http://estaticos.elmundo.es/elmundo/rss/madrid.xml' )
+       ,(u'Barcelona' , u'http://estaticos.elmundo.es/elmundo/rss/barcelona.xml' )
+       ,(u'Pa\xeds Vasco' , u'http://estaticos.elmundo.es/elmundo/rss/paisvasco.xml' )
+       ,(u'Baleares' , u'http://estaticos.elmundo.es/elmundo/rss/baleares.xml' )
+       ,(u'Castilla y Le\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castillayleon.xml' )
+       ,(u'Valladolid' , u'http://estaticos.elmundo.es/elmundo/rss/valladolid.xml' )
+       ,(u'Valencia' , u'http://estaticos.elmundo.es/elmundo/rss/valencia.xml' )
+       ,(u'Alicante' , u'http://estaticos.elmundo.es/elmundo/rss/alicante.xml' )
+       ,(u'Castell\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castellon.xml' )
+       ,(u'Andaluc\xeda' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia.xml' )
+       ,(u'Sevilla' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_sevilla.xml' )
+       ,(u'M\xe1laga' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_malaga.xml' )
    ]

    def preprocess_html(self, soup):
@ -67,3 +92,34 @@ class ElMundo(BasicNewsRecipe):

    def get_article_url(self, article):
        return article.get('guid', None)

+   preprocess_regexps = [
+       # Show a still image in place of the embedded videos
+       (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+       (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+       (re.compile(r'var video=', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+       # Hide the comment numbering: 1, 2, 3 ...
+       (re.compile(r'<ol>\n<li style="z-index:', re.DOTALL|re.IGNORECASE), lambda match: '<ul><li style="z-index:'),
+       (re.compile(r'</ol>\n<div class="num_comentarios estirar">', re.DOTALL|re.IGNORECASE), lambda match: '</ul><div class="num_comentarios estirar">'),
+   ]
+
+   # Fetch the cover image
+   def get_cover_url(self):
+       cover = None
+       st = time.localtime()
+       year = str(st.tm_year)
+       month = "%.2d" % st.tm_mon
+       day = "%.2d" % st.tm_mday
+       #http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg
+       cover = 'http://img.kiosko.net/' + year + '/' + month + '/' + day + '/es/elmundo.750.jpg'
+       br = BasicNewsRecipe.get_browser()
+       try:
+           br.open(cover)
+       except:
+           self.log("\nCover not available")
+           cover = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
+       return cover

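This recipe (and the Expansion recipe later in the commit) derives its cover from kiosko.net by formatting today's date into the image path, probing the URL, and falling back to a static masthead if it is missing; for example, on 25 January 2012 the concatenation above produces http://img.kiosko.net/2012/01/25/es/elmundo.750.jpg. The same construction written compactly with strftime (a sketch, not the recipe's code):

import time

def kiosko_cover(paper='elmundo'):
    # http://img.kiosko.net/YYYY/MM/DD/es/<paper>.750.jpg
    return time.strftime('http://img.kiosko.net/%Y/%m/%d/es/') + paper + '.750.jpg'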
16
recipes/emuzica_pl.recipe Normal file
View File

@ -0,0 +1,16 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class eMuzyka(BasicNewsRecipe):
title = u'eMuzyka'
__author__ = 'fenuks'
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music'
language = 'pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

View File

@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
    use_embedded_content = False
    remove_javascript = True

-   needs_subscription = True
+   needs_subscription = 'optional'
    encoding= 'ISO-8859-1'

    remove_tags_before = dict(name='font', attrs={'class':'date'})
@ -75,10 +75,9 @@ class ESPN(BasicNewsRecipe):
        return soup

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
+       if self.username and self.password:
            br.set_handle_refresh(False)
            url = ('https://r.espn.go.com/members/v3_1/login')
            raw = br.open(url).read()
@ -100,7 +99,6 @@ class ESPN(BasicNewsRecipe):
        return article.get('guid', None)

    def print_version(self, url):
        if 'eticket' in url:
            return url.partition('&')[0].replace('story?', 'print?')
        match = re.search(r'story\?(id=\d+)', url)

View File

@ -1,35 +1,43 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Gerardo Diez' __copyright__ = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>' __author__ = 'desUBIKado, based on an earlier version by Gerardo Diez'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)' __version__ = 'v1.01'
__docformat__ = 'restructuredtext en' __date__ = '13, November 2011'
''' '''
expansion.es [url]http://www.expansion.com/[/url]
''' '''
import time
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
title =u'Expansion.com' class expansion_spanish(BasicNewsRecipe):
__author__ ='Gerardo Diez' __author__ ='Gerardo Diez & desUBIKado'
publisher =u'Unidad Editorial Información Económica, S.L.' description ='Financial news from Spain'
category ='finances, catalunya' title =u'Expansion'
oldest_article =1 publisher =u'Unidad Editorial Internet, S.L.'
category ='news, finances, Spain'
oldest_article = 2
simultaneous_downloads = 10
max_articles_per_feed =100 max_articles_per_feed =100
simultaneous_downloads =10 timefmt = '[%a, %d %b, %Y]'
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png' encoding ='iso-8859-15'
timefmt ='[%A, %d %B, %Y]'
encoding ='latin'
language ='es' language ='es'
remove_javascript =True use_embedded_content = False
no_stylesheets =True remove_javascript = True
no_stylesheets = True
remove_empty_feeds = True
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']}) keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
remove_tags =[ remove_tags =[
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}), dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}), dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
dict(name='span', attrs={'class':['comentarios']}), dict(name='span', attrs={'class':['comentarios']}),
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}), dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
dict(name='div', attrs={'id':['comentarios_lectores_listado']}) dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
] ]
feeds =[ feeds =[
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'), (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'), (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'), (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'), (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
(u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'), (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
(u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'), (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'), (u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'), (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'), (u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'), (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
(u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'), (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
(u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'), (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
(u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'), (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
(u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'), (u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
(u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'), (u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
(u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'), (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
(u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'), (u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
(u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'), (u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
(u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'), (u'Deporte y Negocio', u' [url]http://estaticos.expansion.com/rss/empresasdeporte.xml[/url]'),
(u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'), (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
(u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'), (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
(u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'), (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
(u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
(u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'), (u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'), (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
(u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
(u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
(u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'), (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
(u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'), (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
(u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'), (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'), (u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'), (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
(u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'), (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
(u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'), (u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
(u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml') (u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
] ]
# Obtener la imagen de portada
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
#[url]http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg[/url]
cover='http://img5.kiosko.net/'+ year + '/' + month + '/' + day +'/es/expansion.750.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
self.log("\nPortada no disponible")
cover ='http://www.aproahp.org/enlaces/images/diario_expansion.gif'
return cover
# Para que no salte la publicidad al recuperar la noticia, y que siempre se recupere
# la página web, mando la variable "t" con la hora "linux" o "epoch" actual
# haciendole creer al sitio web que justo se acaba de ver la publicidad
def print_version(self, url):
st = time.time()
segundos = str(int(st))
parametros = '.html?t=' + segundos
return url.replace('.html', parametros)
_processed_links = []
def get_article_url(self, article):
# Para obtener la url original del artículo a partir de la de "feedsportal"
link = article.get('link', None)
if link is None:
return article
if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
for i in range(0,len(a)):
link=link.replace(a[i],b[i])
link="http://"+link
# Eliminar artículos duplicados en otros feeds
if not (link in self._processed_links):
self._processed_links.append(link)
else:
link = None
return link
    # A little CSS to improve the presentation of the articles
    extra_css = '''
        .entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
        .fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
    '''

    # To show the poster image of embedded videos
    preprocess_regexps = [
        (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
        (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
        (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
    ]
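A standalone sketch of the feedsportal de-obfuscation performed in get_article_url above; the sample slug is invented for illustration. The replacement order matters: the escape '0A' stands for a literal '0' and must be decoded last, otherwise the zeros it produces could pair with the next character and be mis-read as another escape code.

    def decode_feedsportal(slug):
        codes = ['0B', '0C', '0D', '0E', '0F', '0G', '0N', '0L0S', '0A']
        chars = ['.',  '/',  '?',  '-',  '=',  '&',  '.com', 'www.', '0']
        for code, char in zip(codes, chars):
            slug = slug.replace(code, char)
        return 'http://' + slug

    # Hypothetical slug, decodes to http://www.expansion.com/empresas/noticia.html
    print decode_feedsportal('0L0Sexpansion0Bcom0Cempresas0Cnoticia0Bhtml')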
recipes/fhm_uk.recipe Normal file
@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'FHM UK'
description = 'Good News for Men'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
__author__ = 'Dave Asbury'
# last updated 27/12/11
language = 'en_GB'
oldest_article = 28
max_articles_per_feed = 12
remove_empty_feeds = True
no_stylesheets = True
#auto_cleanup = True
#articles_are_obfuscated = True
keep_only_tags = [
dict(name='h1'),
dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
dict(name='div',attrs={'id' : ['articleLeft']}),
dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody']}),
]
feeds = [
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
]
recipes/fisco_oggi.recipe Normal file
@ -0,0 +1,18 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of the Italian Government Income Agency (about revenue, taxation, taxes) - v1.00 (17 December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
title = u'Fisco Oggi'
language = 'it'
__author__ = 'faber1971'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
remove_javascript = True
no_stylesheets = True
feeds = [
    (u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'),
    (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'),
    (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'),
    (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'),
    (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'),
    (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'),
    (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'),
    (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')
]
@ -1,57 +1,68 @@
-# -*- coding: utf-8 -*-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

-class Focus_pl(BasicNewsRecipe):
-    title = u'Focus.pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
-    __author__ = 'fenuks'
+class FocusRecipe(BasicNewsRecipe):
+    __license__ = 'GPL v3'
+    __author__ = u'intromatyk <intromatyk@gmail.com>'
     language = 'pl'
-    description = 'polish scientific monthly magazine'
+    version = 1
+    title = u'Focus'
+    publisher = u'Gruner + Jahr Polska'
+    category = u'News'
+    description = u'Newspaper'
     category = 'magazine'
     cover_url = ''
     remove_empty_feeds = True
     no_stylesheets = True
-    remove_tags_before = dict(name='div', attrs={'class':'h2 h2f'})
-    remove_tags_after = dict(name='div', attrs={'class':'clear'})
+    oldest_article = 7
+    max_articles_per_feed = 100000
+    recursions = 0
+    remove_javascript = True
+    encoding = 'utf-8'
+    # Seems to work best, but YMMV
+    simultaneous_downloads = 5
+    r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
+    keep_only_tags = []
+    keep_only_tags.append(dict(name='div', attrs={'id':'cll'}))
+    remove_tags = []
+    remove_tags.append(dict(name='div', attrs={'class':'ulm noprint'}))
+    remove_tags.append(dict(name='div', attrs={'class':'txb'}))
+    remove_tags.append(dict(name='div', attrs={'class':'h2'}))
+    remove_tags.append(dict(name='ul', attrs={'class':'txu'}))
+    remove_tags.append(dict(name='div', attrs={'class':'ulc'}))
+    extra_css = '''
+        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+        h1{text-align: left;}
+        h2{font-size: medium; font-weight: bold;}
+        p.lead {font-weight: bold; text-align: left;}
+        .authordate {font-size: small; color: #696969;}
+        .fot{font-size: x-small; color: #666666;}
+    '''

-    feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
-             (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
-             (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
-             (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
-             (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
-             (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
-             (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
-             (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
-             (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
-            ]
+    feeds = [
+        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
+        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
+        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
+        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
+        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
+        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
+        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
+    ]

     def skip_ad_pages(self, soup):
-        tag = soup.find(name='a')
-        if tag:
-            new_soup = self.index_to_soup(tag['href'] + 'do-druku/1/', raw=True)
-            return new_soup
-        return None
+        if ('advertisement' in soup.find('title').string.lower()):
+            href = soup.find('a').get('href')
+            return self.index_to_soup(href, raw=True)
+        else:
+            return None

-    def append_page(self, appendtag):
-        tag = appendtag.find(name='div', attrs={'class':'arrows'})
-        if tag:
-            nexturl = 'http://www.focus.pl/' + tag.a['href']
-            for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
-                rem.extract()
-            while nexturl:
-                soup2 = self.index_to_soup(nexturl)
-                nexturl = None
-                pagetext = soup2.find(name='div', attrs={'class':'txt'})
-                tag = pagetext.find(name='div', attrs={'class':'arrows'})
-                for r in tag.findAll(name='a'):
-                    if u'Następne' in r.string:
-                        nexturl = 'http://www.focus.pl/' + r['href']
-                for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
-                    rem.extract()
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)

     def get_cover_url(self):
         soup = self.index_to_soup('http://www.focus.pl/magazyn/')
@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
         self.cover_url = 'http://www.focus.pl/' + tag.a['href']
         return getattr(self, 'cover_url', self.cover_url)

-    def preprocess_html(self, soup):
-        self.append_page(soup.body)
-        return soup
+    def print_version(self, url):
+        if url.count('focus.pl.feedsportal.com'):
+            u = url.find('focus0Bpl')
+            u = 'http://www.focus.pl/' + url[u + 11:]
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace('0E', '-')
+            u = u.replace('/nc/1//story01.htm', '/do-druku/1')
+        else:
+            u = url.replace('/nc/1', '/do-druku/1')
+        return u
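As an aside, skip_ad_pages is calibre's hook for interstitial ad pages: it is called with the soup of each downloaded page, and returning raw markup substitutes that page while returning None keeps it. A defensive variant of the new logic above, written as a drop-in method for a BasicNewsRecipe subclass; the extra None-guards are my addition, not part of the commit:

    def skip_ad_pages(self, soup):
        title = soup.find('title')
        if title is not None and 'advertisement' in (title.string or '').lower():
            target = soup.find('a')
            if target is not None and target.get('href'):
                # raw=True returns the bytes of the linked page instead of a soup
                return self.index_to_soup(target['href'], raw=True)
        return None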
@ -8,31 +8,35 @@ class FSP(BasicNewsRecipe):
     __author__ = 'fluzao'
     description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
                   u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
-    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
+    # found this to be the easiest place to find the index page (13-Nov-2011).
+    # searching for the "Indice Geral" link
+    HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
+    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
     language = 'pt'
     no_stylesheets = True
     max_articles_per_feed = 40
     remove_javascript = True
     needs_subscription = True
-    remove_tags_before = dict(name='b')
+    remove_tags_before = dict(name='p')
     remove_tags = [dict(name='td', attrs={'align':'center'})]
     remove_attributes = ['height','width']
-    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
     # fixes the problem with the section names
     section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
                     'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
                     'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
-                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
+                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio', \
+                    'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios', \
+                    'veiculos' : u've\xedculos', 'corrida' : 'folha corrida'}
     # this solves the problem with truncated content in Kindle
     conversion_options = {'linearize_tables' : True}
     # this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
     # Indice e Comunicar Erros
-    preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
-                                      re.DOTALL|re.IGNORECASE), lambda match: r''),
-                          (re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
-                                      re.DOTALL|re.IGNORECASE), lambda match: r'')]
+    preprocess_regexps = [(re.compile(r'<!--/NOTICIA-->.*Comunicar Erros</a>',
+                                      re.DOTALL|re.IGNORECASE), lambda match: r'')]

     def get_browser(self):
@ -49,7 +53,25 @@ class FSP(BasicNewsRecipe):
     def parse_index(self):
-        soup = self.index_to_soup(self.INDEX)
+        # Searching for the index page on the HOMEPAGE
+        hpsoup = self.index_to_soup(self.HOMEPAGE)
+        indexref = hpsoup.find('a', href=re.compile('^indices.*'))
+        self.log('--> tag containing the today s index: ', indexref)
+        INDEX = indexref['href']
+        INDEX = 'http://www1.folha.uol.com.br/fsp/' + INDEX
+        self.log('--> INDEX after extracting href and adding prefix: ', INDEX)
+        # ... and taking the opportunity to get the cover image link
+        coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href']
+        if coverurl:
+            self.log('--> tag containing the today s cover: ', coverurl)
+            coverurl = coverurl.replace('htm', 'jpg')
+            coverurl = 'http://www1.folha.uol.com.br/fsp/images/' + coverurl
+            self.log('--> coverurl after extracting href and adding prefix: ', coverurl)
+            self.cover_url = coverurl
+        #soup = self.index_to_soup(self.INDEX)
+        soup = self.index_to_soup(INDEX)
         feeds = []
         articles = []
         section_title = "Preambulo"
@ -68,8 +90,12 @@ class FSP(BasicNewsRecipe):
                 self.log('--> new section title: ', section_title)
             if strpost.startswith('<a href'):
                 url = post['href']
+                # this bit is kept if they ever go back to the old format (pre Nov-2011)
                 if url.startswith('/fsp'):
                     url = 'http://www1.folha.uol.com.br' + url
+
+                if url.startswith('http://www1.folha.uol.com.br/fsp'):
+                    #url = 'http://www1.folha.uol.com.br'+url
                     title = self.tag_to_string(post)
                     self.log()
                     self.log('--> post: ', post)
@ -82,15 +108,11 @@ class FSP(BasicNewsRecipe):
         # keeping the front page url
         minha_capa = feeds[0][1][1]['url']

-        # removing the 'Preambulo' section
+        # removing the first section (now called 'top')
         del feeds[0]

-        # creating the url for the cover image
-        coverurl = feeds[0][1][0]['url']
-        coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
-        coverurl = coverurl.replace('01.htm', '.jpg')
-        self.cover_url = coverurl

         # inserting the cover page as the first article (nicer for kindle users)
         feeds.insert(0, (u'primeira p\xe1gina', [{'title': u'Primeira p\xe1gina', 'url': minha_capa}]))
         return feeds
recipes/formulaas.recipe Normal file
@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
formula-as.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FormulaAS(BasicNewsRecipe):
title = u'Formula AS'
__author__ = u'Silviu Cotoar\u0103'
publisher = u'Formula AS'
description = u'Formula AS'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Romania'
encoding = 'utf-8'
cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'item padded'})
]
remove_tags = [
dict(name='ul', attrs={'class':'subtitle lower'})
]
remove_tags_after = [
dict(name='ul', attrs={'class':'subtitle lower'}),
dict(name='div', attrs={'class':'item-brief-options'})
]
feeds = [
(u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)
@ -18,7 +18,7 @@ class FrazPC(BasicNewsRecipe):
     max_articles_per_feed = 100
     use_embedded_content = False
     no_stylesheets = True
+    cover_url = 'http://www.frazpc.pl/images/logo.png'
     feeds = [
         (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
         (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
@ -33,6 +33,7 @@ class FrazPC(BasicNewsRecipe):
         dict(name='div', attrs={'class':'comments_box'})
     ]
+    remove_tags_after = dict(name='div', attrs={'class':'content'})
     preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
     remove_attributes = [ 'width', 'height' ]
@ -0,0 +1,35 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
title = u'Gazeta.pl Szczecin'
description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
__author__ = u'Michał Szkutnik'
__license__ = u'GPL v3'
language = 'pl'
publisher = 'Agora S.A.'
category = 'news, szczecin'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]
def get_article_url(self, article):
s = re.search("""/0L(szczecin.*)/story01.htm""", article.link)
s = s.group(1)
replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_"}
for (a, b) in replacements.iteritems():
s = string.replace(s, a, b)
s = string.replace(s, "0A", "0")
return "http://"+s
def print_version(self, url):
s = re.search("""/(\d*),(\d*),(\d*),.*\.html""", url)
no1 = s.group(2)
no2 = s.group(3)
return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)
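A worked, hypothetical example of the two URL rewrites above; the numeric IDs are invented for illustration:

    import re

    url = 'http://szczecin.gazeta.pl/szczecin/1,34602,10239145,przyklad.html'  # invented IDs
    m = re.search(r'/(\d*),(\d*),(\d*),.*\.html', url)
    print 'http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html' % (m.group(2), m.group(3))
    # -> http://szczecin.gazeta.pl/szczecin/2029020,34602,10239145.html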
@ -0,0 +1,90 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GiveMeSomethingToRead(BasicNewsRecipe):
title = u'Give Me Something To Read'
description = 'Curation / aggregation of articles on diverse topics'
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://givemesomethingtoread.com'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('The Arts','arts',25),
('Science','science',30),
('Technology','technology',30),
('Politics','politics',20),
('Media','media',30),
('Crime','crime',15),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/tagged/'+tag
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
headers = soup.findAll('h2')
if len(headers) == 0:
break
for header in headers:
atag = header.find('a')
url = atag['href']
# skip promotionals and duplicate
if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(header)
self.log('\tFound article:', title)
#self.log('\t', url)
desc = header.parent.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = header.parent.previousSibling
# navigate up to find h3, which contains the date
while p:
if hasattr(p,'name') and p.name == 'h3':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
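The manual previousSibling walk that recovers the article date could also lean on BeautifulSoup 3's findPreviousSibling helper; an equivalent, self-contained sketch with invented markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<h3>Jan 5</h3><div><h2><a href="http://example.com/a">T</a></h2></div>')
    header = soup.find('h2')
    h3 = header.parent.findPreviousSibling('h3')  # nearest preceding sibling <h3>
    print h3.string                               # -> Jan 5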
@ -1,4 +1,3 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class GlasgowHerald(BasicNewsRecipe):
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
     language = 'en_GB'
     __author__ = 'Kovid Goyal'

-    keep_only_tags = [dict(attrs={'class':'article'})]
-    remove_tags = [
-        dict(id=['pic-nav']),
-        dict(attrs={'class':['comments-top']})
-    ]
+    use_embedded_content = False
+    no_stylesheets = True
+    auto_cleanup = True
+
+    #keep_only_tags = [dict(attrs={'class':'article'})]
+    #remove_tags = [
+        #dict(id=['pic-nav']),
+        #dict(attrs={'class':['comments-top']})
+    #]

     feeds = [
@ -26,4 +29,3 @@ class GlasgowHerald(BasicNewsRecipe):
         u'http://www.heraldscotland.com/cmlink/1.768',),
     (u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
                     {'class':['articleTools', 'pagination', 'Ads', 'topad',
                               'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]

+    def populate_article_metadata(self, article, soup, first):
+        if first and hasattr(self, 'add_toc_thumbnail'):
+            picdiv = soup.find('img')
+            if picdiv is not None:
+                self.add_toc_thumbnail(article, picdiv['src'])

     #Use the mobile version rather than the web version
     def print_version(self, url):
         return url.rpartition('?')[0] + '?service=mobile'
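A note on the hasattr guard in populate_article_metadata above: add_toc_thumbnail is a relatively recent addition to BasicNewsRecipe at the time of this commit, so checking for it before calling presumably keeps the recipe working on older calibre installs that lack the method.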
recipes/goal.recipe Normal file
@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
title = u'Goal'
oldest_article = 1
language = 'it'
max_articles_per_feed = 100
auto_cleanup = True
remove_tags_after = [dict(id='article_content')]
feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')]
__author__ = 'faber1971'
description = 'Sports news from Italy'
@ -12,7 +12,6 @@ class GN(BasicNewsRecipe):
     EDITION = 0

     __author__ = 'Piotr Kontek'
-    title = u'Gość niedzielny'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     no_stylesheets = True
@ -20,6 +19,8 @@ class GN(BasicNewsRecipe):
     remove_javascript = True
     temp_files = []
     simultaneous_downloads = 1
+    masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
+    title = u'Gość niedzielny'

     articles_are_obfuscated = True
@ -64,7 +65,6 @@ class GN(BasicNewsRecipe):
             if img != None:
                 a = img.parent
                 self.EDITION = a['href']
-                self.title = img['alt']
                 self.cover_url = 'http://www.gosc.pl' + img['src']
             if not first:
                 break
recipes/grantland.recipe Normal file
@ -0,0 +1,76 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GrantLand(BasicNewsRecipe):
title = u"Grantland"
description = 'Writings on Sports & Pop Culture'
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = True
# auto_cleanup is too aggressive sometimes and we end up with blank articles
auto_cleanup = False
timefmt = ' [%a, %d %b %Y]'
oldest_article = 90
cover_url = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
masthead_url = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'
INDEX = 'http://www.grantland.com'
CATEGORIES = [
# comment out second line if you don't want older articles
# (user friendly name, url suffix, max number of articles to load)
('Today in Grantland','',20),
('In Case You Missed It','incaseyoumissedit',35),
]
remove_tags = [
{'name':['style','aside','nav','footer','script']},
{'name':'h1','text':'Grantland'},
{'id':['header','col-right']},
{'class':['connect_widget']},
{'name':'section','class':re.compile(r'\b(ad|module)\b')},
]
preprocess_regexps = [
# remove blog banners
(re.compile(r'<a href="/blog/(?:(?!</a>).)+</a>', re.DOTALL|re.IGNORECASE), lambda m: ''),
]
def parse_index(self):
feeds = []
seen_urls = set([])
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
self.log('Reading category:', cat_name)
articles = []
page = "%s/%s" % (self.INDEX, tag)
soup = self.index_to_soup(page)
main = soup.find('div',id='col-main')
if main is None:
main = soup
for tag in main.findAll('a', href=re.compile(r'(story|post)/_/id/\d+')):
url = tag['href']
if url in seen_urls:
continue
title = tag.string
# blank title probably means <a href=".."><img /></a>. skip
if not title:
continue
self.log('\tFound article:', title)
self.log('\t', url)
articles.append({'title':title,'url':url})
seen_urls.add(url)
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
recipes/gs24_pl.recipe Normal file
@ -0,0 +1,43 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
title = u'GS24.pl (Głos Szczeciński)'
description = u'Internetowy serwis Głosu Szczecińskiego'
__author__ = u'Michał Szkutnik'
__license__ = u'GPL v3'
language = 'pl'
publisher = 'Media Regionalne sp. z o.o.'
category = 'news, szczecin'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
cover_url = "http://www.gs24.pl/images/top_logo.png"
feeds = [
# (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
(u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
(u'Stargard', u'http://www.gs24.pl/stargard.xml'),
(u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
(u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
(u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
(u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
(u'Police', u'http://www.gs24.pl/police.xml'),
(u'Region', u'http://www.gs24.pl/region.xml'),
(u'Sport', u'http://www.gs24.pl/sport.xml'),
]
def get_article_url(self, article):
s = re.search("""/0L0S(gs24.*)/story01.htm""", article.link)
s = s.group(1)
replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_", "0D" : "?", "0F" : "="}
for (a, b) in replacements.iteritems():
s = string.replace(s, a, b)
s = string.replace(s, "0A", "0")
return "http://"+s
def print_version(self, url):
return url + "&Template=printpicart"
@ -9,6 +9,7 @@ www.guardian.co.uk
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from datetime import date
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Guardian(BasicNewsRecipe):
@ -16,9 +17,11 @@ class Guardian(BasicNewsRecipe):
     if date.today().weekday() == 6:
         base_url = "http://www.guardian.co.uk/theobserver"
         cover_pic = 'Observer digital edition'
+        masthead_url = 'http://static.guim.co.uk/sys-images/Guardian/Pix/site_furniture/2010/10/19/1287478087992/The-Observer-001.gif'
     else:
         base_url = "http://www.guardian.co.uk/theguardian"
         cover_pic = 'Guardian digital edition'
+        masthead_url = 'http://static.guim.co.uk/static/f76b43f9dcfd761f0ecf7099a127b603b2922118/common/images/logos/the-guardian/titlepiece.gif'

     __author__ = 'Seabound and Sujata Raman'
     language = 'en_GB'
@ -26,6 +29,7 @@ class Guardian(BasicNewsRecipe):
     oldest_article = 7
     max_articles_per_feed = 100
     remove_javascript = True
+    encoding = 'utf-8'

     # List of section titles to ignore
     # For example: ['Sport']
@ -41,6 +45,16 @@ class Guardian(BasicNewsRecipe):
         dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
         dict(name='ul', attrs={'class':["pagination"]}),
         dict(name='ul', attrs={'id':["content-actions"]}),
+        # article history link
+        dict(name='a', attrs={'class':["rollover history-link"]}),
+        # "a version of this article ..." spiel
+        dict(name='div', attrs={'class':['section']}),
+        # "about this article" js dialog
+        dict(name='div', attrs={'class':["share-top",]}),
+        # author picture
+        dict(name='img', attrs={'class':["contributor-pic-small"]}),
+        # embedded videos/captions
+        dict(name='span', attrs={'class':['inline embed embed-media']}),
         #dict(name='img'),
     ]
     use_embedded_content = False
@ -65,8 +79,21 @@ class Guardian(BasicNewsRecipe):
             url = None
         return url

+    def populate_article_metadata(self, article, soup, first):
+        if first and hasattr(self, 'add_toc_thumbnail'):
+            picdiv = soup.find('img')
+            if picdiv is not None:
+                self.add_toc_thumbnail(article, picdiv['src'])

     def preprocess_html(self, soup):
+        # multiple html sections in soup, useful stuff in the first
+        html = soup.find('html')
+        soup2 = BeautifulSoup()
+        soup2.insert(0, html)
+        soup = soup2

         for item in soup.findAll(style=True):
             del item['style']
@ -75,6 +102,17 @@ class Guardian(BasicNewsRecipe):
         for tag in soup.findAll(name=['ul','li']):
             tag.name = 'div'

+        # removes number next to rating stars
+        items_to_remove = []
+        rating_container = soup.find('div', attrs={'class': ['rating-container']})
+        if rating_container:
+            for item in rating_container:
+                if isinstance(item, Tag) and str(item.name) == 'span':
+                    items_to_remove.append(item)
+            for item in items_to_remove:
+                item.extract()
         return soup

     def find_sections(self):
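The soup2 dance at the top of preprocess_html handles pages that arrive as several concatenated HTML documents: re-rooting a fresh soup on the first <html> node drops everything after it. A standalone sketch using the BeautifulSoup 3 API bundled with calibre; the sample markup is invented:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    raw = '<html><body>article</body></html><html><body>cruft</body></html>'
    soup = BeautifulSoup(raw)
    first = soup.find('html')   # the first <html> document only
    clean = BeautifulSoup()
    clean.insert(0, first)
    print clean                 # keeps only the first document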
@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
 from urlparse import urlparse
 import re

-class HackerNews(BasicNewsRecipe):
-    title = 'Hacker News'
-    __author__ = 'Tom Scholl'
+class HNWithCommentsLink(BasicNewsRecipe):
+    title = 'HN With Comments Link'
+    __author__ = 'Tom Scholl & David Kerschner'
     description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
     publisher = 'Y Combinator'
     category = 'news, programming, it, technology'
@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
         body = body + comments
         return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'

+    def parse_feeds(self):
+        a = super(HNWithCommentsLink, self).parse_feeds()
+        self.hn_articles = a[0].articles
+        return a

     def get_obfuscated_article(self, url):
         if url.startswith('http://news.ycombinator.com'):
             content = self.get_hn_content(url)
@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
         else:
             content = self.get_readable_content(url)

+        article = 0
+        for a in self.hn_articles:
+            if a.url == url:
+                article = a
+        content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'

         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
         self.temp_files[-1].write(content)
         self.temp_files[-1].close()
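The re.sub splice added above is a small, reusable pattern: strip a trailing </body></html>, append extra markup, and re-close the document. In isolation, with invented sample content:

    import re

    def append_before_close(content, extra):
        # remove the closing tags (if present), add the extra HTML, close again
        return re.sub(r'</body>\s*</html>\s*$', '', content) + extra + '</body></html>'

    print append_before_close('<html><body>story</body></html>',
                              '<p><a href="http://news.ycombinator.com">Comments</a></p>')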
recipes/haksoz.recipe Normal file
@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1324739199(BasicNewsRecipe):
title = u'Haks\xf6z'
oldest_article = 7
max_articles_per_feed = 20
auto_cleanup = True
language = 'tr'
__author__ = 'asalet_r'
feeds = [(u'Haks\xf6z', u'http://www.haksozhaber.net/rss/')]
@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe
'''
Hamilton Spectator Calibre Recipe
'''
class HamiltonSpectator(BasicNewsRecipe):
title = u'Hamilton Spectator'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
__author__ = u'Eric Coolman'
publisher = u'thespec.com'
description = u'Ontario Canada Newspaper'
category = u'News, Ontario, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_CA'
encoding = 'utf-8'
feeds = [
(u'Top Stories',u'http://www.thespec.com/rss?query=/&assetType=Article'),
(u'All News',u'http://www.thespec.com/rss?query=/news&assetType=Article'),
(u'Local',u'http://www.thespec.com/rss?query=/local&assetType=Article'),
(u'Ontario',u'http://www.thespec.com/rss?query=/ontario&assetType=Article'),
(u'Canada',u'http://www.thespec.com/rss?query=/canada&assetType=Article'),
(u'World News',u'http://www.thespec.com/rss?query=/world&assetType=Article'),
(u'Business',u'http://www.thespec.com/rss?query=/business&assetType=Article'),
(u'Crime',u'http://www.thespec.com/rss?query=/crime&assetType=Article'),
(u'All Sports',u'http://www.thespec.com/rss?query=/sports&assetType=Article'),
(u'Ticats',u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'),
(u'Bulldogs',u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'),
(u'High School Sports',u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'),
(u'Local Sports',u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'),
(u"What's On", u'http://www.thespec.com/rss?query=/whatson&assetType=Article'),
(u'Arts and Entertainment',u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'),
(u'Books',u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'),
(u'Movies',u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'),
(u'Music',u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'),
(u'Restaurant Reviews',u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'),
(u'Opinion',u'http://www.thespec.com/rss?query=/opinion&assetType=Article'),
(u'Opinion Columns',u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'),
(u'Cartoons',u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'),
(u'Letters',u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'),
(u'Editorial',u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'),
(u'Community',u'http://www.thespec.com/rss?query=/community&assetType=Article'),
(u'Education',u'http://www.thespec.com/rss?query=/community/education&assetType=Article'),
(u'Faith',u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'),
(u'Contests',u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'),
(u'Living',u'http://www.thespec.com/rss?query=/living&assetType=Article'),
(u'Food',u'http://www.thespec.com/rss?query=/living/food&assetType=Article'),
(u'Health and Fitness',u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'),
(u'Your Home',u'http://www.thespec.com/rss?query=/living/home&assetType=Article'),
(u'Travel',u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'),
(u'Family and Parenting',u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'),
(u'Style',u'http://www.thespec.com/rss?query=/living/style&assetType=Article')
]
@ -1,11 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class AdvancedUserRecipe(BasicNewsRecipe):
-    title = 'heise online'
+    title = 'Heise-online'
     description = 'News vom Heise-Verlag'
     __author__ = 'schuster'
+    masthead_url = 'http://www.heise.de/icons/ho/heise_online_logo.gif'
+    publisher = 'Heise Zeitschriften Verlag GmbH & Co. KG'
     use_embedded_content = False
     language = 'de'
     oldest_article = 2
@ -14,11 +14,10 @@ class AdvancedUserRecipe(BasicNewsRecipe):
     remove_empty_feeds = True
     timeout = 5
     no_stylesheets = True
-    encoding = 'utf-8'

     remove_tags_after = dict(name='p', attrs={'class':'editor'})
-    remove_tags = [{'class':'navi_top_container'},
+    remove_tags = [dict(id='navi_top_container'),
         dict(id='navi_bottom'),
         dict(id='mitte_rechts'),
         dict(id='navigation'),
@ -29,27 +28,31 @@ class AdvancedUserRecipe(BasicNewsRecipe):
         dict(id='seiten_navi'),
         dict(id='adbottom'),
         dict(id='sitemap'),
-        dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
-    ]
+        dict(name='div', attrs={'id':'sitemap'}),
+        dict(name='ul', attrs={'class':'erste_zeile'}),
+        dict(name='ul', attrs={'class':'zweite_zeile'}),
+        dict(name='div', attrs={'class':'navi_top_container'})]

     feeds = [
         ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-        ('iX', 'http://www.heise.de/ix/news/news.rdf'),
-        ('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
-        ('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-        ('Security', 'http://www.heise.de/security/news/news-atom.xml'),
-        ('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-        ('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
-        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
         ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-        ('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
-        ('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
+        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
+        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
+        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
+        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
+        ('iX', 'http://www.heise.de/ix/news/news.rdf'),
+        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
         ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
         ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
         ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-        ('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
         ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')]

     def print_version(self, url):
         return url + '?view=print'
@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import urllib, re

 class HindustanTimes(BasicNewsRecipe):
     title = u'Hindustan Times'
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
         'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
     ]
def get_article_url(self, article):
'''
HT uses a variant of the feedportal RSS ad display mechanism
'''
try:
s = article.summary
return urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
url = BasicNewsRecipe.get_article_url(self, article)
res = self.browser.open_novisit(url)
url = res.geturl().split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
'www.'}
for k, v in encoding.iteritems():
url = url.replace(k, v)
return url
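One caveat worth flagging in this decoder (my observation, not part of the commit): a plain dict is iterated in arbitrary order under Python 2, so a multi-character code can be corrupted if '0A' -> '0' happens to run first; for example '0AN' would collapse to '0N' and then to '.com'. Pinning the order with a list of pairs, as the Expansion recipe earlier in this commit does, avoids that:

    codes = [('0N', '.com'), ('0L', 'http://'), ('0S', 'www.'),
             ('0B', '.'), ('0C', '/'), ('0D', '?'), ('0E', '-'),
             ('0F', '='), ('0G', '&'), ('0A', '0')]  # '0A' (a literal '0') goes last

    def decode(u):
        for k, v in codes:
            u = u.replace(k, v)
        return u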
@ -4,56 +4,20 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, matek09, matek09@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class Histmag(BasicNewsRecipe):

     title = u'Histmag'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    cover_url = 'http://histmag.org/grafika/loga/histmag-logo-2-300px.png'
     __author__ = 'matek09'
     description = u"Artykuly historyczne i publicystyczne"
     encoding = 'utf-8'
-    #preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
-    #max_articles_per_feed = 1
-    remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'article'}))
-    remove_tags_after = dict(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
-    #keep_only_tags =[]
-    #keep_only_tags.append(dict(name = 'h2'))
-    #keep_only_tags.append(dict(name = 'p'))
-    remove_tags =[]
-    remove_tags.append(dict(name = 'p', attrs = {'class' : 'podpis'}))
-    remove_tags.append(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
-    remove_tags.append(dict(name = 'img', attrs = {'src' : 'style/buttons/wesprzyjnas-1.jpg'}))
-    preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),
-                          (re.compile(r'<span>'), lambda match: '<br><br><span>')]
-    extra_css = '''
-        .left {font-size: x-small}
-        .right {font-size: x-small}
-    '''
+    keep_only_tags = [dict(id='article')]
+    remove_tags = [dict(name='p', attrs={'class' : 'article-tags'})]

-    def find_articles(self, soup):
-        articles = []
-        for div in soup.findAll('div', attrs={'class' : 'text'}):
-            articles.append({
-                'title' : self.tag_to_string(div.h3.a),
-                'url' : 'http://www.histmag.org/' + div.h3.a['href'],
-                'date' : self.tag_to_string(div.next('p')).split('|')[0],
-                'description' : self.tag_to_string(div.next('p', podpis=False)),
-            })
-        return articles
-
-    def parse_index(self):
-        soup = self.index_to_soup('http://histmag.org/?arc=4&dx=0')
-        feeds = []
-        feeds.append((u"Artykuly historyczne", self.find_articles(soup)))
-        soup = self.index_to_soup('http://histmag.org/?arc=5&dx=0')
-        feeds.append((u"Artykuly publicystyczne", self.find_articles(soup)))
-        soup = self.index_to_soup('http://histmag.org/?arc=1&dx=0')
-        feeds.append((u"Wydarzenia", self.find_articles(soup)))
-        return feeds
+
+    feeds = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'),
+             (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'),
+             (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'),
+             (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'),
+             (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]
@ -8,6 +8,15 @@ class Historia_org_pl(BasicNewsRecipe):
     category = 'history'
     language = 'pl'
     oldest_article = 8
+    remove_empty_feeds = True
     max_articles_per_feed = 100
-    feeds = [(u'Artykuły', u'http://www.historia.org.pl/index.php?format=feed&type=rss')]
+    feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'),
+             (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'),
+             (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'),
+             (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'),
+             (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'),
+             (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'),
+             (u'Rekonstrukcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'),
+             (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'),
+             (u'Konkursy', u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')]
@ -1,44 +1,58 @@
-# -*- coding: utf-8 -*-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
+################################################################################
+# Description: http://hvg.hu/ RSS channel
+# Author: Bigpapa (bigpapabig@hotmail.com)
+# Date: 2011.12.20. - V1.1
+################################################################################
+from calibre.web.feeds.news import BasicNewsRecipe

-class HVG(BasicNewsRecipe):
-    title = 'HVG.HU'
-    __author__ = u'István Papp'
-    description = u'Friss hírek a HVG-től'
-    timefmt = ' [%Y. %b. %d., %a.]'
-    oldest_article = 4
+class hvg(BasicNewsRecipe):
+    title = u'HVG'
+    __author__ = 'Bigpapa'
     language = 'hu'
-    max_articles_per_feed = 100
+    oldest_article = 5  # how many days old the oldest fetched article may be
+    max_articles_per_feed = 5  # maximum number of articles stored per feed in the e-book
     no_stylesheets = True
-    use_embedded_content = False
     encoding = 'utf8'
-    publisher = 'HVG Online'
-    category = u'news, hírek, hvg'
-    extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    remove_tags_before = dict(id='pg-content')
-    remove_javascript = True
-    remove_empty_feeds = True
+    extra_css = ' h2 { font:bold 28px} '

+    remove_attributes = ['style', 'font', 'href']

-    feeds = [
-        (u'Itthon', u'http://hvg.hu/rss/itthon')
-        ,(u'Világ', u'http://hvg.hu/rss/vilag')
-        ,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
-        ,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
-        ,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
-        ,(u'Karrier', u'http://hvg.hu/rss/karrier')
-        ,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
-        ,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
-        ,(u'Kultúra', u'http://hvg.hu/rss/kultura')
-        ,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
-        ,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
-        ,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
-        ,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
-        ,(u'Sport', u'http://hvg.hu/rss/sport')
-    ]
+    keep_only_tags = [
+        dict(name='div', attrs={'id':['pg-content']})
+    ]

-    def print_version(self, url):
-        return url.replace('#rss', '/print')
+    remove_tags = [
+        dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
+        dict(name='table', attrs={'class':['banner2', 'monocle']}),
+        dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
+        dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
+        dict(name='h3', attrs={'class':['hthree']}),
+        dict(name='ul', attrs={'class':['defaultul']}),
+        dict(name='form', attrs={'id':['commentForm']}),
+        dict(name='h6', attrs={'class':['hthree']}),
+        dict(name='h6', attrs={'class':['more2']}),
+        dict(name='img', attrs={'class':['framed']}),
+        dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
+    ]

+    feeds = [
+        # (u'\xd6sszes', 'http://hvg.hu/rss'),
+        (u'Itthon', 'http://hvg.hu/rss/itthon'),
+        (u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
+        (u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
+        (u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
+        (u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
+        (u'Karrier', 'http://hvg.hu/rss/karrier'),
+        (u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
+        (u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
+        (u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
+        (u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
+        (u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
+        (u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
+        (u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
+        (u'Sport', 'http://hvg.hu/rss/sport')
+    ]
BIN recipes/icons/biolog_pl.png (new file, binary, 1.2 KiB)
BIN recipes/icons/blues.png (new file, binary, 910 B)
BIN (icon, filename not shown, binary, 373 B)
BIN (icon, filename not shown, binary, 9.3 KiB)
BIN (icon, filename not shown, binary, 481 B)
BIN recipes/icons/formulaas.png (new file, binary, 687 B)
BIN recipes/icons/infra_pl.png (new file, binary, 1.5 KiB)
BIN (icon, filename not shown, binary, 1.2 KiB)
BIN (icon, filename not shown, binary, 2.1 KiB)
BIN recipes/icons/moneynews.png (new file, binary, 914 B)
BIN (icon, filename not shown, binary, 241 B)
BIN (icon, filename not shown, binary, 944 B)
BIN recipes/icons/rionegro.png (new file, binary, 817 B)
BIN recipes/icons/skylife.png (new file, binary, 3.3 KiB)
BIN recipes/icons/zaman.png (new file, binary, 999 B)
@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
__copyright__ = 'Josemi Liébana'
__version__ = 'v0.1'
__date__ = '5 January 2012'
'''
www.ideal.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ideal(BasicNewsRecipe):
title = u'Ideal (Edición Almería)'
__author__ = u'Josemi Liébana'
description = u'Noticias de Almería y el resto del mundo'
publisher = 'Ideal'
category = u'News, Politics, Spain, Almería'
publication_type = 'Newspaper'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = u'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
cover_url = u'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
extra_css = u' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'title'})
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
]
remove_tags = [dict(name='ul')]
remove_attributes = ['width','height']
feeds = [
(u'Última Hora' , u'http://www.ideal.es/almeria/rss/feeds/ultima.xml' )
,(u'Portada' , u'http://www.ideal.es/almeria/portada.xml' )
,(u'Local' , u'http://www.ideal.es/almeria/rss/feeds/granada.xml' )
,(u'Deportes' , u'http://www.ideal.es/almeria/rss/feeds/deportes.xml' )
,(u'Sociedad' , u'http://www.ideal.es/almeria/rss/feeds/sociedad.xml' )
,(u'Cultura' , u'http://www.ideal.es/almeria/rss/feeds/cultura.xml' )
,(u'Economía' , u'http://www.ideal.es/almeria/rss/feeds/economia.xml' )
,(u'Costa' , u'http://www.ideal.es/almeria/rss/feeds/costa.xml' )
,(u'Puerta Purchena' , u'http://www.ideal.es/almeria/rss/feeds/puerta_purchena.xml' )
,(u'Andalucía' , u'http://www.ideal.es/almeria/rss/feeds/andalucia.xml' )
,(u'España' , u'http://www.ideal.es/almeria/rss/feeds/espana.xml' )
,(u'Mundo' , u'http://www.ideal.es/almeria/rss/feeds/internacional.xml' )
,(u'Vivir' , u'http://www.ideal.es/almeria/rss/feeds/vivir.xml' )
,(u'Opinión' , u'http://www.ideal.es/almeria/rss/feeds/opinion.xml' )
,(u'Televisión' , u'http://www.ideal.es/almeria/rss/feeds/television.xml' )
,(u'Contraportada' , u'http://www.ideal.es/almeria/rss/feeds/contraportada.xml' )
]
@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
__copyright__ = 'Josemi Liébana'
__version__ = 'v0.1'
__date__ = '5 January 2012'
'''
www.ideal.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ideal(BasicNewsRecipe):
title = u'Ideal (Edición Granada)'
__author__ = u'Josemi Liébana'
description = u'Noticias de Granada y el resto del mundo'
publisher = 'Ideal'
category = 'News, Politics, Spain, Granada'
publication_type = 'Newspaper'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'title'})
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
]
remove_tags = [dict(name='ul')]
remove_attributes = ['width','height']
feeds = [
(u'Última Hora' , u'http://www.ideal.es/granada/rss/feeds/ultima.xml' )
,(u'Portada' , u'http://www.ideal.es/granada/portada.xml' )
,(u'Local' , u'http://www.ideal.es/granada/rss/feeds/granada.xml' )
,(u'Deportes' , u'http://www.ideal.es/granada/rss/feeds/deportes.xml' )
,(u'Sociedad' , u'http://www.ideal.es/granada/rss/feeds/sociedad.xml' )
,(u'Cultura' , u'http://www.ideal.es/granada/rss/feeds/cultura.xml' )
,(u'Economía' , u'http://www.ideal.es/granada/rss/feeds/economia.xml' )
,(u'Costa' , u'http://www.ideal.es/granada/rss/feeds/costa.xml' )
,(u'La Carrera' , u'http://www.ideal.es/granada/rss/feeds/la_carrera.xml' )
,(u'Puerta Real' , u'http://www.ideal.es/granada/rss/feeds/puerta_real.xml' )
,(u'Andalucía' , u'http://www.ideal.es/granada/rss/feeds/andalucia.xml' )
,(u'España' , u'http://www.ideal.es/granada/rss/feeds/espana.xml' )
,(u'Mundo' , u'http://www.ideal.es/granada/rss/feeds/internacional.xml' )
,(u'Vivir' , u'http://www.ideal.es/granada/rss/feeds/vivir.xml' )
,(u'Opinión' , u'http://www.ideal.es/granada/rss/feeds/opinion.xml' )
,(u'Televisión' , u'http://www.ideal.es/granada/rss/feeds/television.xml' )
,(u'Contraportada' , u'http://www.ideal.es/granada/rss/feeds/contraportada.xml' )
]
recipes/ideal_jaen.recipe Normal file
@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
__copyright__ = 'Josemi Liébana'
__version__ = 'v0.1'
__date__ = '5 January 2012'
'''
www.ideal.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ideal(BasicNewsRecipe):
title = u'Ideal (Edición Jaén)'
__author__ = u'Josemi Liébana'
description = u'Noticias de Jaén y el resto del mundo'
publisher = 'Ideal'
category = u'News, Politics, Spain, Jaén'
publication_type = 'Newspaper'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'title'})
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
]
remove_tags = [dict(name='ul')]
remove_attributes = ['width','height']
feeds = [
(u'Última Hora' , u'http://www.ideal.es/jaen/rss/feeds/ultima.xml' )
,(u'Portada' , u'http://www.ideal.es/jaen/portada.xml' )
,(u'Local' , u'http://www.ideal.es/jaen/rss/feeds/granada.xml' )
,(u'Deportes' , u'http://www.ideal.es/jaen/rss/feeds/deportes.xml' )
,(u'Sociedad' , u'http://www.ideal.es/jaen/rss/feeds/sociedad.xml' )
,(u'Cultura' , u'http://www.ideal.es/jaen/rss/feeds/cultura.xml' )
,(u'Economía' , u'http://www.ideal.es/jaen/rss/feeds/economia.xml' )
,(u'Costa' , u'http://www.ideal.es/jaen/rss/feeds/costa.xml' )
,(u'Andalucía' , u'http://www.ideal.es/jaen/rss/feeds/andalucia.xml' )
,(u'España' , u'http://www.ideal.es/jaen/rss/feeds/espana.xml' )
,(u'Mundo' , u'http://www.ideal.es/jaen/rss/feeds/internacional.xml' )
,(u'Vivir' , u'http://www.ideal.es/jaen/rss/feeds/vivir.xml' )
,(u'Opinión' , u'http://www.ideal.es/jaen/rss/feeds/opinion.xml' )
,(u'Televisión' , u'http://www.ideal.es/jaen/rss/feeds/television.xml' )
,(u'Contraportada' , u'http://www.ideal.es/jaen/rss/feeds/contraportada.xml' )
]

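One detail worth flagging in the new Jaén edition: the Local feed still points at rss/feeds/granada.xml, which looks like a holdover from the Granada recipe this file was copied from. A few lines of Python are enough to sanity-check a feeds list like this outside calibre; the sketch below assumes the third-party feedparser package is installed and checks only a subset of the feeds:

    # quick sanity check for a recipe's feeds list; assumes the
    # third-party feedparser package (not part of calibre or this commit)
    import feedparser

    feeds = [
        (u'Portada',  u'http://www.ideal.es/jaen/portada.xml'),
        (u'Local',    u'http://www.ideal.es/jaen/rss/feeds/granada.xml'),
        (u'Deportes', u'http://www.ideal.es/jaen/rss/feeds/deportes.xml'),
    ]

    for name, url in feeds:
        d = feedparser.parse(url)
        if d.bozo and not d.entries:
            print '%-10s BROKEN (%s)' % (name, url)
        else:
            print '%-10s %d entries' % (name, len(d.entries))
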
View File

@ -1,63 +1,30 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Derry FitzGerald'
-'''
-iht.com
-'''
-import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ptempfile import PersistentTemporaryFile

-class InternationalHeraldTribune(BasicNewsRecipe):
-    title = u'The International Herald Tribune'
-    __author__ = 'Derry FitzGerald'
+class NYTimesGlobal(BasicNewsRecipe):
+    title = u'NY Times Global'
     language = 'en'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    use_embedded_content = False

-    oldest_article = 1
-    max_articles_per_feed = 30
     no_stylesheets = True
+    auto_cleanup = True

-    remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
-                   dict(name=['form'])]
-    preprocess_regexps = [
-            (re.compile(r'<!-- webtrends.*', re.DOTALL),
-             lambda m:'</body></html>')
-            ]
-    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
-    remove_empty_feeds = True

     feeds = [
-            (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
-            (u'Business', u'http://www.iht.com/rss/business.xml'),
-            (u'Americas', u'http://www.iht.com/rss/america.xml'),
-            (u'Europe', u'http://www.iht.com/rss/europe.xml'),
-            (u'Asia', u'http://www.iht.com/rss/asia.xml'),
-            (u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
-            (u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
-            (u'Technology', u'http://www.iht.com/rss/technology.xml'),
-            (u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
-            (u'Sports', u'http://www.iht.com/rss/sports.xml'),
-            (u'Culture', u'http://www.iht.com/rss/arts.xml'),
-            (u'Style and Design', u'http://www.iht.com/rss/style.xml'),
-            (u'Travel', u'http://www.iht.com/rss/travel.xml'),
-            (u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
-            (u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
-            (u'Properties', u'http://www.iht.com/rss/properties.xml')
-            ]
+    ('NYTimes',
+     'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
+    ('NYTimes global',
+     'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
+    ('World',
+     'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
+    ('U.S.',
+     'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
+    ('Business',
+     'http://feeds.nytimes.com/nyt/rss/Business'),
+    ('Sports',
+     'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
+    ('Technology',
+     'http://feeds.nytimes.com/nyt/rss/Technology'),
+    ]

-    temp_files = []
-    articles_are_obfuscated = True
-    masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'

-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url)
-        response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
-        html = response1.read()
-        self.temp_files.append(PersistentTemporaryFile('_iht.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-        return self.temp_files[-1].name

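The deleted International Herald Tribune recipe is a compact example of calibre's obfuscated-article machinery, which the auto_cleanup based replacement no longer needs: when articles_are_obfuscated is True, calibre routes every article URL through get_obfuscated_article() and parses the local file it returns. A minimal sketch of that contract, with placeholder title, feed and print-link regex:

    import re

    from calibre.ptempfile import PersistentTemporaryFile
    from calibre.web.feeds.news import BasicNewsRecipe

    class ObfuscatedSketch(BasicNewsRecipe):
        # minimal sketch of the hook the deleted IHT recipe used; the
        # title, feed and print-link regex are placeholders
        title = 'Obfuscated article sketch'
        articles_are_obfuscated = True
        temp_files = []
        feeds = [('News', 'http://example.com/rss.xml')]

        def get_obfuscated_article(self, url):
            # calibre calls this for every article URL and downloads
            # the returned local file instead of the original page
            br = self.get_browser()
            br.open(url)
            response = br.follow_link(url_regex=re.compile(r'.*print.*'))
            pt = PersistentTemporaryFile('_sketch.html')
            pt.write(response.read())
            pt.close()
            self.temp_files.append(pt)  # keep the file alive until conversion ends
            return pt.name
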
recipes/iktibas.recipe Normal file
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1324739406(BasicNewsRecipe):
title = u'\u0130ktibas'
language = 'tr'
__author__ = 'asalet_r'
oldest_article = 7
max_articles_per_feed = 20
auto_cleanup = True
feeds = [(u'\u0130ktibas', u'http://www.iktibasdergisi.com/rss/rss.xml')]

View File

@ -1,9 +1,8 @@
 # adapted from old recipe by Darko Miletic <darko.miletic at gmail.com>
-import string, re
+import re
-from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString

 class TheIndependentNew(BasicNewsRecipe):

@ -40,7 +39,9 @@ class TheIndependentNew(BasicNewsRecipe):
     encoding = 'utf-8'
     remove_tags =[
         dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
-        dict(attrs={'class' : ['autoplay','openBiogPopup']})
+        dict(attrs={'class' : ['autoplay','openBiogPopup']}),
+        dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
+        dict(attrs={'style' : re.compile('.*')}),
         ]
     keep_only_tags =[dict(attrs={'id':'main'})]

@ -103,6 +104,12 @@ class TheIndependentNew(BasicNewsRecipe):
                 url = None
         return url

+    def populate_article_metadata(self, article, soup, first):
+        if first and hasattr(self, 'add_toc_thumbnail'):
+            picdiv = soup.find('img')
+            if picdiv is not None:
+                self.add_toc_thumbnail(article,picdiv['src'])

     def preprocess_html(self, soup):
         #remove 'advertorial articles'

@ -114,6 +121,7 @@ class TheIndependentNew(BasicNewsRecipe):
                 return None

         items_to_extract = []
+        slideshow_elements = []

         for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
             remove = True

@ -132,6 +140,7 @@ class TheIndependentNew(BasicNewsRecipe):
             if (pattern.search(item['class'])) is not None:
                 if self._FETCH_IMAGES:
                     remove = False
+                    slideshow_elements.append(item)
                 else:
                     remove = True

@ -149,7 +158,8 @@ class TheIndependentNew(BasicNewsRecipe):
         items_to_extract = []

         if self._FETCH_IMAGES:
-            for item in soup.findAll('a',attrs={'href' : re.compile('.*')}):
+            for element in slideshow_elements:
+                for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
                     if item.img is not None:
                         #use full size image
                         img = item.findNext('img')

@ -157,7 +167,7 @@ class TheIndependentNew(BasicNewsRecipe):
                         img['src'] = item['href']

                         #insert caption if available
-                        if img['title'] is not None and (len(img['title']) > 1):
+                        if img.get('title') and (len(img['title']) > 1):
                             tag = Tag(soup,'h3')
                             text = NavigableString(img['title'])
                             tag.insert(0,text)

@ -262,12 +272,15 @@ class TheIndependentNew(BasicNewsRecipe):

     def _insertRatingStars(self,soup,item):
-        if item.contents is None:
+        if item.contents is None or len(item.contents) < 1:
             return
         rating = item.contents[0]
-        if not rating.isdigit():
-            return None
-        rating = int(item.contents[0])
+        try:
+            rating = float(item.contents[0])
+        except:
+            print 'Could not convert decimal rating to star: malformatted float.'
+            return
         for i in range(1,6):
             star = Tag(soup,'img')
             if i <= rating:

@ -284,7 +297,7 @@ class TheIndependentNew(BasicNewsRecipe):
         items_to_extract = []
         for item in soup.findAll('div', attrs={'class' : 'image'}):
             img = item.findNext('img')
-            if img is not None and img['src'] is not None:
+            if img and img.get('src'):
                 # broken images still point to remote url
                 pattern = re.compile('http://www.independent.co.uk.*')
                 if pattern.match(img["src"]) is not None:

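Several of the fixes in this hunk are the same defensive idiom: on a BeautifulSoup tag, item['attr'] raises KeyError when the attribute is missing, while item.get('attr') returns None, so guards like if img.get('title') and if img and img.get('src') survive malformed markup. A standalone illustration:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '<div><img src="a.jpg"/><img src="b.jpg" title="A caption"/></div>'
    soup = BeautifulSoup(html)

    for img in soup.findAll('img'):
        # img['title'] would raise KeyError on the first tag;
        # .get() just returns None, so this guard is safe for both
        if img.get('title') and len(img['title']) > 1:
            print img['title']
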
View File

@ -1,16 +1,20 @@
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe

-class AdvancedUserRecipe1234144423(BasicNewsRecipe):
+class IndianapolisStar(BasicNewsRecipe):
     title = u'Indianapolis Star'
-    oldest_article = 5
+    oldest_article = 10
+    auto_cleanup = True
     language = 'en'
     __author__ = 'Owen Kelly'
     max_articles_per_feed = 100
     cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'

-    feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
+    feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss'),
+        (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss'),
+        (u'Business Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss'),
+        (u'Politics and Government', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS05&template=rss'),
+        (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'),
+        (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')
+        ]

     def print_version(self, url):
         return url + '&template=printart'

recipes/infra_pl.recipe Normal file
View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class INFRA(BasicNewsRecipe):
title = u'INFRA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
category = 'UFO'
language = 'pl'
no_stylesheets = True
remove_tags_before=dict(name='h2', attrs={'class':'contentheading'})
remove_tags_after=dict(attrs={'class':'pagenav'})
remove_tags=[dict(attrs={'class':'pagenav'})]
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]

recipes/izdiham.recipe Normal file
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1324158549(BasicNewsRecipe):
title = u'izdiham.com'
language = 'tr'
__author__ = 'asalet_r'
oldest_article = 7
max_articles_per_feed = 20
auto_cleanup = True
feeds = [(u'\u0130zdiham', u'http://www.izdiham.com/index.php/feed')]

recipes/japan_news.recipe Normal file
View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsOnJapan(BasicNewsRecipe):
title = u'News On Japan'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [
('News',
'http://newsonjapan.com/rss/top.xml'),
]

recipes/klip_me.recipe Normal file
View File

@ -0,0 +1,72 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299694372(BasicNewsRecipe):
title = u'Klipme'
__author__ = 'Ken Sun'
publisher = 'Klip.me'
category = 'info, custom, Klip.me'
oldest_article = 365
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id':'text_controls_toggle'})
,dict(name='script')
,dict(name='div', attrs={'id':'text_controls'})
,dict(name='div', attrs={'id':'editing_controls'})
,dict(name='div', attrs={'class':'bar bottom'})
]
use_embedded_content = False
needs_subscription = True
INDEX = u'http://www.klip.me'
LOGIN = INDEX + u'/fav/signin?callback=/fav'
feeds = [
(u'Klip.me unread', u'http://www.klip.me/fav'),
(u'Klip.me starred', u'http://www.klip.me/fav?s=starred')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
br.open(self.LOGIN)
br.select_form(nr=0)
br['Email'] = self.username
if self.password is not None:
br['Passwd'] = self.password
br.submit()
return br
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('table',attrs={'class':['item','item new']}):
atag = item.a
if atag and atag.has_key('href'):
url = atag['href']
articles.append({
'url' :url
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return 'http://www.klip.me' + url
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('title').contents[0].strip()
def postprocess_html(self, soup, first_fetch):
for link_tag in soup.findAll(attrs={"id" : "story"}):
link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')
print link_tag
return soup

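The Klip.me recipe shows the usual shape of a needs_subscription recipe: calibre asks get_browser() for a logged-in browser before fetching anything. Stripped down to just the login handshake, it looks like the sketch below; the URL and the Email/Passwd field names are copied from the recipe above and would differ on any other site:

    from calibre.web.feeds.news import BasicNewsRecipe

    class LoginSketch(BasicNewsRecipe):
        # minimal needs_subscription recipe: everything except the
        # login handshake is omitted; URL and form field names follow
        # the Klip.me recipe above and are placeholders elsewhere
        title = 'Login sketch'
        needs_subscription = True
        LOGIN = 'http://www.klip.me/fav/signin?callback=/fav'

        def get_browser(self):
            br = BasicNewsRecipe.get_browser()  # mechanize browser (old-style call, as above)
            if self.username is not None:
                br.open(self.LOGIN)
                br.select_form(nr=0)            # first form on the page
                br['Email'] = self.username
                if self.password is not None:
                    br['Passwd'] = self.password
                br.submit()
            return br
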
View File

@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2011, Attis <attis@attis.one.pl>'
+__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
 __version__ = 'v. 0.1'

 import re

@ -10,7 +10,7 @@ class KopalniaWiedzy(BasicNewsRecipe):
     publisher = u'Kopalnia Wiedzy'
     description = u'Ciekawostki ze świata nauki i techniki'
     encoding = 'utf-8'
-    __author__ = 'Attis'
+    __author__ = 'Attis & Tomasz Długosz'
     language = 'pl'
     oldest_article = 7
     max_articles_per_feed = 100

@ -18,9 +18,9 @@ class KopalniaWiedzy(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True

-    remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
+    remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
     remove_tags_after = dict(attrs={'class':'ad-square'})
-    keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})]
+    keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})]
     extra_css = '.topimage {margin-top: 30px}'

     preprocess_regexps = [

recipes/kosmonauta.recipe Normal file
View File

@ -0,0 +1,14 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Kosmonauta(BasicNewsRecipe):
title = u'Kosmonauta.net'
__author__ = 'fenuks'
description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
category = 'astronomy'
language = 'pl'
cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]

View File

@ -1,10 +1,9 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.la-razon.com
 '''
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaRazon_Bol(BasicNewsRecipe):

@ -16,19 +15,17 @@ class LaRazon_Bol(BasicNewsRecipe):
     oldest_article = 1
     max_articles_per_feed = 200
     no_stylesheets = True
-    encoding = 'cp1252'
+    encoding = 'utf8'
     use_embedded_content = False
     language = 'es_BO'
     publication_type = 'newspaper'
-    delay = 1
     remove_empty_feeds = True
-    cover_url = strftime('http://www.la-razon.com/portadas/%Y%m%d_LaRazon.jpg')
-    masthead_url = 'http://www.la-razon.com/imagenes/logo.jpg'
-    extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
-                    img{margin-bottom: 0.4em}
-                    .noticia-titulo{font-family: Georgia,"Times New Roman",Times,serif}
-                    .lead{font-weight: bold; font-size: 0.8em}
-                """
+    masthead_url = 'http://www.la-razon.com/static/LRZRazon/images/lrz-logo.png'
+    extra_css = """ body{font-family: Georgia,"Times New Roman",Times,serif}
+                    img{margin-bottom: 0.4em; display: block}
+                    .meta{font-size: small; font-family: Arial,Helvetica,sans-serif}
+                """
+    INDEX = 'http://www.la-razon.com/'

     conversion_options = {
         'comment'  : description

@ -37,28 +34,37 @@ class LaRazon_Bol(BasicNewsRecipe):
        , 'language' : language
        }

-    keep_only_tags = [dict(name='div', attrs={'class':['noticia-titulo','noticia-desarrollo']})]
-    remove_tags = [dict(name=['meta','link','form','iframe','embed','object'])]
+    keep_only_tags = [dict(name='div', attrs={'class':['pg-hd', 'pg-bd']})]
+    remove_tags = [
+         dict(name=['meta','link','form','iframe','embed','object'])
+        ,dict(name='div', attrs={'class':'bd'})
+        ]
     remove_attributes = ['width','height']

     feeds = [
-         (u'Editorial'     , u'http://www.la-razon.com/rss_editorial.php' )
-        ,(u'Opinión'       , u'http://www.la-razon.com/rss_opinion.php'   )
-        ,(u'Nacional'      , u'http://www.la-razon.com/rss_nacional.php'  )
-        ,(u'Economia'      , u'http://www.la-razon.com/rss_economia.php'  )
-        ,(u'Ciudades'      , u'http://www.la-razon.com/rss_ciudades.php'  )
-        ,(u'Sociedad'      , u'http://www.la-razon.com/rss_sociedad.php'  )
-        ,(u'Mundo'         , u'http://www.la-razon.com/rss_sociedad.php'  )
-        ,(u'La Revista'    , u'http://www.la-razon.com/rss_larevista.php' )
-        ,(u'Sociales'      , u'http://www.la-razon.com/rss_sociales.php'  )
-        ,(u'Mia'           , u'http://www.la-razon.com/rss_mia.php'       )
-        ,(u'Marcas'        , u'http://www.la-razon.com/rss_marcas.php'    )
-        ,(u'Escape'        , u'http://www.la-razon.com/rss_escape.php'    )
-        ,(u'El Financiero' , u'http://www.la-razon.com/rss_financiero.php')
-        ,(u'Tendencias'    , u'http://www.la-razon.com/rss_tendencias.php')
+         (u'Editorial'     , u'http://www.la-razon.com/rss/opinion/editorial/'     )
+        ,(u'Nacional'      , u'http://www.la-razon.com/rss/nacional/'              )
+        ,(u'Economia'      , u'http://www.la-razon.com/rss/economia/'              )
+        ,(u'Ciudades'      , u'http://www.la-razon.com/rss/ciudades/'              )
+        ,(u'Sociedad'      , u'http://www.la-razon.com/rss/sociedad/'              )
+        ,(u'Mundo'         , u'http://www.la-razon.com/rss/mundo/'                 )
+        ,(u'La Revista'    , u'http://www.la-razon.com/rss/la_revista/'            )
+        ,(u'Sociales'      , u'http://www.la-razon.com/rss/sociales/'              )
+        ,(u'Mia'           , u'http://www.la-razon.com/rss/suplementos/mia/'       )
+        ,(u'Marcas'        , u'http://www.la-razon.com/rss/marcas/'                )
+        ,(u'Escape'        , u'http://www.la-razon.com/rss/suplementos/escape/'    )
+        ,(u'El Financiero' , u'http://www.la-razon.com/rss/suplementos/financiero/')
+        ,(u'Tendencias'    , u'http://www.la-razon.com/rss/suplementos/tendencias/')
         ]

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
         return soup

+    def get_cover_url(self):
+        soup = self.index_to_soup(self.INDEX)
+        lightbox = soup.find('div', attrs = {'class' : 'lightbox lightbox-frontpage'})
+        return lightbox.img['src']

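The new get_cover_url() will raise if the frontpage ever drops the lightbox block, since soup.find() then returns None and None.img fails. A more defensive variant of the same lookup, falling back to the declared masthead, might look like this (hypothetical class name, same selectors as the recipe above):

    from calibre.web.feeds.news import BasicNewsRecipe

    class LaRazonCoverSketch(BasicNewsRecipe):
        # hypothetical subclass; INDEX, masthead and the selector are
        # the ones from the recipe above
        title = 'La Razon cover sketch'
        INDEX = 'http://www.la-razon.com/'
        masthead_url = 'http://www.la-razon.com/static/LRZRazon/images/lrz-logo.png'

        def get_cover_url(self):
            soup = self.index_to_soup(self.INDEX)
            lightbox = soup.find('div', attrs={'class': 'lightbox lightbox-frontpage'})
            if lightbox is not None and lightbox.img is not None:
                return lightbox.img.get('src', self.masthead_url)
            return self.masthead_url  # degrade gracefully if the layout changes
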
View File

@ -1,13 +1,12 @@
 __license__   = 'GPL v3'
 __author__    = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
 __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
-description   = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
+description   = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'

 '''
 http://www.repubblica.it/
 '''
-import re
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe

@ -33,12 +32,6 @@ class LaRepubblica(BasicNewsRecipe):
     remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']

-    preprocess_regexps = [
-        (re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
-        (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE), lambda match: '<head><title>'),
-        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda match: '</title></head>')
-        ]

     def get_article_url(self, article):
         link = BasicNewsRecipe.get_article_url(self, article)
         if link and not '.repubblica.it/' in link:

@ -73,15 +66,15 @@ class LaRepubblica(BasicNewsRecipe):
     remove_tags = [
         dict(name=['object','link','meta','iframe','embed']),
         dict(name='span',attrs={'class':'linkindice'}),
-        dict(name='div', attrs={'class':'bottom-mobile'}),
-        dict(name='div', attrs={'id':['rssdiv','blocco']}),
-        dict(name='div', attrs={'class':'utility'}),
+        dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
+        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
+        dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
         dict(name='div', attrs={'class':'generalbox'}),
         dict(name='ul', attrs={'id':'hystory'})
         ]

     feeds = [
-        (u'Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
+        (u'Homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
         (u'Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
         (u'Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
         (u'Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),

@ -110,3 +103,5 @@ class LaRepubblica(BasicNewsRecipe):
             del item['style']
         return soup

+    def preprocess_raw_html(self, raw, url):
+        return '<html><head>'+raw[raw.find('</head>'):]

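The added preprocess_raw_html() keeps '<html><head>' plus everything from the original </head> onward, a cheap way to discard scripts and meta cruft before parsing, replacing the three regexes it deletes. One caveat: str.find() returns -1 on a miss, which would keep only the final character of the page. A standalone version with that edge guarded (the function name is mine, not calibre API):

    def strip_head(raw):
        # standalone version of the preprocess_raw_html() trick above;
        # guards the str.find() miss (-1), which would otherwise keep
        # only the final character of the page
        idx = raw.find('</head>')
        if idx == -1:
            return raw
        return '<html><head>' + raw[idx:]

    print strip_head('<html><head><script></script></head><body>x</body></html>')
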
recipes/lega_nerd.recipe Normal file
View File

@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1326135232(BasicNewsRecipe):
title = u'Lega Nerd'
description = 'nerd / geek culture, pc, comics, music, culture'
language = 'it'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd')]
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '9, January 2012'

recipes/lets_get_critical.recipe Normal file
View File

@ -0,0 +1,94 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class LetsGetCritical(BasicNewsRecipe):
title = u"Let's Get Critical"
description = 'Curation / aggregation of criticisms of the arts and culture '
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://www.letsgetcritical.org'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('Architecture','architecture',30),
('Art','art',30),
('Books','books',30),
('Design','design',30),
('Digital','digital',30),
('Food','food',30),
('Movies','movies',30),
('Music','music',30),
('Television','television',30),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://www.letsgetcritical.org/wp-content/themes/lets_get_critical/images/lgc.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/category/'+tag.lower()
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
posts = soup.findAll('div',attrs={'class':'post_multi'})
if len(posts) == 0:
break
for post in posts:
dt = post.find('div',attrs={'class':'title'})
atag = dt.find('a')
url = atag['href']
# skip promotionals and duplicate
if url.startswith('http://letsgetcritical') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(atag)
self.log('\tFound article:', title)
self.log('\t', url)
desc = post.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = post.previousSibling
# navigate up sibling to find date
while p:
if hasattr(p,'class') and p['class'] == 'singledate':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds

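For reference, the loop above builds the one structure parse_index() must return: a list of (section title, article list) pairs, where each article is a plain dict. The keys are the ones this recipe fills in; the values below are invented samples:

    # shape of a parse_index() return value, with invented sample data
    feeds = [
        ('Movies', [
            {'title':       'Example review',
             'url':         'http://www.example.com/review',
             'description': '[example.com] One-line teaser',
             'date':        'January 5, 2012'},
        ]),
    ]

    for section, articles in feeds:
        print section, '->', len(articles), 'article(s)'
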
View File

@ -41,7 +41,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
     keep_only_tags = [dict(name='div', attrs={'id':'articulo'})]
     remove_tags = [
          dict(name=['meta','link','form','iframe','embed','object','hr'])
-        ,dict(attrs={'class':['caja_fonts sin_border_bot','pub']})
+        ,dict(attrs={'class':['caja_fonts sin_border_bot','pub','twitter-share-button']})
         ]
     remove_attributes = ['width','height']

View File

@ -14,8 +14,11 @@ class WeeklyLWN(BasicNewsRecipe):
     description = 'Weekly summary of what has happened in the free software world.'
     __author__ = 'Davide Cavalca'
     language = 'en'
+    site_url = 'http://lwn.net'

-    cover_url = 'http://lwn.net/images/lcorner.png'
+    extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'
+    cover_url = site_url + '/images/lcorner.png'
     #masthead_url = 'http://lwn.net/images/lcorner.png'
     publication_type = 'magazine'

@ -43,11 +46,29 @@ class WeeklyLWN(BasicNewsRecipe):
             br.submit()
         return br

+    def print_version(self, url):
+        # Strip off anchor
+        url = url.split('#')[0]
+
+        # Prepend site_url
+        if url[0:len(self.site_url)] != self.site_url:
+            url = self.site_url + url
+
+        # Append printable URL parameter
+        print_param = '?format=printable'
+        if url[-len(print_param):] != print_param:
+            url += print_param
+
+        #import sys
+        #print >>sys.stderr, "*** print_version(url):", url
+
+        return url

     def parse_index(self):
         if self.username is not None and self.password is not None:
-            index_url = 'http://lwn.net/current/bigpage?format=printable'
+            index_url = self.print_version('/current/bigpage')
         else:
-            index_url = 'http://lwn.net/free/bigpage?format=printable'
+            index_url = self.print_version('/free/bigpage')
         soup = self.index_to_soup(index_url)
         body = soup.body

@ -56,19 +77,19 @@ class WeeklyLWN(BasicNewsRecipe):
         url_re = re.compile('^/Articles/')

         while True:
-            tag_title = body.findNext(name='p', attrs={'class':'SummaryHL'})
+            tag_title = body.findNext(attrs={'class':'SummaryHL'})
             if tag_title == None:
                 break

-            tag_section = tag_title.findPrevious(name='p', attrs={'class':'Cat1HL'})
+            tag_section = tag_title.findPrevious(attrs={'class':'Cat1HL'})
             if tag_section == None:
                 section = 'Front Page'
             else:
                 section = tag_section.string

-            tag_section2 = tag_title.findPrevious(name='p', attrs={'class':'Cat2HL'})
+            tag_section2 = tag_title.findPrevious(attrs={'class':'Cat2HL'})
             if tag_section2 != None:
-                if tag_section2.findPrevious(name='p', attrs={'class':'Cat1HL'}) == tag_section:
+                if tag_section2.findPrevious(attrs={'class':'Cat1HL'}) == tag_section:
                     section = "%s: %s" %(section, tag_section2.string)

             if section not in articles.keys():

@ -94,9 +115,10 @@ class WeeklyLWN(BasicNewsRecipe):
             if tag_url == None:
                 break

             article = dict(
                 title=self.tag_to_string(tag_title),
-                url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
+                url=tag_url['href'],
                 description='', content='', date='')
             articles[section].append(article)

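The print_version() added to this recipe is a small idempotent URL normaliser: drop the #anchor, qualify site-relative links, and append the printable query only if it is not already there, so feeding it its own output is harmless. The same logic as a standalone function (the name is mine):

    def printable_url(url, site_url='http://lwn.net'):
        # standalone rendering of the recipe's print_version() logic
        url = url.split('#')[0]              # strip anchor
        if not url.startswith(site_url):     # qualify relative links
            url = site_url + url
        print_param = '?format=printable'
        if not url.endswith(print_param):    # append the query exactly once
            url += print_param
        return url

    print printable_url('/Articles/123456/#Comments')
    # http://lwn.net/Articles/123456/?format=printable
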
Some files were not shown because too many files have changed in this diff.