Merge

2025-08-11 09:13:57 -04:00 · 2012-01-25 22:34:50 +01:00 · 2012-01-25 22:34:50 +01:00 · 1028383a93
commit 1028383a93
parent 522787e86c e01e2c9ba6
724 changed files with 304073 additions and 156411 deletions
--- a/.bzrignore
+++ b/.bzrignore
@ -2,6 +2,7 @@
 .check-cache.pickle
 src/calibre/plugins
 resources/images.qrc
+src/calibre/ebooks/oeb/display/test/*.js
 src/calibre/manual/.build/
 src/calibre/manual/cli/
 src/calibre/manual/template_ref.rst
@ -15,6 +16,7 @@ resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
 resources/builtin_recipes.zip
 resources/template-functions.json
+resources/display/*.js
 setup/installer/windows/calibre/build.log
 src/calibre/translations/.errors
 src/cssutils/.svn/
--- a/Changelog.old.yaml
+++ b/Changelog.old.yaml
--- a/Changelog.yaml
+++ b/Changelog.yaml
--- a/recipes/abc_au.recipe
+++ b/recipes/abc_au.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Dean Cording'
+__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
 '''
 abc.net.au/news
 '''
@ -8,7 +8,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe

 class ABCNews(BasicNewsRecipe):
    title                  = 'ABC News'
-    __author__             = 'Dean Cording'
+    __author__             = 'Pat Stapleton, Dean Cording'
    description            = 'News from Australia'
    masthead_url           = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
    cover_url              = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
@ -23,7 +23,9 @@ class ABCNews(BasicNewsRecipe):
    category               = 'News, Australia, World'
    language               = 'en_AU'
    publication_type       = 'newsportal'
-    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+#    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
+    preprocess_regexps     = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
    conversion_options = {
                             'comments'        : description
                            ,'tags'            : category
@ -32,23 +34,23 @@ class ABCNews(BasicNewsRecipe):
                            ,'linearize_tables': False
                         }

-    keep_only_tags    =  dict(id='article')
+    keep_only_tags = [dict(attrs={'class':['article section']})]

-    remove_tags = [dict(attrs={'class':['related', 'tags']}),
-                     dict(id='statepromo')
-                        ]
+    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
+        'inline-content story left', 'inline-content map left contracted', 'published',
+        'story-map', 'statepromo', 'topics', ]})]

    remove_attributes = ['width','height']

    feeds          = [
-                      ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
-                      ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
-                      ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
-                      ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
-                      ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
-                      ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
-                      ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
-                      ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
-                      ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
-                      ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
+                      ('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
+                      ('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
+                      ('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
+                      ('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
+                      ('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
+                      ('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
+                      ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
+                      ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
+                      ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
+                      ('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
                    ]
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -1,19 +1,38 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class Adventure_zone(BasicNewsRecipe):
    title          = u'Adventure Zone'
    __author__        = 'fenuks'
    description   = 'Adventure zone - adventure games from A to Z'
    category       = 'games'
    language       = 'pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
    no_stylesheets = True
+    oldest_article = 20
+    max_articles_per_feed = 100
+    use_embedded_content=False
+    preprocess_regexps     = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
-    remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
+    remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
+    remove_tags_after= dict(id='comments')
    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; }'
    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

+    def parse_feeds (self): 
+      feeds = BasicNewsRecipe.parse_feeds(self) 
+      soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
+      tag=soup.find(name='channel')
+      titles=[]
+      for r in tag.findAll(name='image'):
+          r.extract()
+      art=tag.findAll(name='item')
+      for i in art:
+            titles.append(i.title.string)
+      for feed in feeds:
+        for article in feed.articles[:]:
+            article.title=titles[feed.articles.index(article)]
+      return feeds
+
+
    def get_cover_url(self):
        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
        cover=soup.find(id='box_OstatninumerAZ')
@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):


    def skip_ad_pages(self, soup):
-        skip_tag = soup.body.findAll(name='a')
-        if skip_tag is not None:
-            for r in skip_tag:
-                 if 'articles.php?' in r['href']:
-                     if r.strong is not None:
-                         word=r.strong.string
-                         if ('zapowied' or 'recenzj') in word:
-                             return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
-        else:
-            None
-
-    def print_version(self, url):
-        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
-
+        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
+        skip_tag = skip_tag.findAll(name='a')
+        for r in skip_tag:
+           if r.strong:
+                 word=r.strong.string
+                 if word and (('zapowied' in word) or ('recenzj' in word)  or ('solucj' in word)):
+                   return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
--- a/recipes/al_masry_al_youm.recipe
+++ b/recipes/al_masry_al_youm.recipe
@ -0,0 +1,50 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
+'''
+almasryalyoum.com/en
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class TheDailyNewsEG(BasicNewsRecipe):
+    title          	   = u'al-masry al-youm'
+    __author__             = 'Omm Mishmishah'
+    description            = 'Independent News from Egypt'
+    masthead_url           = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
+    cover_url              = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
+
+    auto_cleanup           = True
+    oldest_article         = 7
+    max_articles_per_feed  = 100
+    no_stylesheets         = False
+    #delay                  = 1
+    use_embedded_content   = False
+    encoding               = 'utf8'
+    publisher              = 'Independent News Egypt'
+    category               = 'News, Egypt, World'
+    language               = 'en_EG'
+    publication_type       = 'newsportal'
+#    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
+    preprocess_regexps     = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
+    conversion_options = {
+                             'comments'        : description
+                            ,'tags'            : category
+                            ,'language'        : language
+                            ,'publisher'       : publisher
+                            ,'linearize_tables': False
+                         }
+
+    keep_only_tags = [dict(attrs={'class':['article section']})]
+
+    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
+        'inline-content story left', 'inline-content map left contracted', 'published',
+        'story-map', 'statepromo', 'topics', ]})]
+
+    remove_attributes = ['width','height']
+
+    feeds          = [(u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
+                      (u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
+                      (u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
+                      (u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml')
+                     ]
--- a/recipes/alternet.recipe
+++ b/recipes/alternet.recipe
@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe):
    category = 'News, Magazine'
    description = 'News magazine and online community'
    feeds          = [
-	(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
-	(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
-	(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
-	(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
-	]
+        (u'Front Page', u'http://feeds.feedblitz.com/alternet'),
+        (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
+        (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
+        (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
+        ]
    remove_attributes = ['width', 'align','cellspacing']
    remove_javascript = True
    use_embedded_content   = False
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name
+
+    conversion_options = {'linearize_tables': True}
--- a/recipes/ap.recipe
+++ b/recipes/ap.recipe
@ -11,7 +11,6 @@ class AssociatedPress(BasicNewsRecipe):
    language = 'en'
    no_stylesheets = True
    max_articles_per_feed = 15
-    html2lrf_options = ['--force-page-break-before-tag="chapter"']


    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
--- a/recipes/astro_news_pl.recipe
+++ b/recipes/astro_news_pl.recipe
@ -1,5 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
 class AstroNEWS(BasicNewsRecipe):
    title          = u'AstroNEWS'
    __author__        = 'fenuks'
@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
-    auto_cleanup = True
+    #extra_css= 'table {text-align: left;}'
+    no_stylesheets=True
    cover_url='http://news.astronet.pl/img/logo_news.jpg'
-   # no_stylesheets= True
+    remove_tags=[dict(name='hr')]
    feeds          = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]

    def print_version(self, url):
        return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')

+    def preprocess_html(self, soup):
+        for item in soup.findAll(align=True):
+            del item['align']
+        return soup
--- a/recipes/bbc.recipe
+++ b/recipes/bbc.recipe
@ -1,61 +1,648 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+##
+## Title:        BBC News, Sport, and Blog Calibre Recipe
+## Contact:      mattst - jmstanfield@gmail.com
+##
+## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
+## Copyright:    mattst - jmstanfield@gmail.com
+##
+## Written:      November 2011
+## Last Edited:  2011-11-19
+##
+
+__license__     = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
+__copyright__   = 'mattst - jmstanfield@gmail.com'
+
+
 '''
-news.bbc.co.uk
+BBC News, Sport, and Blog Calibre Recipe
 '''
+
+# Import the regular expressions module.
 import re
+
+# Import the BasicNewsRecipe class which this class extends.
 from calibre.web.feeds.recipes import BasicNewsRecipe

-class BBC(BasicNewsRecipe):
-    title                  = 'BBC News'
-    __author__             = 'Darko Miletic, Starson17'
-    description            = 'News from UK. '
-    oldest_article         = 2
-    max_articles_per_feed  = 100
-    no_stylesheets         = True
-    #delay                  = 1
-    use_embedded_content   = False
-    encoding               = 'utf8'
-    publisher              = 'BBC'
-    category               = 'news, UK, world'
-    language               = 'en_GB'
-    publication_type       = 'newsportal'
-    extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    conversion_options = {
-                             'comments'        : description
-                            ,'tags'            : category
-                            ,'language'        : language
-                            ,'publisher'       : publisher
-                            ,'linearize_tables': True
+class BBCNewsSportBlog(BasicNewsRecipe):
+
+    #
+    #    **** IMPORTANT USERS READ ME ****
+    #
+    #  First select the feeds you want then scroll down below the feeds list
+    #  and select the values you want for the other user preferences, like
+    #  oldest_article and such like.
+    #
+    #
+    #  Select the BBC rss feeds which you want in your ebook.
+    #  Selected feeds have NO '#' at their start, de-selected feeds begin with a '#'.
+    #
+    #  Eg.  ("News Home", "http://feeds.bbci.co.uk/... - include feed.
+    #  Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
+    #
+    # There are 68 feeds below which constitute the bulk of the available rss
+    # feeds on the BBC web site. These include 5 blogs by editors and
+    # correspondents, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
+    # Wales, Scotland Business), and 7 Welsh language feeds.
+    #
+    # Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
+    # so if "oldest_article = 1.5" (only articles published in the last 36 hours)
+    # you may get some 'empty feeds' which will not then be included in the ebook.
+    #
+    # The 15 feeds currently selected below are simply my default ones.
+    #
+    # Note: With all 68 feeds selected, oldest_article set to 2,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
+    # the ebook creation took 29 minutes on my speedy 100 mbps net connection,
+    # fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
+    # More realistically with 15 feeds selected, oldest_article set to 1.5,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
+    # it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
+    #
+    # Select / de-select the feeds you want in your ebook.
+    #
+    feeds = [
+              ("News Home", "http://feeds.bbci.co.uk/news/rss.xml"),
+              ("UK", "http://feeds.bbci.co.uk/news/uk/rss.xml"),
+              ("World", "http://feeds.bbci.co.uk/news/world/rss.xml"),
+              #("England", "http://feeds.bbci.co.uk/news/england/rss.xml"),
+              #("Scotland", "http://feeds.bbci.co.uk/news/scotland/rss.xml"),
+              #("Wales", "http://feeds.bbci.co.uk/news/wales/rss.xml"),
+              #("N. Ireland", "http://feeds.bbci.co.uk/news/northern_ireland/rss.xml"),
+              #("Africa", "http://feeds.bbci.co.uk/news/world/africa/rss.xml"),
+              #("Asia", "http://feeds.bbci.co.uk/news/world/asia/rss.xml"),
+              #("Europe", "http://feeds.bbci.co.uk/news/world/europe/rss.xml"),
+              #("Latin America", "http://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
+              #("Middle East", "http://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
+              ("US & Canada", "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
+              ("Politics", "http://feeds.bbci.co.uk/news/politics/rss.xml"),
+              ("Science/Environment", "http://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
+              ("Technology", "http://feeds.bbci.co.uk/news/technology/rss.xml"),
+              ("Magazine", "http://feeds.bbci.co.uk/news/magazine/rss.xml"),
+              ("Entertainment/Arts", "http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
+              #("Health", "http://feeds.bbci.co.uk/news/health/rss.xml"),
+              #("Education/Family", "http://feeds.bbci.co.uk/news/education/rss.xml"),
+              ("Business", "http://feeds.bbci.co.uk/news/business/rss.xml"),
+              ("Special Reports", "http://feeds.bbci.co.uk/news/special_reports/rss.xml"),
+              ("Also in the News", "http://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
+              #("Newsbeat", "http://www.bbc.co.uk/newsbeat/rss.xml"),
+              #("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
+              ("Blog: Nick Robinson (Political Editor)", "http://feeds.bbci.co.uk/news/correspondents/nickrobinson/rss.sxml"),
+              #("Blog: Mark D'Arcy (Parliamentary Correspondent)", "http://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
+              #("Blog: Robert Peston (Business Editor)", "http://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
+              #("Blog: Stephanie Flanders (Economics Editor)", "http://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
+              ("Blog: Rory Cellan-Jones (Technology correspondent)", "http://feeds.bbci.co.uk/news/correspondents/rorycellanjones/rss.sxml"),
+              ("Sport Front Page", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
+              #("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
+              #("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
+              #("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
+              #("Rugby League", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml"),
+              #("Tennis", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml"),
+              #("Golf", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml"),
+              #("Motorsport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml"),
+              #("Boxing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml"),
+              #("Athletics", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml"),
+              #("Snooker", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml"),
+              #("Horse Racing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml"),
+              #("Cycling", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml"),
+              #("Disability Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml"),
+              #("Other Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml"),
+              #("Olympics 2012", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/olympics_2012/rss.xml"),
+              #("N. Ireland Politics", "http://feeds.bbci.co.uk/news/northern_ireland/northern_ireland_politics/rss.xml"),
+              #("Scotland Politics", "http://feeds.bbci.co.uk/news/scotland/scotland_politics/rss.xml"),
+              #("Scotland Business", "http://feeds.bbci.co.uk/news/scotland/scotland_business/rss.xml"),
+              #("E. Scotland, Edinburgh & Fife", "http://feeds.bbci.co.uk/news/scotland/edinburgh_east_and_fife/rss.xml"),
+              #("W. Scotland & Glasgow", "http://feeds.bbci.co.uk/news/scotland/glasgow_and_west/rss.xml"),
+              #("Highlands & Islands", "http://feeds.bbci.co.uk/news/scotland/highlands_and_islands/rss.xml"),
+              #("NE. Scotland, Orkney & Shetland", "http://feeds.bbci.co.uk/news/scotland/north_east_orkney_and_shetland/rss.xml"),
+              #("South Scotland", "http://feeds.bbci.co.uk/news/scotland/south_scotland/rss.xml"),
+              #("Central Scotland & Tayside", "http://feeds.bbci.co.uk/news/scotland/tayside_and_central/rss.xml"),
+              #("Wales Politics", "http://feeds.bbci.co.uk/news/wales/wales_politics/rss.xml"),
+              #("NW. Wales", "http://feeds.bbci.co.uk/news/wales/north_west_wales/rss.xml"),
+              #("NE. Wales", "http://feeds.bbci.co.uk/news/wales/north_east_wales/rss.xml"),
+              #("Mid. Wales", "http://feeds.bbci.co.uk/news/wales/mid_wales/rss.xml"),
+              #("SW. Wales", "http://feeds.bbci.co.uk/news/wales/south_west_wales/rss.xml"),
+              #("SE. Wales", "http://feeds.bbci.co.uk/news/wales/south_east_wales/rss.xml"),
+              #("Newyddion - News in Welsh", "http://feeds.bbci.co.uk/newyddion/rss.xml"),
+              #("Gwleidyddiaeth", "http://feeds.bbci.co.uk/newyddion/gwleidyddiaeth/rss.xml"),
+              #("Gogledd-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/gogledd-ddwyrain/rss.xml"),
+              #("Gogledd-Orllewin", "http://feeds.bbci.co.uk/newyddion/gogledd-orllewin/rss.xml"),
+              #("Canolbarth", "http://feeds.bbci.co.uk/newyddion/canolbarth/rss.xml"),
+              #("De-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/de-ddwyrain/rss.xml"),
+              #("De-Orllewin", "http://feeds.bbci.co.uk/newyddion/de-orllewin/rss.xml"),
+            ]
+
+
+    #    **** SELECT YOUR USER PREFERENCES ****
+
+    # Title to use for the ebook.
+    #
+    title = 'BBC News'
+
+    # A brief description for the ebook.
+    #
+    description = u'BBC web site ebook created using rss feeds.'
+
+    # The max number of articles which may be downloaded from each feed.
+    # I've never seen more than about 70 articles in a single feed in the
+    # BBC feeds.
+    #
+    max_articles_per_feed = 100
+
+    # The max age of articles which may be downloaded from each feed. This is
+    # specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
+    # half days). My default of 1.5 days is the last 36 hours, the point at
+    # which I've decided 'news' becomes 'old news', but be warned this is not
+    # so good for the blogs, technology, magazine, etc., and sports feeds.
+    # You may wish to extend this to 2-5 but watch out ebook creation time will
+    # increase as well. Setting this to 30 will get everything (AFAICT) as long
+    # as max_articles_per_feed remains set high (except for 'Click' which is
+    # v. low volume and its currently oldest article is 4th Feb 2011).
+    #
+    oldest_article = 1.5
+
+    # Number of simultaneous downloads. 20 is consistently working fine on the
+    # BBC News feeds with no problems. Speeds things up from the default of 5.
+    # If you have a lot of feeds and/or have increased oldest_article above 2
+    # then you may wish to try increasing simultaneous_downloads to 25-30,
+    # Or, of course, if you are in a hurry. [I've not tried beyond 20.]
+    #
+    simultaneous_downloads = 20
+
+    # Timeout for fetching files from the server in seconds. The default of
+    # 120 seconds, seems somewhat excessive.
+    #
+    timeout = 30
+
+    # The format string for the date shown on the ebook's first page.
+    # List of all values: http://docs.python.org/library/time.html
+    # Default in news.py has a leading space so that's mirrored here.
+    # As with 'feeds' select/de-select by adding/removing the initial '#',
+    # only one timefmt should be selected, here's a few to choose from.
+    #
+    timefmt = ' [%a, %d %b %Y]'              # [Fri, 14 Nov 2011] (Calibre default)
+    #timefmt = ' [%a, %d %b %Y %H:%M]'       # [Fri, 14 Nov 2011 18:30]
+    #timefmt = ' [%a, %d %b %Y %I:%M %p]'    # [Fri, 14 Nov 2011 06:30 PM]
+    #timefmt = ' [%d %b %Y]'                 # [14 Nov 2011]
+    #timefmt = ' [%d %b %Y %H:%M]'           # [14 Nov 2011 18:30]
+    #timefmt = ' [%Y-%m-%d]'                 # [2011-11-14]
+    #timefmt = ' [%Y-%m-%d-%H-%M]'           # [2011-11-14-18-30]
+
+
+
+    #
+    #    **** IMPORTANT ****
+    #
+    #    DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    #    DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    #    I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :)
+    #
+    #    **** IMPORTANT ****
+    #
+
+
+
+    # Author of this recipe.
+    __author__ = 'mattst'
+
+    # Specify English as the language of the RSS feeds (ISO-639 code).
+    language = 'en_GB'
+
+    # Set tags.
+    tags = 'news, sport, blog'
+
+    # Set publisher and publication type.
+    publisher = 'BBC'
+    publication_type = 'newspaper'
+
+    # Disable stylesheets from site.
+    no_stylesheets = True
+
+    # Specifies an override encoding for sites that have an incorrect charset
+    # specified. Default of 'None' says to auto-detect. Some other BBC recipes
+    # use 'utf8', which works fine (so use that if necessary) but auto-detecting
+    # with None is working fine, so stick with that for robustness.
+    encoding = None
+
+    # Sets whether a feed has full articles embedded in it. The BBC feeds do not.
+    use_embedded_content = False
+
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # Create a custom title which fits nicely in the Kindle title list.
+    # Requires "import time" above class declaration, and replacing
+    # title with custom_title in conversion_options (right column only).
+    # Example of string below: "BBC News - 14 Nov 2011"
+    #
+    # custom_title = "BBC News - " + time.strftime('%d %b %Y')
+
+    '''
+    # Conversion options for advanced users, but don't forget to comment out the
+    # current conversion_options below. Avoid setting 'linearize_tables' as that
+    # plays havoc with the 'old style' table based pages.
+    #
+    conversion_options = { 'title'       : title,
+                           'comments'    : description,
+                           'tags'        : tags,
+                           'language'    : language,
+                           'publisher'   : publisher,
+                           'authors'     : publisher,
+                           'smarten_punctuation' : True
                         }
+    '''

-    keep_only_tags    = [
-                       dict(name='div', attrs={'class':['layout-block-a layout-block']})
-                       ,dict(attrs={'class':['story-body','storybody']})
-                        ]
+    conversion_options = { 'smarten_punctuation' : True }

-    remove_tags = [
-                       dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
-                                                    'story-feature wide ', 'story-feature narrow']}),
-                       dict(id=['hypertab', 'comment-form']),
-                        ]
+    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
+    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
+                 .introduction, .first { font-weight: bold; } \
+                 .cross-head { font-weight: bold; font-size: 125%; } \
+                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
+                 .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
+                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
+                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
+                    text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
+                 .story-date, .published { font-size: 80%; } \
+                 table { width: 100%; } \
+                 td img { display: block; margin: 5px auto; } \
+                 ul { padding-top: 10px; } \
+                 ol { padding-top: 10px; } \
+                 li { padding-top: 5px; padding-bottom: 5px; } \
+                 h1 { text-align: center; font-size: 175%; font-weight: bold; } \
+                 h2 { text-align: center; font-size: 150%; font-weight: bold; } \
+                 h3 { text-align: center; font-size: 125%; font-weight: bold; } \
+                 h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'

-    remove_attributes = ['width','height']
+    # Remove various tag attributes to improve the look of the ebook pages.
+    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
+                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]

-    feeds          = [
-                      ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
-                      ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
-                      ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
-                      ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
-                      ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
-                      ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
-                      ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
-                      ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
-                      ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
-                      ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
-                      ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
-                      ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
-                      ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
-                    ]
+    # Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
+    # cause a section of the ebook to start in an unsightly fashion or, more
+    # frequently, a "<br />" will muck up the formatting of a correspondent's byline.
+    # "<br />" and "<br clear/>" are far more frequently used on the table formatted
+    # style of pages, and really spoil the look of the ebook pages.
+    #
+    # The 'clear' variant matches the tag's attributes with "[^>]*" rather than ".*"
+    # so that the match always stops at the closing "/>" of the <br> tag itself. A
+    # greedy ".*" runs on to the last "/>" on the same line and would silently delete
+    # any text and self-closing tags (Eg. <img ... />) which follow the line break.
+    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
+                              (re.compile(r'<br[ ]*clear[^>]*/>', re.IGNORECASE), lambda m: '')]

+
+    # Create regular expressions for tag keeping and removal to make the matches more
+    # robust against minor changes and errors in the HTML, Eg. double spaces, leading
+    # and trailing spaces, missing hyphens, and such like.
+    # Python regular expression ('re' class) page: http://docs.python.org/library/re.html
+
+    # ***************************************
+    # Regular expressions for keep_only_tags:
+    # ***************************************
+
+    # The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
+    # page which contains the main text of the article. Match storybody variants: 'storybody',
+    # 'story-body', 'story body','storybody ', etc.
+    storybody_reg_exp = '^.*story[_ -]*body.*$'
+
+    # The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
+    # and published date. This is one level above the usual news pages which have the title
+    # and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
+    # resulting in a lot of extra things to be removed by remove_tags.
+    blq_content_reg_exp = '^.*blq[_ -]*content.*$'
+
+    # The BBC has an alternative page design structure, which I suspect is an out-of-date
+    # design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
+    # (travel), and in some sport pages. These alternative pages are table based (which is
+    # why I think they are an out-of-date design) and account for -I'm guesstimating- less
+    # than 1% of all articles. They use a table class 'storycontent' to hold the article
+    # and like blq_content (above) have required lots of extra removal by remove_tags.
+    story_content_reg_exp = '^.*story[_ -]*content.*$'
+
+    # Keep the sections of the HTML which match the list below. The HTML page created by
+    # Calibre will fill <body> with those sections which are matched. Note that the
+    # blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
+    # it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
+    # will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
+    # all). If they are the other way around in keep_only_tags then blq_content_reg_exp
+    # will end up being discarded.
+    keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
+
+    # ************************************
+    # Regular expressions for remove_tags:
+    # ************************************
+
+    # Regular expression to remove share-help and variant tags. The share-help class
+    # is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
+    # twitter, email. Removed to avoid page clutter.
+    share_help_reg_exp = '^.*share[_ -]*help.*$'
+
+    # Regular expression to remove embedded-hyper and variant tags. This class is used to
+    # display links to other BBC News articles on the same/similar subject.
+    embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
+
+    # Regular expression to remove hypertabs and variant tags. This class is used to
+    # display a tab bar at the top of an article which allows the user to switch to
+    # an article (viewed on the same page) providing further info., 'in depth' analysis,
+    # an editorial, a correspondent's blog entry, and such like. The ability to handle
+    # a tab bar of this nature is currently beyond the scope of this recipe and
+    # possibly of Calibre itself (not sure about that - TO DO - check!).
+    hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
+
+    # Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
+    # 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
+    # This class is used to add additional info. boxes, or small lists, outside of
+    # the main story. TO DO: Work out a way to incorporate these neatly.
+    story_feature_reg_exp = '^.*story[_ -]*feature.*$'
+
+    # Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
+    # 'videoInStoryC'. This class is used to embed video.
+    video_reg_exp = '^.*video.*$'
+
+    # Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
+    # This class is used to embed audio.
+    audio_reg_exp = '^.*audio.*$'
+
+    # Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
+    # This class is used to embed a photo slideshow. See also 'slideshow' below.
+    picture_gallery_reg_exp = '^.*picture.*$'
+
+    # Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
+    # This class is used to embed a slideshow (not necessarily photo) but both
+    # 'slideshow' and 'pictureGallery' are used for slideshows.
+    slideshow_reg_exp = '^.*slide[_ -]*show.*$'
+
+    # Regular expression to remove social-links and variant tags. This class is used to
+    # display links to a BBC blogger's main page, used in various columnists' blogs
+    # (Eg. Nick Robinson, Robert Peston).
+    social_links_reg_exp = '^.*social[_ -]*links.*$'
+
+    # Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
+    # 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
+    # removed by 'story-feature' removal (as they are usually within them), but
+    # not always. The quotation removed is always (AFAICT) in the article text
+    # as well but a 2nd copy is placed in a quote tag to draw attention to it.
+    # The quote class tags may or may not appear in div's.
+    quote_reg_exp = '^.*quote.*$'
+
+    # Regular expression to remove hidden and variant tags, Eg. 'hidden'.
+    # The purpose of these is unclear, they seem to be an internal link to a
+    # section within the article, but the text of the link (Eg. 'Continue reading
+    # the main story') never seems to be displayed anyway. Removed to avoid clutter.
+    # The hidden class tags may or may not appear in div's.
+    hidden_reg_exp = '^.*hidden.*$'
+
+    # Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
+    # Used on the site to display text about registered users entering comments.
+    comment_reg_exp = '^.*comment.*$'
+
+    # Regular expression to remove form and variant tags, Eg. 'comment-form'.
+    # Used on the site to allow registered BBC users to fill in forms, typically
+    # for entering comments about an article.
+    form_reg_exp = '^.*form.*$'
+
+    # Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
+
+    #<div class="story-actions"> Used on sports pages for 'email' and 'print'.
+    story_actions_reg_exp = '^.*story[_ -]*actions.*$'
+
+    #<div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
+    # social networking links).
+    bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
+
+    #<div id="secondary-content" class="content-group">
+    # NOTE: Don't remove class="content-group" that is needed.
+    # Used on sports pages to link to 'similar stories'.
+    secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
+
+    #<div id="featured-content" class="content-group">
+    # NOTE: Don't remove class="content-group" that is needed.
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    featured_content_reg_exp = '^.*featured[_ -]*content.*$'
+
+    #<div id="navigation">
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    # Used sometimes instead of "featured-content" above.
+    navigation_reg_exp = '^.*navigation.*$'
+
+    #<a class="skip" href="#blq-container-inner">Skip to top</a>
+    # Used on sports pages to link to the top of the page.
+    skip_reg_exp = '^.*skip.*$'
+
+    # Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
+    # which are the alternative table design based pages. The purpose of some of these
+    # is not entirely clear from the pages (which are a total mess!).
+
+    # Remove mapping based tags, Eg. <map id="world_map">
+    # The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
+    map_reg_exp = '^.*map.*$'
+
+    # Remove social bookmarking variation, called 'socialBookMarks'.
+    social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
+
+    # Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
+    blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
+
+    # Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
+    # alongside 'socialBookMarks' whenever that appears. I am removing it as well
+    # under the assumption that it can appear alone as well.
+    sharesb_reg_exp = '^.*sharesb.*$'
+
+    # Remove class 'o'. The worst named user created css class of all time. The creator
+    # should immediately be fired. I've seen it used to hold nothing at all but with
+    # 20 or so empty lines in it. Also to hold a single link to another article.
+    # Whatever it was designed to do it is not wanted by this recipe. Exact match only.
+    o_reg_exp = '^o$'
+
+    # Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    promo_top_reg_exp = '^.*promotopbg.*$'
+    promo_bottom_reg_exp = '^.*promobottombg.*$'
+
+    # Remove 'nlp', provides heading for link lists. Requires an exact match due to
+    # risk of matching those letters in something needed, unless I see a variation
+    # of 'nlp' used at a later date.
+    nlp_reg_exp = '^nlp$'
+
+    # Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
+    # has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
+    # matching those letters in something needed.
+    mva_or_mvb_reg_exp = '^mv[ab]$'
+
+    # Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
+    mvtb_reg_exp = '^mvtb$'
+
+    # Remove 'blq-toplink', class to provide a link to the top of the page.
+    blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
+
+    # Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
+    # Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    prods_services_01_reg_exp = '^.*servicev4.*$'
+    prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
+
+    # Remove -what I think is- some kind of navigation tools helper class, though I am
+    # not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
+    # frequently and it is not wanted. Have decided to use two reg expressions to make
+    # removing this (and variants) robust.
+    blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
+    blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
+
+    # Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
+    # need removing - I have no clue what it does other than it contains links.
+    # Whatever it is - it is not part of the article and is not wanted.
+    puffbox_reg_exp = '^.*puffbox.*$'
+
+    # Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
+    sibtbg_reg_exp = '^.*sibtbg.*$'
+
+    # Remove 'storyextra' - links to relevant articles and external sites.
+    storyextra_reg_exp = '^.*story[_ -]*extra.*$'
+
+
+    remove_tags = [ dict(name='div',  attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
+                    dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div',  attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)})
+                  ]
+
+    # Uses url to create and return the 'printer friendly' version of the url.
+    # In other words the 'print this page' address of the page.
+    #
+    # There are 3 types of urls used in the BBC site's rss feeds. There is just
+    # 1 type for the standard news while there are 2 used for sports feed urls.
+    # Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
+    # there is a major story of interest to 'everyone'. So even if no BBC sports
+    # feeds are added to 'feeds' the logic of this method is still needed to avoid
+    # blank / missing / empty articles which have an index title and then no body.
+    def print_version(self, url):
+        """Return the 'print this page' address for an article url taken from a feed.
+
+        The rss redirect prefix is stripped from the url and "?print=true" is
+        appended. Urls containing "go/rss/-/sport1/" carry the short prefix
+        "go/rss/-/"; every other url (regular news and the second sports form)
+        is stripped of "go/rss/int/news/-/" instead.
+        """
+        if "go/rss/-/sport1/" in url:
+            # Sports page urls type 01.
+            rss_prefix = "go/rss/-/"
+        else:
+            # Sports page urls type 02 and regular news page urls.
+            rss_prefix = "go/rss/int/news/-/"
+
+        # Always add "?print=true" to the end of the url.
+        return url.replace(rss_prefix, "") + "?print=true"
+
+
+    # Remove articles in feeds based on a string in the article title or url.
+    #
+    # Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
+    # thread, in post with title: "Remove articles from feed", see url:
+    # http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
+    # Many thanks and all credit to Starson17.
+    #
+    # Starson17's code has obviously been altered to suit my requirements.
+    def parse_feeds(self):
+        """Parse the feeds as usual, then drop the articles this recipe does not want.
+
+        An article is removed when its title contains one of the marker phrases
+        listed below, or when its url contains 'scorecards'. Returns the feed
+        list produced by BasicNewsRecipe.parse_feeds with those articles removed.
+        """
+        # Matched exactly as written. Most BBC rss feed video only / audio only
+        # 'articles' use upper case 'VIDEO' / 'AUDIO' as a title prefix, so
+        # articles like 'Video game banned' or 'Hi-Def audio...' are kept.
+        exact_case_markers = ('VIDEO', 'AUDIO')
+
+        # Matched against the upper-cased title, so any case of the phrase hits:
+        #   'IN PICTURES'    - photo slideshow 'articles'.
+        #   'YOUR PICTURES'  - as above, but user contributed pictures.
+        #   'SPORTSDAY LIVE' - dynamically updated 'running commentary' pages.
+        #   'LIVE - '        - the 'Live - Sport Name' variant of the above. The
+        #                      trailing ' - ' avoids removing every article that
+        #                      merely has 'live' in its title.
+        #   'QUIZ OF THE'    - the Flash player 'Quiz of the week', plus any
+        #                      monthly and yearly variants.
+        any_case_markers = ('IN PICTURES', 'YOUR PICTURES', 'SPORTSDAY LIVE',
+                            'LIVE - ', 'QUIZ OF THE')
+
+        def is_unwanted(article):
+            title = article.title
+            for marker in exact_case_markers:
+                if marker in title:
+                    return True
+
+            shouted_title = title.upper()
+            for marker in any_case_markers:
+                if marker in shouted_title:
+                    return True
+
+            # BBC sports pages which just display a cricket scorecard. The mass
+            # of table and css entries they use is a formatting nightmare.
+            return 'scorecards' in article.url
+
+        # Call parent's method.
+        parsed_feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for feed in parsed_feeds:
+            # Walk a copy of the list so that removing from the original is safe.
+            for article in feed.articles[:]:
+                if is_unwanted(article):
+                    feed.articles.remove(article)
+
+        return parsed_feeds
+
+# End of class and file.
--- a/recipes/berliner_zeitung.recipe
+++ b/recipes/berliner_zeitung.recipe
@ -1,61 +1,44 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-import re
+
+'''Calibre recipe to convert the RSS feeds of the Berliner Zeitung to an ebook.'''

 class SportsIllustratedRecipe(BasicNewsRecipe) :
-    __author__    = 'ape'
-    __copyright__ = 'ape'
+    __author__    = 'a.peter'
+    __copyright__ = 'a.peter'
    __license__   = 'GPL v3'
    language      = 'de'
-    description   = 'Berliner Zeitung'
-    version       = 2
+    description   = 'Berliner Zeitung RSS'
+    version       = 4
    title         = u'Berliner Zeitung'
    timefmt       = ' [%d.%m.%Y]'

+    #oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

-    keep_only_tags = [dict(name='div', attrs={'class':'teaser t_split t_artikel'})]
+    remove_tags_before = dict(name='div', attrs={'class':'newstype'})
+    remove_tags_after = [dict(id='article_text')]

-    INDEX = 'http://www.berlinonline.de/berliner-zeitung/'
-
-    def parse_index(self):
-        base = 'http://www.berlinonline.de'
-        answer = []
-        articles = {}
-        more = 1
-
-        soup = self.index_to_soup(self.INDEX)
-
-        # Get list of links to ressorts from index page
-        ressort_list = soup.findAll('ul', attrs={'class': re.compile('ressortlist')})
-        for ressort in ressort_list[0].findAll('a'):
-            feed_title = ressort.string
-            print 'Analyzing', feed_title
-            if not articles.has_key(feed_title):
-                articles[feed_title] = []
-                answer.append(feed_title)
-            # Load ressort page.
-            feed = self.index_to_soup('http://www.berlinonline.de' + ressort['href'])
-            # find mainbar div which contains the list of all articles
-            for article_container in feed.findAll('div', attrs={'class': re.compile('mainbar')}):
-                # iterate over all articles
-                for article_teaser in article_container.findAll('div', attrs={'class': re.compile('teaser')}):
-                    # extract title of article
-                    if article_teaser.h3 != None:
-                        article = {'title' : article_teaser.h3.a.string, 'date' : u'', 'url'  : base + article_teaser.h3.a['href'], 'description' : u''}
-                        articles[feed_title].append(article)
-                    else:
-                        # Skip teasers for missing photos
-                        if article_teaser.div.p.contents[0].find('Foto:') > -1:
-                            continue
-                        article = {'title': 'Weitere Artikel ' + str(more), 'date': u'', 'url': base + article_teaser.div.p.a['href'], 'description': u''}
-                        articles[feed_title].append(article)
-                        more += 1
-        answer = [[key, articles[key]] for key in answer if articles.has_key(key)]
-        return answer
+    feeds = [(u'Startseite', u'http://www.berliner-zeitung.de/home/10808950,10808950,view,asFeed.xml'),
+             (u'Politik', u'http://www.berliner-zeitung.de/home/10808018,10808018,view,asFeed.xml'),
+             (u'Wirtschaft', u'http://www.berliner-zeitung.de/home/10808230,10808230,view,asFeed.xml'),
+             (u'Berlin', u'http://www.berliner-zeitung.de/home/10809148,10809148,view,asFeed.xml'),
+             (u'Brandenburg', u'http://www.berliner-zeitung.de/home/10809312,10809312,view,asFeed.xml'),
+             (u'Wissenschaft', u'http://www.berliner-zeitung.de/home/10808894,10808894,view,asFeed.xml'),
+             (u'Digital', u'http://www.berliner-zeitung.de/home/10808718,10808718,view,asFeed.xml'),
+             (u'Kultur', u'http://www.berliner-zeitung.de/home/10809150,10809150,view,asFeed.xml'),
+             (u'Panorama', u'http://www.berliner-zeitung.de/home/10808334,10808334,view,asFeed.xml'),
+             (u'Sport', u'http://www.berliner-zeitung.de/home/10808794,10808794,view,asFeed.xml'),
+             (u'Hertha', u'http://www.berliner-zeitung.de/home/10808800,10808800,view,asFeed.xml'),
+             (u'Union', u'http://www.berliner-zeitung.de/home/10808802,10808802,view,asFeed.xml'),
+             (u'Verkehr', u'http://www.berliner-zeitung.de/home/10809298,10809298,view,asFeed.xml'),
+             (u'Polizei', u'http://www.berliner-zeitung.de/home/10809296,10809296,view,asFeed.xml'),
+             (u'Meinung', u'http://www.berliner-zeitung.de/home/10808020,10808020,view,asFeed.xml')]

    def get_masthead_url(self):
-        return 'http://www.berlinonline.de/.img/berliner-zeitung/blz_logo.gif'
+        return 'http://www.berliner-zeitung.de/image/view/10810244,7040611,data,logo.png'

+    def print_version(self, url):
+        """Return the printer friendly address of an article url.
+
+        Every '.html' in the url is swapped for ',view,printVersion.html'.
+        """
+        printable_suffix = ',view,printVersion.html'
+        return printable_suffix.join(url.split('.html'))
--- a/recipes/biamag.recipe
+++ b/recipes/biamag.recipe
@ -0,0 +1,38 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    # Recipe for the BiaMag rss feed served from bianet.org.
+    title = 'BiaMag'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News from Turkey'
+    publisher = 'BiaMag'
+    category = 'news, politics, Turkey'
+    language = 'tr'
+    masthead_url = 'http://bianet.org/images/biamag_logo.gif'
+    oldest_article = 15
+    max_articles_per_feed = 120
+    no_stylesheets = True
+
+    # The conversion options reuse the descriptive attributes defined above.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+        'linearize_tables': True,
+        'remove_paragraph_spacing': True,
+    }
+
+    # The 'manset' div and the 'habermenu' div are the two cut markers for the
+    # page; 'habermenu' is also listed in remove_tags along with the other tags.
+    remove_tags_before = dict(name='div', attrs={'class': 'manset'})
+    remove_tags_after = dict(name='div', attrs={'id': 'habermenu'})
+    remove_tags = [
+        dict(name='ul', attrs={'class': ['altul']}),
+        dict(name='div', attrs={'id': ['habermenu']}),
+        dict(name='div', attrs={'class': ['mail']}),
+        dict(name='div', attrs={'class': ['from']}),
+    ]
+
+    feeds = [
+        (u'BiaMag', u'http://www.bianet.org/biamag.rss'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Pass the parsed page through self.adeify_images and return the result.
+        return self.adeify_images(soup)
--- a/recipes/biamag_en.recipe
+++ b/recipes/biamag_en.recipe
@ -0,0 +1,38 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    # Recipe for the English language rss feed served from bianet.org.
+    title = 'Bianet-English'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News Network from Turkey(English)'
+    publisher = 'Bianet'
+    category = 'news, politics, Turkey'
+    language = 'en_TR'
+    masthead_url = 'http://bianet.org/images/english_logo.gif'
+    oldest_article = 7
+    max_articles_per_feed = 150
+    no_stylesheets = True
+
+    # The conversion options reuse the descriptive attributes defined above.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+        'linearize_tables': True,
+        'remove_paragraph_spacing': True,
+    }
+
+    # The 'manset' div and the 'habermenu' div are the two cut markers for the
+    # page; 'habermenu' is also listed in remove_tags along with the other tags.
+    remove_tags_before = dict(name='div', attrs={'class': 'manset'})
+    remove_tags_after = dict(name='div', attrs={'id': 'habermenu'})
+    remove_tags = [
+        dict(name='ul', attrs={'class': ['altul']}),
+        dict(name='div', attrs={'id': ['habermenu']}),
+        dict(name='div', attrs={'class': ['mail']}),
+        dict(name='div', attrs={'class': ['from']}),
+    ]
+
+    feeds = [
+        (u'Bianet-English', u'http://www.bianet.org/english.rss'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Pass the parsed page through self.adeify_images and return the result.
+        return self.adeify_images(soup)
--- a/recipes/bianet.recipe
+++ b/recipes/bianet.recipe
@ -0,0 +1,38 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+bianet.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Radikal_tr(BasicNewsRecipe):
+    # Recipe for the main Bianet rss feed served from bianet.org.
+    title = 'Bianet'
+    __author__ = 'Osman Kaysan'
+    description = 'Independent News from Turkey'
+    publisher = 'Bianet'
+    category = 'news, politics, Turkey'
+    language = 'tr'
+    masthead_url = 'http://bianet.org/images/bianet_logo.gif'
+    oldest_article = 7
+    max_articles_per_feed = 120
+    no_stylesheets = True
+
+    # The conversion options reuse the descriptive attributes defined above.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+        'linearize_tables': True,
+        'remove_paragraph_spacing': True,
+    }
+
+    # The 'manset' div and the 'habermenu' div are the two cut markers for the
+    # page; 'habermenu' is also listed in remove_tags along with the other tags.
+    remove_tags_before = dict(name='div', attrs={'class': 'manset'})
+    remove_tags_after = dict(name='div', attrs={'id': 'habermenu'})
+    remove_tags = [
+        dict(name='ul', attrs={'class': ['altul']}),
+        dict(name='div', attrs={'id': ['habermenu']}),
+        dict(name='div', attrs={'class': ['mail']}),
+        dict(name='div', attrs={'class': ['from']}),
+    ]
+
+    feeds = [
+        (u'Bianet', u'http://bianet.org/bianet.rss'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Pass the parsed page through self.adeify_images and return the result.
+        return self.adeify_images(soup)
--- a/recipes/biolog_pl.recipe
+++ b/recipes/biolog_pl.recipe
@ -0,0 +1,19 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Biolog_pl(BasicNewsRecipe):
+    """Recipe for Biolog.pl (Polish); one feed per section listed in `feeds`."""
+    title          = u'Biolog.pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds=True
+    __author__        = 'fenuks'
+    description   = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
+    category       = 'biology'
+    language       = 'pl'
+    cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
+    no_stylesheets = True
+    #keeps_only_tags=[dict(id='main')]
+    # Cleanup markers: the element with id 'main' and the <a name="komentarze">
+    # anchor; the 'Komentarze' image is removed explicitly.
+    remove_tags_before=dict(id='main')
+    remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
+    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
+    # (section title, RSS URL) pairs.
+    feeds          = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]
--- a/recipes/birgun_gazetesi.recipe
+++ b/recipes/birgun_gazetesi.recipe
@ -0,0 +1,50 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Birgun (BasicNewsRecipe):
+    """Recipe for the Turkish newspaper Birgün, one feed per section."""
+
+    title                  = u'Birgün Gazetesi'
+    __author__             = u'Osman Kaysan'
+    oldest_article         = 7
+    max_articles_per_feed  =150
+    use_embedded_content  = False
+    description           = 'Birgun gazatesi haberleri, kose yazarlari'
+    publisher              = 'Birgün'
+    category               = 'news,haberler,turkce,gazete,birgun'
+    language               = 'tr'
+    no_stylesheets        = True
+    publication_type = 'newspaper'
+
+    # Conversion settings; the metadata values reuse the class attributes above.
+    conversion_options = {
+                             'comments'        : description
+                            ,'tags'            : category
+                            ,'language'        : language
+                            ,'publisher'       : publisher
+                            ,'linearize_tables': True
+                ,'remove_paragraph_spacing': True,
+                          }
+
+    cover_img_url = 'http://www.birgun.net/i/birgun.png'
+    # The stock get_cover_url() reads `cover_url`, not `cover_img_url`, so
+    # expose the image under that name too; the old name is kept for anything
+    # that may still refer to it.
+    cover_url = cover_img_url
+    masthead_url = 'http://www.birgun.net/i/birgun.png'
+
+    remove_attributes = ['width','height']
+
+    remove_tags_before  = dict(name='h2', attrs={'class':'storyHeadline'})
+    #remove_tags_after   = dict(name='div', attrs={'class':'toollinks'})
+    remove_tags_after   = dict(name='tr', attrs={'valign':'top'})
+    remove_tags   = [ dict(name='div', attrs={'id':'byLine'}), dict(name='div', attrs={'class':'toollinks'})
+, dict(name='div', attrs={'class':'main-lead'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})
+, dict(name='a', attrs={'class':'addthis_button'})]
+
+    remove_empty_feeds= True
+
+    # (section title, RSS URL) pairs.
+    feeds          = [
+                      ( u'Güncel', u'http://www.birgun.net/actuels.xml')
+         ,( u'Köşe Yazarları', u'http://www.birgun.net/writer.xml')
+         ,( u'Politika', u'http://www.birgun.net/politics.xml')
+         ,( u'Ekonomi', u'http://www.birgun.net/economic.xml')
+         ,( u'Çalışma Yaşamı', u'http://www.birgun.net/workers.xml')
+         ,( u'Dünya', u'http://www.birgun.net/worlds.xml')
+         ,( u'Yaşam', u'http://www.birgun.net/lifes.xml')
+                     ]
--- a/recipes/birmingham_post.recipe
+++ b/recipes/birmingham_post.recipe
@ -0,0 +1,44 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class AdvancedUserRecipe1306097511(BasicNewsRecipe):
+    """Recipe for the Birmingham Post (UK); relies on auto_cleanup for article extraction."""
+    title          = u'Birmingham post'
+    description = 'News for Birmingham UK'
+    timefmt = ''
+    __author__ = 'Dave Asbury'
+    cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
+    oldest_article = 1
+    max_articles_per_feed = 20
+    remove_empty_feeds = True
+    remove_javascript     = True
+    auto_cleanup = True
+    language = 'en_GB'
+
+
+    masthead_url        = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
+
+
+    # Both selector lists are intentionally empty; the earlier hand-written
+    # selectors are kept below as comments.
+    keep_only_tags = [
+    #dict(name='h1',attrs={'id' : 'article-headline'}),
+                    #dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
+    #dict(name='p')
+    #dict(attrs={'id' : 'three-col'})
+        ]
+    remove_tags    = [
+             # dict(name='div',attrs={'class' : 'span-33 last header-links'})
+
+                               ]
+    feeds          = [
+        #(u'News',u'http://www.birminghampost.net/news/rss.xml'),
+        (u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
+        (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
+        (u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
+        (u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
+
+         ]
+    # The body rule previously ended with a stray quote character, which became
+    # part of the following h1 selector, and its font shorthand listed the
+    # family before the size (the shorthand requires size first).
+    extra_css  = '''
+                    body {font: medium sans-serif;}
+    h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+                h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
+                    span{ font-size:9.5px; font-weight:bold;font-style:italic}
+                    p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+
+     '''
--- a/recipes/blic.recipe
+++ b/recipes/blic.recipe
@ -1,6 +1,6 @@

 __license__   = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blic.rs
 '''
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
    def print_version(self, url):
        return url + '/print'

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']    
-        return soup
+    def get_cover_url(self):
+        """Return the cover URL scraped from the blic.rs front page, or None.
+
+        Looks up the <a id="blic_naslovna_print"> link and prefixes its href
+        with the site root; returns None when that link is not found.
+        """
+        soup = self.index_to_soup('http://www.blic.rs/')
+        alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
+        if alink:
+           return 'http://www.blic.rs' + alink['href']
+        return None
+       
--- a/recipes/blues.recipe
+++ b/recipes/blues.recipe
@ -0,0 +1,26 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Oskar Kunicki <rakso at interia.pl>'
+'''
+Changelog:
+2011-11-27
+News from BluesRSS.info
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BluesRSS(BasicNewsRecipe):
+    """Recipe for the BluesRSS.info news feed (English, single feed)."""
+    title                     = 'Blues News'
+    __author__          = 'Oskar Kunicki'
+    description           ='Blues news from around the world'
+    publisher             = 'BluesRSS.info'
+    category              = 'news, blues, USA,UK'
+    oldest_article        = 5
+    max_articles_per_feed = 100
+    language              = 'en'
+    # The same image is used for both cover and masthead.
+    cover_url             = 'http://bluesrss.info/cover.jpg'
+    masthead_url       = 'http://bluesrss.info/cover.jpg'
+    no_stylesheets = True
+
+    # Drop the div with class 'wp-pagenavi' from every article.
+    remove_tags    = [dict(name='div', attrs={'class':'wp-pagenavi'})]
+
+    feeds = [(u'News', u'http://bluesrss.info/feed/')]
--- a/recipes/buffalo_news.recipe
+++ b/recipes/buffalo_news.recipe
@ -10,49 +10,39 @@ http://www.buffalonews.com/RSS/

 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1298680852(BasicNewsRecipe):
+class BuffaloNews(BasicNewsRecipe):
    title          = u'Buffalo News'
    oldest_article = 2
    language = 'en'
-    __author__ = 'ChappyOnIce'
+    __author__ = 'ChappyOnIce, Krittika Goyal'
    max_articles_per_feed = 20
    encoding = 'utf-8'
    masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
-    remove_javascript = True
-    extra_css = 'body {text-align: justify;}\n  \
-       p {text-indent: 20px;}'
+    auto_cleanup = True
+    remove_empty_feeds = True

-    keep_only_tags    = [
-                       dict(name='div', attrs={'class':['main-content-left']})
-                        ]
-
-    remove_tags = [
-                       dict(name='div', attrs={'id':['commentCount']}),
-       dict(name='div', attrs={'class':['story-list-links']})
-                        ]
-
-    remove_tags_after  = dict(name='div', attrs={'class':['body storyContent']})
-
-    feeds          = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'),
-         (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944')
+    feeds          = [
+            (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'),
+            (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944')
         ]
+
--- a/recipes/catavencu.recipe
+++ b/recipes/catavencu.recipe
@ -4,16 +4,16 @@
 __license__   = 'GPL v3'
 __copyright__ = u'2011, Silviu Cotoar\u0103'
 '''
-catavencu.ro
+academiacatavencu.info
 '''

 from calibre.web.feeds.news import BasicNewsRecipe

-class Catavencu(BasicNewsRecipe):
+class AcademiaCatavencu(BasicNewsRecipe):
    title                 = u'Academia Ca\u0163avencu'
    __author__            = u'Silviu Cotoar\u0103'
    description           = 'Tagma cum laude'
-    publisher             = 'Catavencu'
+    publisher             = u'Ca\u0163avencu'
    oldest_article        = 5
    language              = 'ro'
    max_articles_per_feed = 100
@ -21,32 +21,31 @@ class Catavencu(BasicNewsRecipe):
    use_embedded_content  = False
    category              = 'Ziare'
    encoding              = 'utf-8'
-    cover_url         = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
+    cover_url         = 'http://www.academiacatavencu.info/images/logo.png'

    conversion_options = {
                             'comments'   : description
                            ,'tags'       : category
                            ,'language'   : language
-                ,'publisher'  : publisher
+							,'publisher'  : publisher
                         }

    keep_only_tags = [
-            dict(name='ul', attrs={'class':'articles'})
+            dict(name='h1', attrs={'class':'art_title'}),
+			dict(name='div', attrs={'class':'art_text'})
                     ]

    remove_tags = [
-             dict(name='div', attrs={'class':['tools']})
-           , dict(name='div', attrs={'class':['share']})
-           , dict(name='div', attrs={'class':['category']})
-           , dict(name='div', attrs={'id':['comments']})
+             dict(name='div', attrs={'class':['desp_m']})
+           , dict(name='div', attrs={'id':['tags']})          
                  ]

    remove_tags_after = [
-              dict(name='div', attrs={'id':'comments'})
+              dict(name='div', attrs={'class':['desp_m']})
            ]

    feeds          = [
-            (u'Feeds', u'http://catavencu.ro/feed/rss')
+            (u'Feeds', u'http://www.academiacatavencu.info/rss.xml')
                 ]

    def preprocess_html(self, soup):
--- a/recipes/cgm_pl.recipe
+++ b/recipes/cgm_pl.recipe
@ -27,7 +27,7 @@ class CGM(BasicNewsRecipe):
            del item['style']
        ad=soup.findAll('a')
        for r in ad:
-            if 'http://www.hustla.pl' in r['href']:                
+            if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:                
                 r.extract()
        gallery=soup.find('div', attrs={'class':'galleryFlash'})
        if gallery:
--- a/recipes/cnd.recipe
+++ b/recipes/cnd.recipe
@ -23,7 +23,9 @@ class TheCND(BasicNewsRecipe):
 	remove_tags		= [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
 	no_stylesheets	 = True

-	preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+	preprocess_regexps = [  (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
+				(re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
+				]

 	def print_version(self, url):
 		if url.find('news/article.php') >= 0:
@ -46,16 +48,18 @@ class TheCND(BasicNewsRecipe):
 			title = self.tag_to_string(a)
 			self.log('\tFound article: ', title, 'at', url)
 			date = a.nextSibling
+			if re.search('cm', date):
+				continue
 			if (date is not None) and len(date)>2:
 				if not articles.has_key(date):
 					articles[date] = []
 				articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
 				self.log('\t\tAppend to : ', date)

-		self.log('log articles', articles)
+		#self.log('log articles', articles)
 		mostCurrent = sorted(articles).pop()
-		self.title = 'CND ' + mostCurrent
-
+		self.title = 'CND ' + mostCurrent		
+		
 		feeds.append((self.title, articles[mostCurrent]))

 		return feeds
--- a/recipes/cnd_weekly.recipe
+++ b/recipes/cnd_weekly.recipe
@ -0,0 +1,72 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
+'''
+cnd.org
+'''
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheCND(BasicNewsRecipe):
+	"""Recipe for cnd.org that keeps only index links whose label contains 'cm'.
+
+	Articles are grouped by that label and one feed is produced per label.
+	"""
+
+	title	  = 'CND Weekly'
+	__author__ = 'Derek Liang'
+	description = ''
+	INDEX = 'http://cnd.org'
+	language = 'zh'
+	conversion_options = {'linearize_tables':True}
+
+	remove_tags_before = dict(name='div', id='articleHead')
+	remove_tags_after  = dict(id='copyright')
+	remove_tags		= [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
+	no_stylesheets	 = True
+
+	# Strip HTML comments and every '<table width...' block before parsing.
+	preprocess_regexps = [  (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
+				(re.compile('<table width.*?</table>', re.DOTALL), lambda m: ''),
+				]
+
+	def print_version(self, url):
+		"""Map an article URL to its print URL.
+
+		Everything before the first '=' is replaced by the print endpoint:
+		the news module for 'news/article.php' links, wfsection otherwise.
+		"""
+		if url.find('news/article.php') >= 0:
+			return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
+		else:
+			return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)
+
+	def parse_index(self):
+		"""Build one feed per label found on the index page.
+
+		Returns a list of (feed title, article list) tuples ordered by label
+		in descending string order.
+		"""
+		soup = self.index_to_soup(self.INDEX)
+
+		feeds = []
+		articles = {}
+
+		for a in soup.findAll('a', attrs={'target':'_cnd'}):
+			url = a['href']
+			if url.find('article.php') < 0 :
+				continue
+			if url.startswith('/'):
+				url = 'http://cnd.org'+url
+			title = self.tag_to_string(a)
+			date = a.nextSibling
+			# A link with nothing after it has no label. Test for that before
+			# calling re.search, which raises TypeError when given None.
+			if date is None or not re.search('cm', date):
+				continue
+			self.log('\tFound article: ', title, 'at', url, '@', date)
+			if len(date)>2:
+				articles.setdefault(date, []).append({'title':title, 'url':url, 'description': '', 'date':''})
+				self.log('\t\tAppend to : ', date)
+
+		# NOTE(review): self.title is overwritten on every pass, so it ends up
+		# naming the last (smallest) label - confirm that this is intended.
+		for mostCurrent in sorted(articles, reverse=True):
+			self.title = 'CND ' + mostCurrent
+			feeds.append((self.title, articles[mostCurrent]))
+
+		return feeds
+
+	def populate_article_metadata(self, article, soup, first):
+		"""Log the text of the article's first <h3>; the article is not modified."""
+		header = soup.find('h3')
+		self.log('header: ' + self.tag_to_string(header))
+
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -0,0 +1,22 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Computerworld_pl(BasicNewsRecipe):
+    """Recipe for Computerworld.pl (Polish IT news, single feed)."""
+    title          = u'Computerworld.pl'
+    __author__        = 'fenuks'
+    description   = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
+    category       = 'IT'
+    language       = 'pl'
+    no_stylesheets=True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    keep_only_tags=[dict(name='div', attrs={'id':'s'})]
+    remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
+    remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
+    feeds          = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
+
+    def get_cover_url(self):
+        """Return the src of the front page's <img class="prawo">, if present.
+
+        When the image (or its src) is missing, the previously configured
+        cover_url is returned instead of raising, or None if there is none.
+        """
+        soup = self.index_to_soup('http://www.computerworld.pl/')
+        cover = soup.find(name='img', attrs={'class':'prawo'})
+        # soup.find returns None when the page layout changes; only overwrite
+        # cover_url when an image with a src was actually found.
+        if cover is not None and cover.get('src'):
+            self.cover_url = cover['src']
+        return getattr(self, 'cover_url', None)
--- a/recipes/cosmopolitan_uk.recipe
+++ b/recipes/cosmopolitan_uk.recipe
@ -0,0 +1,52 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+#from calibre import __appname__
+from calibre.utils.magick import Image
+class AdvancedUserRecipe1306097511(BasicNewsRecipe):
+    """Recipe for Cosmopolitan UK; downloaded images are converted to greyscale."""
+    title          = u'Cosmopolitan UK'
+    description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
+
+    __author__ = 'Dave Asbury'
+    #last update 21/12/11
+    # greyscale code by Starson
+    cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
+    no_stylesheets = True
+    oldest_article = 7
+    max_articles_per_feed = 20
+    remove_empty_feeds = True
+    remove_javascript     = True
+
+    # Cut everything between the competition-offer template markers.
+    preprocess_regexps = [
+    (re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]
+    language = 'en_GB'
+
+
+    masthead_url        = 'http://www.cosmopolitan.co.uk/cm/cosmopolitanuk/site_images/header/cosmouk_logo_home.gif'
+
+
+    keep_only_tags = [
+                              dict(attrs={'class' : ['dateAuthor', 'publishDate']}),
+                              dict(name='div',attrs ={'id' : ['main_content']})
+                              ]
+    remove_tags    = [
+                              dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
+                              dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
+                              dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
+                              dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
+                              dict(name='li',attrs={'class' : 'thumb'})
+              ]
+
+    # (section title, RSS URL) pairs.
+    feeds          = [
+        (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
+
+    def postprocess_html(self, soup, first):
+        """Re-save every <img> that has a src as greyscale, then return soup.
+
+        Each image is opened from the path in its src attribute and written
+        back to that same path, so src is presumably a local file at this
+        stage - TODO confirm.
+        """
+        #process all the images
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            # NOTE(review): this compares the Image object itself with 0; it
+            # is unclear that it can ever be true - confirm against Image.
+            if img < 0:
+                raise RuntimeError('Out of memory')
+            img.type = "GrayscaleType"
+            img.save(iurl)
+        return soup
--- a/recipes/daily_mirror.recipe
+++ b/recipes/daily_mirror.recipe
@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    description = 'News as provide by The Daily Mirror -UK'

    __author__ = 'Dave Asbury'
-    # last updated 30/10/11
+    # last updated 26/12/11
    language = 'en_GB'

    cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@ -13,30 +13,22 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'


-    oldest_article = 2
-    max_articles_per_feed = 30
+    oldest_article = 1
+    max_articles_per_feed = 20
    remove_empty_feeds = True
    remove_javascript     = True
    no_stylesheets = True
-    extra_css  = '''
-	body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
-                	 '''
-
-    keep_only_tags = [
-       dict(name='div',attrs={'id' : 'body-content'})
-        ]
-
-    remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
-
+    auto_cleanup = True
    remove_tags = [
-           dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
-           dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
-           dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
-           dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
+           dict(name='title'),
+           dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
          ]

    preprocess_regexps = [
-    (re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
+    # Keep both patterns in one list: assigning preprocess_regexps a second
+    # time would replace the first list and drop its pattern.
+    (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: ''),
+    (re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]


    feeds          = [
@ -53,5 +45,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
        ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')

           # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
-
  ]
+    extra_css  = '''
+	body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+                    h1{ font-size:18px;}
+                    img { display:block}
+                	 '''
+
--- a/recipes/daily_writing_tips.recipe
+++ b/recipes/daily_writing_tips.recipe
@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DailyWritingTips(BasicNewsRecipe):
+    """Recipe for Daily Writing Tips; uses the content embedded in the feed."""
+    title          = u'Daily Writing Tips'
+    language       = 'en_GB'
+    __author__ = 'NotTaken'
+    oldest_article = 7 #days
+    max_articles_per_feed = 40
+    # Articles come from the feed entries themselves, with auto cleanup off.
+    use_embedded_content = True
+    no_stylesheets = True
+    auto_cleanup = False
+    encoding = 'utf-8'
+
+
+    feeds          = [
+('Latest tips',
+ 'http://feeds2.feedburner.com/DailyWritingTips'),
+]
--- a/recipes/datasport.recipe
+++ b/recipes/datasport.recipe
@ -0,0 +1,15 @@
+__license__   = 'GPL v3'
+__author__    = 'faber1971'
+description   = 'Italian soccer news website - v1.00 (17, December 2011)'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1324114272(BasicNewsRecipe):
+    """Recipe for the Datasport football feed (Italian); relies on auto_cleanup."""
+    title          = u'Datasport'
+    language = 'it'
+    __author__ = 'faber1971'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    feeds          = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]
--- a/recipes/derin_dusunce.recipe
+++ b/recipes/derin_dusunce.recipe
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BasicUserRecipe1324913694(BasicNewsRecipe):
+    """Recipe for the Derin Dusunce feed (Turkish); relies on auto_cleanup."""
+    title          = u'Derin Dusunce'
+    language = 'tr'
+    __author__ = 'asalet_r'
+    oldest_article = 7
+    max_articles_per_feed = 20
+    auto_cleanup = True
+
+    feeds          = [(u'Derin D\xfc\u015f\xfcnce', u'http://www.derindusunce.org/feed/')]
--- a/recipes/descopera_org.recipe
+++ b/recipes/descopera_org.recipe
@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+'''
+descopera.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Descopera(BasicNewsRecipe):
+    """Recipe for descopera.org (Romanian, single feed)."""
+    title = u'Descoperă.org'
+    __author__  = 'Marius Ignătescu'
+    description = 'Descoperă. Placerea de a cunoaște'
+    publisher = 'descopera.org'
+    category = 'science, technology, culture, history, earth'
+    language = 'ro'
+    oldest_article = 14
+    max_articles_per_feed = 100
+    encoding = 'utf8'
+    no_stylesheets = True
+    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
+    # Keep only div.post, then drop the navigation, box and bookmark divs.
+    keep_only_tags    = [dict(name='div', attrs={'class':['post']})]
+    remove_tags = [dict(name='div', attrs={'class':['topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]
+    remove_attributes = ['width','height']
+    cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
+    feeds  = [(u'Articles', u'http://www.descopera.org/feed/')]
+
+    def preprocess_html(self, soup):
+        """Pass the parsed page to self.adeify_images and return its result."""
+        return self.adeify_images(soup)
--- a/recipes/di.recipe
+++ b/recipes/di.recipe
@ -46,7 +46,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
 		dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
 		dict(name = 'div', attrs = {'class' : 'uniBox'}),
 		dict(name = 'object', attrs = {}),
-		dict(name = 'h3', attrs = {})
+		dict(name = 'h3', attrs = {}),
+		dict(attrs={'class':'twitter-share-button'})
 	]
 	
 	preprocess_regexps = [
@ -58,3 +59,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
 			(r'\s*</', lambda match: '</'),
 		]
 	]
+
+	def skip_ad_pages(self, soup):
+		"""Return the raw HTML of the real article when `soup` is an ad page.
+
+		A page counts as an ad page when its <title> text contains
+		'Advertisement'; the article is then fetched from the href of the
+		first link on the page. Returns None when the page is not an ad page
+		or has no usable link.
+		"""
+		# Test the title's text: `in` applied to the tag itself checks its
+		# child nodes, not a substring of the title.
+		title = soup.find('title')
+		if title is None or 'Advertisement' not in self.tag_to_string(title):
+			return None
+		link = soup.find('a')
+		if link is None or not link.get('href'):
+			return None
+		return self.index_to_soup(link['href'], raw=True)
--- a/recipes/dunya_bizim.recipe
+++ b/recipes/dunya_bizim.recipe
@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BasicUserRecipe1324736687(BasicNewsRecipe):
+    """Recipe for Dünya Bizim (Turkish); one feed per site category, auto_cleanup on."""
+    title          = u'D\xfcnya Bizim'
+    language = 'tr'
+    __author__ = 'asalet_r'
+
+    oldest_article = 7
+    max_articles_per_feed = 10
+    auto_cleanup = True
+
+    # (category title, RSS URL) pairs; the URLs differ only in kategoriID.
+    feeds          = [(u'Aktif \u0130mamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=31'), (u'Ayr\u0131nt\u0131 Defteri', u'http://dunyabizim.com/servisler/rss.php?kategoriID=58'), (u'Baba Kitaplar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=4'), (u'Bu da Oldu', u'http://dunyabizim.com/servisler/rss.php?kategoriID=32'), (u'\xc7-al\u0131nt\u0131 Yaz\u0131lar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=33'), (u'Dar\xfclmedya', u'http://dunyabizim.com/servisler/rss.php?kategoriID=49'), (u'Gidenler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=59'), (u'G\xfczel Mekanlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=43'), (u'\u0130yi Haberler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=18'), (u'\u0130yi M\xfczikler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=2'), (u'Kalite Dergiler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=3'), (u'Konu\u015fa Konu\u015fa', u'http://dunyabizim.com/servisler/rss.php?kategoriID=24'), (u'M\xfcstesta G\xfczeller', u'http://dunyabizim.com/servisler/rss.php?kategoriID=65'), (u'O \u015eimdi Nerede?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=52'), (u'Olsa Ke\u015fke', u'http://dunyabizim.com/servisler/rss.php?kategoriID=34'), (u'Orada Ne Oldu?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=38'), (u'\xd6nemli Adamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=1'), (u'Polemik', u'http://dunyabizim.com/servisler/rss.php?kategoriID=39'), (u'Sinema', u'http://dunyabizim.com/servisler/rss.php?kategoriID=23'), (u'Yalan Haber', u'http://dunyabizim.com/servisler/rss.php?kategoriID=40'), (u'Yeni \u015eeyler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=57'), (u'Zekeriya Sofras\u0131', u'http://dunyabizim.com/servisler/rss.php?kategoriID=60')]
--- a/recipes/dunya_bulteni.recipe
+++ b/recipes/dunya_bulteni.recipe
@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BasicUserRecipe1321194347(BasicNewsRecipe):
+    # Feed-driven recipe for dunyabulteni.net. The class defines no parsing
+    # code of its own; it only sets options and the list of RSS feeds.
+    title = u'D\xfcnya B\xfclteni'
+    __author__ = 'asalet_r'
+    language = 'tr'
+
+    # Download limits and clean-up mode.
+    oldest_article = 7
+    max_articles_per_feed = 50
+    auto_cleanup = True
+
+    # (section title, RSS URL) pairs, one per site category.
+    feeds = [
+        (u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=157'),
+        (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=153'),
+        (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=174'),
+        (u'K\xfclt\xfcr-Sanat', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=66'),
+        (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=200'),
+        (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=123'),
+        (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=90'),
+        (u'Aile Sa\u011fl\u0131k E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=75'),
+    ]
--- a/recipes/dziennik_pl.recipe
+++ b/recipes/dziennik_pl.recipe
@ -0,0 +1,58 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Dziennik_pl(BasicNewsRecipe):
+    # Recipe for dziennik.pl. Articles split over several pages are merged
+    # into one document by preprocess_html(), which delegates to append_page().
+    title          = u'Dziennik.pl'
+    __author__        = 'fenuks'
+    description   = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
+    category       = 'newspaper'
+    language       = 'pl'
+    cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
+    no_stylesheets = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript=True
+    remove_empty_feeds=True
+    # Strip the literal "Komentarze:" label from the page source.
+    preprocess_regexps     = [(re.compile("Komentarze:"), lambda m: '')]
+    keep_only_tags=[dict(id='article')]
+    # NOTE: append_page() reads the 'name' and 'attrs' keys of every entry
+    # below, so each entry must define both.
+    remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
+    feeds          = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
+                      (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
+                      (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
+                      (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
+                      (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
+                      (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
+                      (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
+                      (u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
+                      (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
+                      (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
+                      (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
+                      (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
+
+    def append_page(self, soup, appendtag):
+        '''
+        Append the remaining pages of a paginated article to appendtag.
+
+        Starting from the 'page_next' link found in soup, every following
+        page is fetched with index_to_soup(); its 'article_body' div is
+        cleaned with self.remove_tags and inserted at the end of appendtag.
+        'article_paginator' divs are removed from appendtag, and when the
+        last page is reached the 'art_src' divs already present in appendtag
+        are removed before that page is appended.
+
+        Missing elements (no paginator div, no article body on a fetched
+        page) are skipped instead of raising AttributeError.
+        '''
+        tag = soup.find('a', attrs={'class':'page_next'})
+        if tag:
+            paginator = appendtag.find('div', attrs={'class':'article_paginator'})
+            # find() returns None when nothing matches; only extract a hit.
+            if paginator is not None:
+                paginator.extract()
+        while tag:
+            soup2 = self.index_to_soup(tag['href'])
+            # Look up the next link first so the loop always advances,
+            # even when the current page has to be skipped below.
+            tag = soup2.find('a', attrs={'class':'page_next'})
+            if not tag:
+                for r in appendtag.findAll('div', attrs={'class':'art_src'}):
+                    r.extract()
+            pagetext = soup2.find(name='div', attrs={'class':'article_body'})
+            if pagetext is None:
+                # Page without an article body: nothing to append.
+                continue
+            for dictionary in self.remove_tags:
+                unwanted = pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
+                for delete in unwanted:
+                    delete.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+            paginator = appendtag.find('div', attrs={'class':'article_paginator'})
+            if paginator is not None:
+                paginator.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body of soup and return soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/echo_online.recipe
+++ b/recipes/echo_online.recipe
@ -0,0 +1,46 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
+'''
+Fetch echo-online.de
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+class Echo_Online(BasicNewsRecipe):
+    # Recipe for echo-online.de. Articles are downloaded from the URL built
+    # by print_version(); the remaining behaviour is plain configuration.
+    title = u'Echo Online'
+    __author__ = 'Armin Geller'
+    description = '-Echo Online-'
+    publisher = 'Echo Online GmbH'
+    category = 'News, Germany'
+    language = 'de'
+    lang = 'de-DE'
+    encoding = 'iso-8859-1'
+    timefmt = ' [%a, %d %b %Y]'
+    cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif'
+
+    # Download limits.
+    oldest_article = 7
+    max_articles_per_feed = 50
+
+    # Clean-up options.
+    no_stylesheets = True
+    remove_javascript = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@class="bild_gross w270"]'
+    remove_tags = [
+        dict(name='div', attrs={'class': ['header', 'name']}),
+    ]
+
+    # (section title, RSS URL) pairs.
+    feeds = [
+        (u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
+        (u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
+        (u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
+        (u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
+        (u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
+        (u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
+        (u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
+        (u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
+        (u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
+        (u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
+        (u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
+    ]
+
+    def print_version(self, url):
+        '''
+        Return the print-format URL for the article at url.
+
+        The feed URL is opened with the recipe browser and the URL reported
+        by the response (geturl()) is used as the base, presumably because
+        the feed links redirect to the real article address.
+        '''
+        final_url = self.browser.open_novisit(url).geturl()
+        return final_url + '?_FRAME=33&_FORMAT=PRINT'
+
--- a/recipes/edge_conversations.recipe
+++ b/recipes/edge_conversations.recipe
@ -0,0 +1,50 @@
+__license__   = 'GPL v3'
+__copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
+
+'''
+Fetch Edge.org conversations
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class EdgeConversationRSS(BasicNewsRecipe):
+    # Recipe for the Edge.org feed, restricted to "conversation" articles.
+    title = u'Edge.org Conversations'
+    __author__ = 'levien'
+    language = 'en'
+    description = '''Edge.org offers "open-minded, free ranging, intellectually
+    playful ... an unadorned pleasure in curiosity, a collective expression of
+    wonder at the living and inanimate world ... an ongoing and thrilling
+    colloquium.'''
+
+    oldest_article = 60
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class': 'HomeLeftPannel IMGCTRL'}),
+    ]
+    remove_tags = [
+        dict(name='div', attrs={'class': 'Logo'}),
+    ]
+
+    feeds = [(u'Edge RSS', u'http://edge.org/feeds/')]
+
+    def print_version(self, url):
+        '''Rewrite a 'conversation/' URL into its 'conversation.php?cid=' form.'''
+        return url.replace('conversation/', 'conversation.php?cid=')
+
+    def parse_feeds(self):
+        '''
+        Return the parsed feeds with unwanted articles removed.
+
+        An article is dropped when its title does not contain
+        'CONVERSATION', or when its URL contains 'pdf'.
+        '''
+        parsed = BasicNewsRecipe.parse_feeds(self)
+        for feed in parsed:
+            # Walk over a snapshot because the list is modified in the loop.
+            for article in list(feed.articles):
+                unwanted = 'CONVERSATION' not in article.title or 'pdf' in article.url
+                if unwanted:
+                    feed.articles.remove(article)
+        return parsed
+
--- a/recipes/el_periodico.recipe
+++ b/recipes/el_periodico.recipe
@ -5,12 +5,11 @@ __license__     = 'GPL v3'
 __copyright__   = '04 December 2010, desUBIKado'
 __author__      = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__     = 'v0.07'
-__date__        = '06, February 2011'
+__version__     = 'v0.08'
+__date__        = '13, November 2011'
 '''
 elperiodicodearagon.com
 '''
-import re
 from calibre.web.feeds.news import BasicNewsRecipe


@ -20,13 +19,13 @@ class elperiodicodearagon(BasicNewsRecipe):
    description           = u'Noticias desde Aragon'
    publisher             = u'elperiodicodearagon.com'
    category              = u'news, politics, Spain, Aragon'
-    oldest_article        = 2
+    oldest_article        = 1
    delay                 = 0
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    language              = 'es'
-    encoding              = 'utf8'
+    encoding              = 'iso-8859-1'
    remove_empty_feeds    = True
    remove_javascript     = True

@ -39,61 +38,30 @@ class elperiodicodearagon(BasicNewsRecipe):
                         }

    feeds              = [
-                           (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
-                           (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
-                           (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
-                           (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
-                           (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
-                           (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
-                           (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
-                           (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
-                           (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
-                           (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
+                           (u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
+                           (u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
+                           (u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
+                           (u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
+                           (u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
+                           (u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
+                           (u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
+                           (u'CAI Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
+                           (u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
+                           (u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
+                           (u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
+                           (u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
+                           (u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
+                           (u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
+                           (u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
+                           (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml')
                         ]


-    extra_css = '''
-                    h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
-                    h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
-                    h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
-                    .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
-                    img{margin-bottom: 0.4em}
-		'''
-
    remove_attributes = ['height','width']

-    keep_only_tags     = [dict(name='div', attrs={'id':'contenidos'})]
+    keep_only_tags     = [dict(name='div', attrs={'id':'Noticia'})]


-    # Quitar toda la morralla
-
-    remove_tags        = [dict(name='ul', attrs={'class':'herramientasDeNoticia'}),
-                          dict(name='span', attrs={'class':'MasInformacion '}),
-                          dict(name='span', attrs={'class':'MasInformacion'}),
-                          dict(name='div', attrs={'class':'Middle'}),
-                          dict(name='div', attrs={'class':'MenuCabeceraRZaragoza'}),
-                          dict(name='div', attrs={'id':'MenuCabeceraRZaragoza'}),
-                          dict(name='div', attrs={'class':'MenuEquipo'}),
-                          dict(name='div', attrs={'class':'TemasRelacionados'}),
-                          dict(name='div', attrs={'class':'GaleriaEnNoticia'}),
-                          dict(name='div', attrs={'class':'Recorte'}),
-                          dict(name='div', attrs={'id':'NoticiasenRecursos'}),
-                          dict(name='div', attrs={'id':'NoticiaEnPapel'}),
-                          dict(name='p', attrs={'class':'RecorteEnNoticias'}),
-                          dict(name='div', attrs={'id':'Comparte'}),
-                          dict(name='div', attrs={'id':'CajaComparte'}),
-                          dict(name='a', attrs={'class':'EscribirComentario'}),
-                          dict(name='a', attrs={'class':'AvisoComentario'}),
-                          dict(name='div', attrs={'class':'CajaAvisoComentario'}),
-                          dict(name='div', attrs={'class':'navegaNoticias'}),
-                          dict(name='div', attrs={'class':'Mensaje'}),
-                          dict(name='div', attrs={'id':'PaginadorDiCom'}),
-                          dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
-                          dict(name='div', attrs={'id':'CintilloComentario'}),
-                          dict(name='div', attrs={'id':'EscribeComentario'}),
-                          dict(name='div', attrs={'id':'FormularioComentario'}),
-                          dict(name='div', attrs={'id':'FormularioNormas'})]
-
    # Recuperamos la portada de papel (la imagen format=1 tiene mayor resolucion)

    def get_cover_url(self):
@ -104,23 +72,7 @@ class elperiodicodearagon(BasicNewsRecipe):
              return image['src'].rstrip('format=2') + 'format=1'
        return None

-    # Para quitar espacios entre la noticia y los comentarios (lineas 1 y 2)
-    # El indice no apuntaba correctamente al empiece de la noticia (linea 3)
+    # Usamos la versión para móviles

-    preprocess_regexps = [
-        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
-        ]
-
-    # Para sustituir el video incrustado de YouTube por una imagen
-
-    def preprocess_html(self, soup):
-        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
-            if video_yt:
-               video_yt.name = 'img'
-               fuente = video_yt['src']
-               fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
-               video_yt['src'] = fuente2 + '/0.jpg'
-
-        return soup
+    def print_version(self, url):
+        '''Return url with the site root replaced by the root of its '/m/' section.'''
+        site_root = 'http://www.elperiodicodearagon.com/'
+        mobile_root = site_root + 'm/'
+        return url.replace(site_root, mobile_root)
--- a/recipes/elet_es_irodalom.recipe
+++ b/recipes/elet_es_irodalom.recipe
@ -0,0 +1,58 @@
+################################################################################
+#Description:     http://es.hu/ RSS channel
+#Author:      Bigpapa (bigpapabig@hotmail.com)
+#Date:    2012.01.20. - V1.2
+################################################################################
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class elet_es_irodalom(BasicNewsRecipe):
+    # Recipe for es.hu, read through feed43.com feeds. Login is optional:
+    # get_browser() signs in only when both username and password are set.
+    title = u'\u00c9let \u00e9s Irodalom'
+    __author__ = 'Bigpapa'
+    category = 'Cikkek'
+    language = 'hu'
+    publication_type = 'newsportal'
+    encoding = 'iso-8859-2'
+    masthead_url = 'http://www.es.hu/images/logo.jpg'
+    timefmt = ' [%Y %b %d, %a]'
+    needs_subscription = 'optional'
+
+    oldest_article = 7
+    # Maximum number of articles per feed stored in the generated e-book.
+    max_articles_per_feed = 30
+    # delay = 1  (left disabled)
+    no_stylesheets = True
+    use_embedded_content = False
+    extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class': ['doc_author', 'doc_title', 'doc']}),
+    ]
+
+    remove_tags = [
+        dict(name='a', attrs={'target': ['_TOP']}),
+        # Both style values listed here are identical.
+        dict(name='div', attrs={'style': [
+            'float: right; margin-left: 5px; margin-bottom: 5px;',
+            'float: right; margin-left: 5px; margin-bottom: 5px;',
+        ]}),
+    ]
+
+    feeds = [
+        (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
+        (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
+        (u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
+        (u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
+        (u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
+        (u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
+        (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
+        (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
+        (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml'),
+    ]
+
+    # Do not write the login credentials into this code; they are supplied
+    # at download time.
+    def get_browser(self):
+        '''
+        Return the browser used for downloads, logged in when possible.
+
+        When both self.username and self.password are set, the
+        'userfrmlogin' form on http://www.es.hu/ is filled in and submitted;
+        otherwise the browser is returned as created.
+        '''
+        br = BasicNewsRecipe.get_browser()
+        if self.username is None or self.password is None:
+            return br
+        br.open('http://www.es.hu/')
+        br.select_form(name='userfrmlogin')
+        br['cusername'] = self.username
+        br['cpassword'] = self.password
+        br.submit()
+        return br
--- a/recipes/elmundo.recipe
+++ b/recipes/elmundo.recipe
@ -4,7 +4,8 @@ __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 elmundo.es
 '''
-
+import re
+import time
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElMundo(BasicNewsRecipe):
@ -18,12 +19,15 @@ class ElMundo(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'iso8859_15'
+    remove_javascript     = True
+    remove_empty_feeds    = True
    language              = 'es'
    masthead_url          = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
    publication_type      = 'newspaper'
    extra_css             = """
                               body{font-family: Arial,Helvetica,sans-serif}
                               .metadata_noticia{font-size: small}
+                               .pestana_GDP{font-size: small; font-weight:bold}
                               h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
                               .hora{color: red}
                               .update{color: gray}
@ -41,22 +45,43 @@ class ElMundo(BasicNewsRecipe):
    remove_tags_after  = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
    remove_attributes  = ['lang','border']
    remove_tags = [
-                     dict(name='div', attrs={'class':['herramientas','publicidad_google']})
-                    ,dict(name='div', attrs={'id':'modulo_multimedia' })
+                     dict(name='div', attrs={'class':['herramientas','publicidad_google','comenta','col col-2b','apoyos','no-te-pierdas']})
+                    ,dict(name='div', attrs={'class':['publicidad publicidad_cuerpo_noticia','comentarios_nav','mensaje_privado','interact']})
+                    ,dict(name='div', attrs={'class':['num_comentarios estirar']})
+                    ,dict(name='span', attrs={'class':['links_comentar']})
+                    ,dict(name='div', attrs={'id':['comentar']})
                    ,dict(name='ul', attrs={'class':'herramientas' })
                    ,dict(name=['object','link','embed','iframe','base','meta'])
                  ]

    feeds = [
-              (u'Portada'         , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml'       )
+              (u'Portada'         , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml'       )                                      
             ,(u'Deportes'        , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
-             ,(u'Economia'        , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml'      )
-             ,(u'Espana'          , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml'        )
+             ,(u'Econom\xeda'     , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml'      )
+             ,(u'Espa\xf1a'       , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml'        )
             ,(u'Internacional'   , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
             ,(u'Cultura'         , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml'       )
-             ,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml'       )
-             ,(u'Comunicacion'    , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml'  )
-             ,(u'Television'      , u'http://estaticos.elmundo.es/elmundo/rss/television.xml'    )
+             ,(u'Ciencia/Ecolog\xeda', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml'    )
+             ,(u'Comunicaci\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml'  )
+             ,(u'Televisi\xf3n'   , u'http://estaticos.elmundo.es/elmundo/rss/television.xml'    )
+
+             ,(u'Salud'           , u'http://estaticos.elmundo.es/elmundosalud/rss/portada.xml'  )
+             ,(u'Solidaridad'     , u'http://estaticos.elmundo.es/elmundo/rss/solidaridad.xml'   )
+             ,(u'Su vivienda'     , u'http://estaticos.elmundo.es/elmundo/rss/suvivienda.xml'    )             
+             ,(u'Motor'           , u'http://estaticos.elmundo.es/elmundomotor/rss/portada.xml'  )             
+             
+             ,(u'Madrid'          , u'http://estaticos.elmundo.es/elmundo/rss/madrid.xml'        )
+             ,(u'Barcelona'       , u'http://estaticos.elmundo.es/elmundo/rss/barcelona.xml'     )
+             ,(u'Pa\xeds Vasco'   , u'http://estaticos.elmundo.es/elmundo/rss/paisvasco.xml'     )	     
+             ,(u'Baleares'        , u'http://estaticos.elmundo.es/elmundo/rss/baleares.xml'      )
+	     ,(u'Castilla y Le\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castillayleon.xml' )	     
+	     ,(u'Valladolid'      , u'http://estaticos.elmundo.es/elmundo/rss/valladolid.xml'    )
+	     ,(u'Valencia'        , u'http://estaticos.elmundo.es/elmundo/rss/valencia.xml'      )
+	     ,(u'Alicante'        , u'http://estaticos.elmundo.es/elmundo/rss/alicante.xml'      )
+	     ,(u'Castell\xf3n'    , u'http://estaticos.elmundo.es/elmundo/rss/castellon.xml'     )	
+	     ,(u'Andaluc\xeda'    , u'http://estaticos.elmundo.es/elmundo/rss/andalucia.xml'     )
+	     ,(u'Sevilla'         , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_sevilla.xml'  )
+	     ,(u'M\xe1laga'       , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_malaga.xml'   )
            ]

    def preprocess_html(self, soup):
@ -67,3 +92,34 @@ class ElMundo(BasicNewsRecipe):
    def get_article_url(self, article):
        return article.get('guid',  None)

+
+    # (compiled pattern, replacement callable) pairs.
+    preprocess_regexps = [
+                           # Show the still image of embedded videos. The three
+                           # rewrites appear to work together: 'var video=' opens a
+                           # script/HTML-comment wrapper, 'var imagen' closes it and
+                           # starts an <img> tag, and the trailing '.jpg";' closes
+                           # that tag. NOTE(review): confirm against the page markup.
+
+                           (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+                           # The dot is escaped so that only a literal '.jpg";' is
+                           # rewritten, not any character followed by 'jpg";'.
+                           (re.compile(r'\.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+                           (re.compile(r'var video=', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+
+                           # Turn the ordered comment list into an unordered one so
+                           # the comment numbering (1, 2, 3 ...) is not shown.
+
+                           (re.compile(r'<ol>\n<li style="z-index:', re.DOTALL|re.IGNORECASE), lambda match: '<ul><li style="z-index:'),
+                           (re.compile(r'</ol>\n<div class="num_comentarios estirar">', re.DOTALL|re.IGNORECASE), lambda match: '</ul><div class="num_comentarios estirar">'),
+                         ]
+
+    # Obtener la imagen de portada
+
+    def get_cover_url(self):
+        '''
+        Return the URL of the cover image for today's issue.
+
+        The URL is built from the local date and probed with a browser; if
+        opening it fails, a message is logged and the fixed masthead image
+        URL is returned instead.
+        '''
+        # Example: http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg
+        cover = 'http://img.kiosko.net/' + time.strftime('%Y/%m/%d') + '/es/elmundo.750.jpg'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except Exception:
+            # Best effort: any download error falls back to the masthead, but
+            # KeyboardInterrupt/SystemExit are no longer swallowed.
+            self.log("\nPortada no disponible")
+            cover = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
+        return cover
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@ -0,0 +1,16 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class eMuzyka(BasicNewsRecipe):
+    # Feed-driven recipe for emuzyka.pl; configuration only, no custom code.
+    title = u'eMuzyka'
+    __author__ = 'fenuks'
+    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
+    category = 'music'
+    language = 'pl'
+    cover_url = 'http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    # Page elements to keep, and the one to drop from what is kept.
+    keep_only_tags = [
+        dict(name='div', attrs={'id': 'news_container'}),
+        dict(name='h3'),
+        dict(name='div', attrs={'class': 'review_text'}),
+    ]
+    remove_tags = [
+        dict(name='span', attrs={'id': 'date'}),
+    ]
+
+    # (section title, RSS URL) pairs.
+    feeds = [
+        (u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'),
+        (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2'),
+    ]
--- a/recipes/espn.recipe
+++ b/recipes/espn.recipe
@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):

    use_embedded_content = False
    remove_javascript     = True
-    needs_subscription = True
+    needs_subscription = 'optional'
    encoding= 'ISO-8859-1'

    remove_tags_before = dict(name='font', attrs={'class':'date'})
@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):

        return soup

-
-
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
-        br.set_handle_refresh(False)
-        url = ('https://r.espn.go.com/members/v3_1/login')
-        raw = br.open(url).read()
-        raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
-        with TemporaryFile(suffix='.htm') as fname:
-            with open(fname, 'wb') as f:
-                f.write(raw)
-            br.open_local_file(fname)
+        if self.username and self.password:
+            br.set_handle_refresh(False)
+            url = ('https://r.espn.go.com/members/v3_1/login')
+            raw = br.open(url).read()
+            raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
+            with TemporaryFile(suffix='.htm') as fname:
+                with open(fname, 'wb') as f:
+                    f.write(raw)
+                br.open_local_file(fname)

-        br.form = br.forms().next()
-        br.form.find_control(name='username', type='text').value = self.username
-        br.form['password'] = self.password
-        br.submit().read()
-        br.open('http://espn.go.com').read()
-        br.set_handle_refresh(True)
+            br.form = br.forms().next()
+            br.form.find_control(name='username', type='text').value = self.username
+            br.form['password'] = self.password
+            br.submit().read()
+            br.open('http://espn.go.com').read()
+            br.set_handle_refresh(True)
        return br

    def get_article_url(self, article):
        return article.get('guid',  None)

    def print_version(self, url):
-
        if 'eticket' in url:
            return url.partition('&')[0].replace('story?', 'print?')
        match = re.search(r'story\?(id=\d+)', url)
--- a/recipes/expansion_spanish.recipe
+++ b/recipes/expansion_spanish.recipe
@ -1,35 +1,43 @@
 #!/usr/bin/env  python
-__license__   = 'GPL v3'
-__author__    = 'Gerardo Diez'
-__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
-description   = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
-__docformat__ = 'restructuredtext en'
+__license__     = 'GPL v3'
+__copyright__   = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
+__author__      = 'desUBIKado, based on an earlier version by Gerardo Diez'
+__version__     = 'v1.01'
+__date__        = '13, November 2011'

 '''
-expansion.es
+[url]http://www.expansion.com/[/url]
 '''
+
+import time
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-class Publico(BasicNewsRecipe):
-    title               =u'Expansion.com'
-    __author__      ='Gerardo Diez'
-    publisher       =u'Unidad Editorial Información Económica, S.L.'
-    category                ='finances, catalunya'
-    oldest_article      =1
+
+class expansion_spanish(BasicNewsRecipe):
+    __author__      ='Gerardo Diez & desUBIKado'
+    description     ='Financial news from Spain'
+    title           =u'Expansion'
+    publisher       =u'Unidad Editorial Internet, S.L.'
+    category        ='news, finances, Spain'
+    oldest_article  = 2
+    simultaneous_downloads = 10
    max_articles_per_feed   =100
-    simultaneous_downloads  =10
-    cover_url       =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
-    timefmt         ='[%A, %d %B, %Y]'
-    encoding        ='latin'
+    timefmt         = '[%a, %d %b, %Y]'
+    encoding        ='iso-8859-15'
    language        ='es'
-    remove_javascript   =True
-    no_stylesheets      =True
+    use_embedded_content  = False
+    remove_javascript     = True
+    no_stylesheets        = True
+    remove_empty_feeds    = True
+
    keep_only_tags      =dict(name='div', attrs={'class':['noticia primer_elemento']})
+
    remove_tags         =[
-                dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
-                dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
+                dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
+                dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
                dict(name='span', attrs={'class':['comentarios']}),
                dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
-                dict(name='div', attrs={'id':['comentarios_lectores_listado']})
+                dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
                            ]
    feeds               =[
                (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
                (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
                (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
                (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
-
                (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
                (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
-                (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
+                (u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
                (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
-                (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
+                (u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
                (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
-
                (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
                (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
                (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
-                (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
-                (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
+                (u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
+                (u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
                (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
-                (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
-                (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
-                (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
+                (u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
+                (u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
+                (u'Deporte y Negocio', u'http://estaticos.expansion.com/rss/empresasdeporte.xml'),
                (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
                (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
                (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
-
-                (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
-                (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
+                (u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
+                (u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
                (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
-
-                (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
+                (u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
                (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
                (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
-
-                (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
+                (u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
                (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
-                (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
+                (u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
                (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
-
                (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
-                (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
-                (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
+                (u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
+                (u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
                ]

+    # Obtener la imagen de portada
+
+    def get_cover_url(self):
+        '''
+        Return the URL of the cover image for today's edition.
+
+        The kiosko.net front page scan for the current local date is probed
+        first. If opening it fails for any reason, the failure is logged and
+        a static Expansion logo URL is returned instead.
+        '''
+        # Example: http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg
+        cover = time.strftime('http://img5.kiosko.net/%Y/%m/%d/es/expansion.750.jpg')
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except Exception:
+            # Best effort: any download problem falls back to the logo, but
+            # KeyboardInterrupt/SystemExit are no longer swallowed.
+            self.log("\nPortada no disponible")
+            cover = 'http://www.aproahp.org/enlaces/images/diario_expansion.gif'
+        return cover
+
+
+
+    # Para que no salte la publicidad al recuperar la noticia, y que siempre se recupere
+    # la página web, mando la variable "t" con la hora "linux" o "epoch" actual
+    # haciendole creer al sitio web que justo se acaba de ver la publicidad
+
+    def print_version(self, url):
+        '''
+        Return `url` with the current epoch time added as a ``t`` parameter.
+
+        Every '.html' in the URL becomes '.html?t=<seconds since the epoch>'.
+        Per the recipe author's note, the timestamp makes the site believe
+        the advertisement has just been shown, so the article is served.
+        '''
+        epoch_seconds = int(time.time())
+        return url.replace('.html', '.html?t=' + str(epoch_seconds))
+
+
+
+    _processed_links = []
+
+    def get_article_url(self, article):
+        '''
+        Return the original article URL for a feed entry, or None to skip it.
+
+        Links of the form ``.../<encoded>/story01.htm`` (feedsportal style)
+        are decoded back to the publisher URL. A link that was already
+        returned for an earlier entry yields None, so an article that shows
+        up in several feeds is only downloaded once.
+
+        :param article: feed entry; only its ``link`` key is read.
+        :return: the URL as a string, or None when the entry has no link or
+                 is a duplicate.
+        '''
+        link = article.get('link', None)
+        if link is None:
+            # Nothing to download. Return None rather than the entry object,
+            # which is not a URL.
+            return None
+
+        parts = link.split('/')
+        if len(parts) > 1 and parts[-1] == "story01.htm":
+            link = parts[-2]
+            # Applied strictly in this order; '0A' -> '0' goes last so that
+            # the zeros it produces are not read as the start of another
+            # escape sequence.
+            for encoded, plain in (('0B', '.'), ('0C', '/'), ('0D', '?'),
+                                   ('0E', '-'), ('0F', '='), ('0G', '&'),
+                                   ('0N', '.com'), ('0L0S', 'www.'),
+                                   ('0A', '0')):
+                link = link.replace(encoded, plain)
+            link = "http://" + link
+
+        # Drop articles already seen in another feed
+        if link in self._processed_links:
+            return None
+        self._processed_links.append(link)
+        return link
+
+
+
+    # Un poco de css para mejorar la presentación de las noticias
+
+    extra_css = '''
+                    .entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
+                    .fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+                '''
+
+
+
+    # Para presentar la imagen de los videos incrustados
+
+    preprocess_regexps = [
+                           (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+                           (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+                           (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+                         ]
--- a/recipes/fhm_uk.recipe
+++ b/recipes/fhm_uk.recipe
@ -0,0 +1,30 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1325006965(BasicNewsRecipe):
+    # Recipe for FHM UK, built from three feed43.com feeds.
+    # last updated 27/12/11
+
+    # --- metadata ---
+    title = u'FHM UK'
+    __author__ = 'Dave Asbury'
+    description = 'Good News for Men'
+    language = 'en_GB'
+    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
+    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
+
+    # --- download limits ---
+    oldest_article = 28
+    max_articles_per_feed = 12
+    remove_empty_feeds = True
+
+    # --- clean-up ---
+    no_stylesheets = True
+    #auto_cleanup = True
+    #articles_are_obfuscated = True
+
+    # Only the headline, the main image and the article containers are kept.
+    keep_only_tags = [
+        {'name': 'h1'},
+        {'name': 'img', 'attrs': {'id': 'ctl00_Body_imgMainImage'}},
+        {'name': 'div', 'attrs': {'id': ['articleLeft']}},
+        {'name': 'div', 'attrs': {'class': ['imagesCenterArticle',
+                                            'containerCenterArticle',
+                                            'articleBody']}},
+    ]
+
+    feeds = [
+        (u'From the Homepage', u'http://feed43.com/8053226782885416.xml'),
+        (u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
+        (u'Gaming', u'http://feed43.com/0755006465351035.xml'),
+    ]
--- a/recipes/fisco_oggi.recipe
+++ b/recipes/fisco_oggi.recipe
@ -0,0 +1,18 @@
+__license__   = 'GPL v3'
+__author__    = 'faber1971'
+description   = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1324112023(BasicNewsRecipe):
+    # Recipe for Fisco Oggi, the news site of the Italian revenue agency.
+    title          = u'Fisco Oggi'
+    __author__     = 'faber1971'
+    # Declared on the class as well: a module-level `description` is not an
+    # attribute of the recipe.
+    description    = 'Website of Italian Government Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
+    language       = 'it'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup   = True
+    remove_javascript = True
+    no_stylesheets = True
+
+    # One feed per section of the site (taxonomy term feeds).
+    feeds          = [
+        (u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'),
+        (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'),
+        (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'),
+        (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'),
+        (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'),
+        (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'),
+        (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'),
+        (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed'),
+    ]
+
--- a/recipes/focus_pl.recipe
+++ b/recipes/focus_pl.recipe
@ -1,57 +1,68 @@
-# -*- coding: utf-8 -*-
+import re
+
 from calibre.web.feeds.news import BasicNewsRecipe

-class Focus_pl(BasicNewsRecipe):
-    title          = u'Focus.pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
-    __author__        = 'fenuks'
-    language       = 'pl'
-    description ='polish scientific monthly magazine'
+class FocusRecipe(BasicNewsRecipe):
+    __license__ = 'GPL v3'
+    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    language = 'pl'
+    version = 1
+
+    title = u'Focus'
+    publisher = u'Gruner + Jahr Polska'
+    category = u'News'
+    description = u'Newspaper'
    category='magazine'
    cover_url=''
    remove_empty_feeds= True
    no_stylesheets=True
-    remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
-    remove_tags_after=dict(name='div', attrs={'class':'clear'})
-    feeds          = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
-	(u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
-	(u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
-	(u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
-	(u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
-	(u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
-	(u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
-	(u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
-	(u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
+    oldest_article = 7
+    max_articles_per_feed = 100000
+    recursions = 0
+
+    no_stylesheets = True
+    remove_javascript = True
+    encoding = 'utf-8'
+    # Seems to work best, but YMMV
+    simultaneous_downloads = 5
+
+    r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
+    
+    remove_tags =[]
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
+    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
+
+    extra_css = '''
+                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+                    h1{text-align: left;}
+                    h2{font-size: medium; font-weight: bold;}
+                    p.lead {font-weight: bold; text-align: left;}
+                    .authordate {font-size: small; color: #696969;}
+                    .fot{font-size: x-small; color: #666666;}
+                    '''    


-
-]
+    feeds          = [
+                            ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
+                            ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
+                            ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
+                            ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
+                            ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
+                            ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
+                            ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),                            
+                          ]

    def skip_ad_pages(self, soup):
-          tag=soup.find(name='a')
-          if tag:
-            new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
-            return new_soup
-
-    def append_page(self, appendtag):
-        tag=appendtag.find(name='div', attrs={'class':'arrows'})
-        if tag:
-            nexturl='http://www.focus.pl/'+tag.a['href']
-            for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
-                rem.extract()
-            while nexturl:
-                 soup2=self.index_to_soup(nexturl)
-                 nexturl=None
-                 pagetext=soup2.find(name='div', attrs={'class':'txt'})
-                 tag=pagetext.find(name='div', attrs={'class':'arrows'})
-                 for r in tag.findAll(name='a'):
-                     if u'Następne' in r.string:
-                         nexturl='http://www.focus.pl/'+r['href']
-                 for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
-                     rem.extract()
-                 pos = len(appendtag.contents)
-                 appendtag.insert(pos, pagetext)
+        if ('advertisement' in soup.find('title').string.lower()):
+            href = soup.find('a').get('href')
+            return self.index_to_soup(href, raw=True)
+        else:
+            return None

    def get_cover_url(self):
        soup=self.index_to_soup('http://www.focus.pl/magazyn/')
@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
            self.cover_url='http://www.focus.pl/' + tag.a['href']
            return getattr(self, 'cover_url', self.cover_url)

-
-    def preprocess_html(self, soup):
-         self.append_page(soup.body)
-         return soup
+    def print_version(self, url):
+        '''
+        Map an article URL to its print ("do-druku") variant.
+
+        URLs that do not contain 'focus.pl.feedsportal.com' only get
+        '/nc/1' swapped for '/do-druku/1'. For feedsportal URLs, the text
+        starting 11 characters after the 'focus0Bpl' marker is appended to
+        'http://www.focus.pl/' and then decoded with the replacements below,
+        applied in order.
+        '''
+        if 'focus.pl.feedsportal.com' not in url:
+            return url.replace('/nc/1', '/do-druku/1')
+
+        # NOTE(review): find() yields -1 when the marker is missing, in
+        # which case the slice simply starts at index 10 - confirm intended.
+        tail_start = url.find('focus0Bpl') + 11
+        printable = 'http://www.focus.pl/' + url[tail_start:]
+        # NOTE(review): the 'A' rule removes every capital A, not only the
+        # one in a '0A' escape - presumably fine for lowercase paths; verify.
+        for old, new in (('0C', '/'),
+                         ('A', ''),
+                         ('0E', '-'),
+                         ('/nc/1//story01.htm', '/do-druku/1')):
+            printable = printable.replace(old, new)
+        return printable
--- a/recipes/folhadesaopaulo_sub.recipe
+++ b/recipes/folhadesaopaulo_sub.recipe
@ -8,31 +8,35 @@ class FSP(BasicNewsRecipe):
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
                  u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
-    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
+
+    #found this to be the easiest place to find the index page (13-Nov-2011).
+    #  searching for the "Indice Geral" link
+    HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
+    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
+
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed  = 40
    remove_javascript     = True
    needs_subscription = True
-    remove_tags_before = dict(name='b')
+
+    remove_tags_before = dict(name='p')
    remove_tags  = [dict(name='td', attrs={'align':'center'})]
    remove_attributes = ['height','width']
-    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
-
    # fixes the problem with the section names
    section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
                    'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
                    'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
-                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
+                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio', \
+                    'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios', \
+                    'veiculos' : u've\xedculos', 'corrida' : 'folha corrida'}

    # this solves the problem with truncated content in Kindle
    conversion_options = {'linearize_tables' : True}

    # this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
    #    Indice e Comunicar Erros
-    preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
-                                      re.DOTALL|re.IGNORECASE), lambda match: r''),
-                          (re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
+    preprocess_regexps = [(re.compile(r'<!--/NOTICIA-->.*Comunicar Erros</a>',
                                      re.DOTALL|re.IGNORECASE), lambda match: r'')]

    def get_browser(self):
@ -49,7 +53,25 @@ class FSP(BasicNewsRecipe):


    def parse_index(self):
-        soup = self.index_to_soup(self.INDEX)
+        #Searching for the index page on the HOMEPAGE
+        hpsoup = self.index_to_soup(self.HOMEPAGE)
+        indexref = hpsoup.find('a', href=re.compile('^indices.*'))
+        self.log('--> tag containing the today s index: ', indexref)
+        INDEX = indexref['href']
+        INDEX = 'http://www1.folha.uol.com.br/fsp/'+INDEX
+        self.log('--> INDEX after extracting href and adding prefix: ', INDEX)
+        # ... and taking the opportunity to get the cover image link
+        coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href']
+        if coverurl:
+            self.log('--> tag containing the today s cover: ', coverurl)
+            coverurl = coverurl.replace('htm', 'jpg')
+            coverurl = 'http://www1.folha.uol.com.br/fsp/images/'+coverurl
+            self.log('--> coverurl after extracting href and adding prefix: ', coverurl)
+            self.cover_url = coverurl
+
+        #soup = self.index_to_soup(self.INDEX)
+        soup = self.index_to_soup(INDEX)
+
        feeds = []
        articles = []
        section_title = "Preambulo"
@ -68,8 +90,12 @@ class FSP(BasicNewsRecipe):
                self.log('--> new section title:   ', section_title)
            if strpost.startswith('<a href'):
                url = post['href']
+                #this bit is kept if they ever go back to the old format (pre Nov-2011)
                if url.startswith('/fsp'):
                    url = 'http://www1.folha.uol.com.br'+url
+                #
+                if url.startswith('http://www1.folha.uol.com.br/fsp'):
+                    #url = 'http://www1.folha.uol.com.br'+url
                    title = self.tag_to_string(post)
                    self.log()
                    self.log('--> post:  ', post)
@ -82,15 +108,11 @@ class FSP(BasicNewsRecipe):
        # keeping the front page url
        minha_capa = feeds[0][1][1]['url']

-        # removing the 'Preambulo' section
+        # removing the first section (now called 'top')
        del feeds[0]

-        # creating the url for the cover image
-        coverurl = feeds[0][1][0]['url']
-        coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
-        coverurl = coverurl.replace('01.htm', '.jpg')
-        self.cover_url = coverurl
-
        # inserting the cover page as the first article (nicer for kindle users)
        feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
        return feeds
+
+
--- a/recipes/formulaas.recipe
+++ b/recipes/formulaas.recipe
@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+formula-as.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class FormulaAS(BasicNewsRecipe):
+    # Recipe for the Romanian magazine Formula AS (formula-as.ro).
+
+    # --- metadata ---
+    title = u'Formula AS'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Formula AS'
+    description = u'Formula AS'
+    category = 'Ziare,Romania'
+    language = 'ro'
+    cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png'
+
+    # --- download settings ---
+    oldest_article = 5
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf-8'
+
+    # Metadata handed to the conversion step, taken from the values above.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    # --- page clean-up ---
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'class': 'item padded'}},
+    ]
+
+    remove_tags = [
+        {'name': 'ul', 'attrs': {'class': 'subtitle lower'}},
+    ]
+
+    remove_tags_after = [
+        {'name': 'ul', 'attrs': {'class': 'subtitle lower'}},
+        {'name': 'div', 'attrs': {'class': 'item-brief-options'}},
+    ]
+
+    feeds = [
+        (u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Hand the parsed page to adeify_images() and return its result.
+        return self.adeify_images(soup)
--- a/recipes/frazpc.recipe
+++ b/recipes/frazpc.recipe
@ -18,7 +18,7 @@ class FrazPC(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
-
+    cover_url='http://www.frazpc.pl/images/logo.png'
    feeds          = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'), 
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
@ -33,6 +33,7 @@ class FrazPC(BasicNewsRecipe):
        dict(name='div', attrs={'class':'comments_box'})
    ]

+    remove_tags_after=dict(name='div', attrs={'class':'content'})
    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]

    remove_attributes = [ 'width', 'height' ]
--- a/recipes/gazeta_pl_szczecin.recipe
+++ b/recipes/gazeta_pl_szczecin.recipe
@ -0,0 +1,35 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+import re
+import string
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GazetaPlSzczecin(BasicNewsRecipe):
+    # Recipe for the Szczecin section of Gazeta.pl, read through feedsportal.
+    title          = u'Gazeta.pl Szczecin'
+    description    = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
+    __author__     = u'Michał Szkutnik'
+    __license__    = u'GPL v3'
+    language       = 'pl'
+    publisher      = 'Agora S.A.'
+    category       = 'news, szczecin'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    auto_cleanup   = True
+    remove_tags    = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
+    cover_url      = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
+    feeds          = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]
+
+    def get_article_url(self, article):
+        '''
+        Decode the feedsportal link of `article` into the real article URL.
+
+        The part of the link between '/0L' and '/story01.htm' is unescaped
+        and prefixed with 'http://'. A link that does not have that shape is
+        returned unchanged instead of raising AttributeError.
+        '''
+        link = article.link
+        found = re.search(r"/0L(szczecin.*)/story01.htm", link)
+        if found is None:
+            return link
+        decoded = found.group(1)
+        # '0A' -> '0' is applied last, as in the original sequence.
+        for escaped, plain in (("0B", "."), ("0C", "/"), ("0H", ","),
+                               ("0I", "_"), ("0A", "0")):
+            decoded = decoded.replace(escaped, plain)
+        return "http://" + decoded
+
+    def print_version(self, url):
+        '''
+        Build the print-view URL from the second and third comma-separated
+        numbers in the article URL. A URL without that pattern is returned
+        unchanged instead of raising AttributeError.
+        '''
+        found = re.search(r"/(\d*),(\d*),(\d*),.*\.html", url)
+        if found is None:
+            return url
+        return "http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html" % (
+            found.group(2), found.group(3))
--- a/recipes/givemesomethingtoread.recipe
+++ b/recipes/givemesomethingtoread.recipe
@ -0,0 +1,90 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GiveMeSomethingToRead(BasicNewsRecipe):
+    # Recipe that scrapes the category pages of givemesomethingtoread.com.
+    title          = u'Give Me Something To Read'
+    description    = 'Curation / aggregation of articles on diverse topics'
+    language = 'en'
+    __author__     = 'barty on mobileread.com forum'
+    max_articles_per_feed = 100
+    no_stylesheets = False
+    timefmt        = ' [%a, %d %b, %Y]'
+    oldest_article = 365
+    auto_cleanup   = True
+    INDEX          = 'http://givemesomethingtoread.com'
+    CATEGORIES     = [
+        # comment out categories you don't want
+        # (user friendly name, system name, max number of articles to load)
+        ('The Arts','arts',25),
+        ('Science','science',30),
+        ('Technology','technology',30),
+        ('Politics','politics',20),
+        ('Media','media',30),
+        ('Crime','crime',15),
+        ('Other articles','',10)
+        ]
+
+    def parse_index(self):
+        '''
+        Build the feed list by walking the listing pages of each category.
+
+        For every entry of CATEGORIES, pages are read until enough articles
+        have been collected, a page fails to load, a page has no <h2>
+        headers, or page 99 has been read. An article URL is used only once
+        across all categories.
+
+        :return: list of (category name, list of article dicts); each dict
+                 has the keys 'title', 'url', 'description' and 'date'.
+        '''
+        self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
+        feeds = []
+        seen_urls = set()
+        regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
+
+        for cat_name, tag, max_articles in self.CATEGORIES:
+
+            tagurl = '/tagged/' + tag if tag else ''
+            self.log('Reading category:', cat_name)
+
+            articles = []
+            pageno = 1
+
+            while len(articles) < max_articles and pageno < 100:
+
+                page = self.INDEX + tagurl
+                if pageno > 1:
+                    page = "%s/page/%d" % (page, pageno)
+                pageno += 1
+
+                self.log('\tReading page:', page)
+                try:
+                    soup = self.index_to_soup(page)
+                except Exception:
+                    # An unreadable page ends this category; keep what was
+                    # collected so far.
+                    break
+
+                headers = soup.findAll('h2')
+                if not headers:
+                    break
+
+                for header in headers:
+                    atag = header.find('a')
+                    url = atag.get('href') if atag is not None else None
+                    if not url:
+                        # header without a usable link
+                        continue
+                    # skip promotionals and duplicate
+                    if url.startswith(('http://givemesomethingtoread', '/')) or url in seen_urls:
+                        continue
+                    seen_urls.add(url)
+                    title = self.tag_to_string(header)
+                    self.log('\tFound article:', title)
+                    desc = header.parent.find('blockquote')
+                    desc = self.tag_to_string(desc) if desc else ''
+                    # prefix the description with the host of the article
+                    m = regex.match(url)
+                    if m:
+                        desc = "[%s] %s" %  (m.group(2), desc)
+                    date = ''
+                    # walk back through the preceding siblings to find the
+                    # h3, which contains the date
+                    p = header.parent.previousSibling
+                    while p is not None:
+                        if hasattr(p,'name') and p.name == 'h3':
+                            date = self.tag_to_string(p)
+                            break
+                        p = p.previousSibling
+                    articles.append({'title':title,'url':url,'description':desc,'date':date})
+                    if len(articles) >= max_articles:
+                        break
+
+            if articles:
+                feeds.append((cat_name, articles))
+
+        return feeds
+
--- a/recipes/glasgow_herald.recipe
+++ b/recipes/glasgow_herald.recipe
@ -1,4 +1,3 @@
-
 from calibre.web.feeds.news import BasicNewsRecipe

 class GlasgowHerald(BasicNewsRecipe):
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
    language = 'en_GB'

    __author__     = 'Kovid Goyal'
+    use_embedded_content = False

-    keep_only_tags = [dict(attrs={'class':'article'})]
-    remove_tags = [
-            dict(id=['pic-nav']),
-            dict(attrs={'class':['comments-top']})
-            ]
+    no_stylesheets = True
+    auto_cleanup = True
+
+    #keep_only_tags = [dict(attrs={'class':'article'})]
+    #remove_tags = [
+            #dict(id=['pic-nav']),
+            #dict(attrs={'class':['comments-top']})
+            #]


    feeds          = [
@ -25,5 +28,4 @@ class GlasgowHerald(BasicNewsRecipe):
                        (u'Arts & Entertainment',
                        u'http://www.heraldscotland.com/cmlink/1.768',),
                        (u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
-
-
+ 
--- a/recipes/globe_and_mail.recipe
+++ b/recipes/globe_and_mail.recipe
@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
            {'class':['articleTools', 'pagination', 'Ads', 'topad',
                'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]

+    def populate_article_metadata(self, article, soup, first):
+        '''
+        Use the first image of an article's first page as its TOC thumbnail.
+
+        Nothing is done for later pages (`first` false), when this recipe
+        object has no `add_toc_thumbnail` method, or when the page has no
+        <img> with a non-empty `src` attribute.
+        '''
+        if not (first and hasattr(self, 'add_toc_thumbnail')):
+            return
+        img = soup.find('img')
+        # .get() avoids a KeyError for an <img> tag without a src attribute
+        src = img.get('src') if img is not None else None
+        if src:
+            self.add_toc_thumbnail(article, src)
+
+
    #Use the mobile version rather than the web version
    def print_version(self, url):
        return url.rpartition('?')[0] + '?service=mobile'
--- a/recipes/goal.recipe
+++ b/recipes/goal.recipe
@ -0,0 +1,13 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1325677767(BasicNewsRecipe):
+    # Recipe for the Italian edition of Goal.com (single RSS feed).
+
+    # --- metadata ---
+    title = u'Goal'
+    __author__ = 'faber1971'
+    description = 'Sports news from Italy'
+    language = 'it'
+
+    # --- download settings ---
+    oldest_article = 1
+    max_articles_per_feed = 100
+
+    # --- clean-up: drop everything after the article body ---
+    auto_cleanup = True
+    remove_tags_after = [{'id': 'article_content'}]
+
+    feeds = [
+        (u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss'),
+    ]
+
--- a/recipes/gosc_niedzielny.recipe
+++ b/recipes/gosc_niedzielny.recipe
@ -12,7 +12,6 @@ class GN(BasicNewsRecipe):
        EDITION = 0

        __author__ = 'Piotr Kontek'
-        title = u'Gość niedzielny'
        description = 'Weekly magazine'
        encoding = 'utf-8'
        no_stylesheets = True
@ -20,6 +19,8 @@ class GN(BasicNewsRecipe):
        remove_javascript = True
        temp_files = []
        simultaneous_downloads = 1
+        masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
+        title = u'Gość niedzielny'

        articles_are_obfuscated = True

@ -64,7 +65,6 @@ class GN(BasicNewsRecipe):
                    if img != None:
                        a = img.parent
                        self.EDITION = a['href']
-                        self.title = img['alt']
                        self.cover_url = 'http://www.gosc.pl' + img['src']
                        if not first:
                            break
--- a/recipes/grantland.recipe
+++ b/recipes/grantland.recipe
@ -0,0 +1,76 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GrantLand(BasicNewsRecipe):
+    # Recipe metadata
+    title          = u"Grantland"
+    __author__     = 'barty on mobileread.com forum'
+    description    = 'Writings on Sports & Pop Culture'
+    language       = 'en'
+    timefmt        = ' [%a, %d %b %Y]'
+    cover_url      = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
+    masthead_url   = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'
+
+    # Download limits
+    oldest_article = 90
+    max_articles_per_feed = 100
+
+    # Clean-up settings
+    no_stylesheets = True
+    # auto_cleanup is too aggressive sometimes and we end up with blank articles
+    auto_cleanup   = False
+
+    # Site root; parse_index() appends each category's url suffix to it
+    INDEX          = 'http://www.grantland.com'
+    # (user friendly name, url suffix, max number of articles to load)
+    # comment out second line if you don't want older articles
+    CATEGORIES     = [
+        ('Today in Grantland', '', 20),
+        ('In Case You Missed It', 'incaseyoumissedit', 35),
+    ]
+
+    remove_tags    = [
+        {'name': ['style', 'aside', 'nav', 'footer', 'script']},
+        {'name': 'h1', 'text': 'Grantland'},
+        {'id': ['header', 'col-right']},
+        {'class': ['connect_widget']},
+        {'name': 'section', 'class': re.compile(r'\b(ad|module)\b')},
+    ]
+
+    # remove blog banners
+    preprocess_regexps = [
+        (
+            re.compile(r'<a href="/blog/(?:(?!</a>).)+</a>', re.DOTALL|re.IGNORECASE),
+            lambda m: '',
+        ),
+    ]
+
+    def parse_index(self):
+        # Scrape the index page of every CATEGORIES entry and return a list
+        # of (category name, articles) pairs.  A URL is listed only once, in
+        # the first category that links to it; categories that end up with
+        # no articles are left out.
+        article_href = re.compile(r'(story|post)/_/id/\d+')
+        visited = set()
+        feeds = []
+
+        for cat_name, suffix, max_articles in self.CATEGORIES:
+            self.log('Reading category:', cat_name)
+            articles = []
+
+            soup = self.index_to_soup("%s/%s" % (self.INDEX, suffix))
+
+            # search the whole page when the main column is not present
+            main = soup.find('div', id='col-main')
+            if main is None:
+                main = soup
+
+            for link in main.findAll('a', href=article_href):
+                url = link['href']
+                title = link.string
+                # blank title probably means <a href=".."><img /></a>.  skip
+                if url in visited or not title:
+                    continue
+                self.log('\tFound article:', title)
+                self.log('\t', url)
+                articles.append({'title': title, 'url': url})
+                visited.add(url)
+
+                if len(articles) >= max_articles:
+                    break
+
+            if articles:
+                feeds.append((cat_name, articles))
+
+        return feeds
--- a/recipes/gs24_pl.recipe
+++ b/recipes/gs24_pl.recipe
@ -0,0 +1,43 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+import re
+import string
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1322322819(BasicNewsRecipe):
+    title          = u'GS24.pl (Głos Szczeciński)'
+    description    = u'Internetowy serwis Głosu Szczecińskiego'
+    __author__     = u'Michał Szkutnik'
+    __license__    = u'GPL v3'
+    language       = 'pl'
+    publisher      = 'Media Regionalne sp. z o.o.'
+    category       = 'news, szczecin'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    cover_url = "http://www.gs24.pl/images/top_logo.png"
+
+    # One feed per region/section; the all-in-one feed is left disabled.
+    feeds          = [
+    # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
+     (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
+     (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
+     (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
+     (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
+     (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
+     (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
+     (u'Police', u'http://www.gs24.pl/police.xml'),
+     (u'Region', u'http://www.gs24.pl/region.xml'),
+     (u'Sport', u'http://www.gs24.pl/sport.xml'),
+                    ]
+
+    def get_article_url(self, article):
+        '''
+        Decode the real gs24.pl address hidden in the feed link.
+
+        The feed link carries the target between "/0L0S" and "/story01.htm",
+        with two character "0X" codes standing in for punctuation.  A link
+        that does not have this shape is passed to the default
+        implementation instead of failing on the missing regex match.
+        '''
+        match = re.search("""/0L0S(gs24.*)/story01.htm""", article.link)
+        if match is None:
+            return BasicNewsRecipe.get_article_url(self, article)
+        s = match.group(1)
+        # "0A" (an escaped "0") must be decoded last: the zero it produces
+        # could otherwise pair up with the next letter and be mistaken for
+        # another code.
+        replacements = (("0B", "."), ("0C", "/"), ("0H", ","), ("0I", "_"),
+                        ("0D", "?"), ("0F", "="), ("0A", "0"))
+        for (a, b) in replacements:
+            s = s.replace(a, b)
+        return "http://"+s
+
+    def print_version(self, url):
+        # Ask the site for its print template of the article.
+        return url + "&Template=printpicart"
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -9,6 +9,7 @@ www.guardian.co.uk
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from datetime import date
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Guardian(BasicNewsRecipe):

@ -16,16 +17,19 @@ class Guardian(BasicNewsRecipe):
    if date.today().weekday() == 6:
        base_url = "http://www.guardian.co.uk/theobserver"
        cover_pic = 'Observer digital edition'
+        masthead_url = 'http://static.guim.co.uk/sys-images/Guardian/Pix/site_furniture/2010/10/19/1287478087992/The-Observer-001.gif'
    else:
        base_url = "http://www.guardian.co.uk/theguardian"
        cover_pic = 'Guardian digital edition'
+        masthead_url = 'http://static.guim.co.uk/static/f76b43f9dcfd761f0ecf7099a127b603b2922118/common/images/logos/the-guardian/titlepiece.gif'

    __author__ = 'Seabound and Sujata Raman'
    language = 'en_GB'

-    oldest_article = 7
-    max_articles_per_feed = 100
-    remove_javascript = True
+    oldest_article              = 7
+    max_articles_per_feed       = 100
+    remove_javascript           = True
+    encoding                    = 'utf-8'

    # List of section titles to ignore
    # For example: ['Sport']
@ -41,6 +45,16 @@ class Guardian(BasicNewsRecipe):
                        dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
                        dict(name='ul', attrs={'class':["pagination"]}),
                        dict(name='ul', attrs={'id':["content-actions"]}),
+                        # article history link
+                        dict(name='a', attrs={'class':["rollover history-link"]}),
+                        # "a version of this article ..." spiel
+                        dict(name='div' , attrs = { 'class' : ['section']}),
+                        # "about this article" js dialog
+                        dict(name='div', attrs={'class':["share-top",]}),
+                        # author picture
+                        dict(name='img', attrs={'class':["contributor-pic-small"]}),
+                        # embedded videos/captions
+                        dict(name='span',attrs={'class' : ['inline embed embed-media']}),
                        #dict(name='img'),
                        ]
    use_embedded_content    = False
@ -65,8 +79,21 @@ class Guardian(BasicNewsRecipe):
              url = None
          return url

+    def populate_article_metadata(self, article, soup, first):
+        # Use the first <img> of the article as its table-of-contents
+        # thumbnail.  Only done when ``first`` is true and this recipe object
+        # actually provides add_toc_thumbnail().
+        if not (first and hasattr(self, 'add_toc_thumbnail')):
+            return
+        thumb = soup.find('img')
+        if thumb is None:
+            return
+        self.add_toc_thumbnail(article, thumb['src'])
+
    def preprocess_html(self, soup):

+          # multiple html sections in soup, useful stuff in the first
+          html = soup.find('html')
+          soup2 = BeautifulSoup()
+          soup2.insert(0,html) 
+          
+          soup = soup2  
+          
          for item in soup.findAll(style=True):
              del item['style']

@ -74,7 +101,18 @@ class Guardian(BasicNewsRecipe):
              del item['face']
          for tag in soup.findAll(name=['ul','li']):
                tag.name = 'div'
-
+         
+         # removes number next to rating stars
+          items_to_remove = []
+          rating_container = soup.find('div', attrs = {'class': ['rating-container']})
+          if rating_container:
+            for item in rating_container:
+                if isinstance(item, Tag) and str(item.name) == 'span':
+                    items_to_remove.append(item)
+          
+          for item in items_to_remove:
+            item.extract()
+          
          return soup

    def find_sections(self):
--- a/recipes/hackernews.recipe
+++ b/recipes/hackernews.recipe
@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
 from urlparse import urlparse
 import re

-class HackerNews(BasicNewsRecipe):
-    title                 = 'Hacker News'
-    __author__            = 'Tom Scholl'
+class HNWithCommentsLink(BasicNewsRecipe):
+    title                 = 'HN With Comments Link'
+    __author__            = 'Tom Scholl & David Kerschner'
    description           = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
    publisher             = 'Y Combinator'
    category              = 'news, programming, it, technology'
@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
        body = body + comments
        return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'

+    def parse_feeds(self):
+        # Let the base class build the feeds, then keep the articles of the
+        # first feed on the recipe as ``hn_articles`` so that
+        # get_obfuscated_article() can look them up by URL.
+        parsed = super(HNWithCommentsLink, self).parse_feeds()
+        first_feed = parsed[0]
+        self.hn_articles = first_feed.articles
+        return parsed
+
    def get_obfuscated_article(self, url):
        if url.startswith('http://news.ycombinator.com'):
            content = self.get_hn_content(url)
@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
            else:
                content = self.get_readable_content(url)

+            article = 0
+            for a in self.hn_articles:
+                if a.url == url:
+                    article = a
+
+        content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'
+
        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
        self.temp_files[-1].write(content)
        self.temp_files[-1].close()
--- a/recipes/haksoz.recipe
+++ b/recipes/haksoz.recipe
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BasicUserRecipe1324739199(BasicNewsRecipe):
+    # Recipe metadata
+    title      = u'Haks\xf6z'
+    __author__ = 'asalet_r'
+    language   = 'tr'
+
+    # Download limits and clean-up
+    oldest_article        = 7
+    max_articles_per_feed = 20
+    auto_cleanup          = True
+
+    # The single RSS feed this recipe reads, as a (title, url) pair
+    feeds = [
+        (u'Haks\xf6z', u'http://www.haksozhaber.net/rss/'),
+    ]
--- a/recipes/hamilton_spectator.recipe
+++ b/recipes/hamilton_spectator.recipe
@ -0,0 +1,58 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+'''
+Hamilton Spectator Calibre Recipe
+'''
+class HamiltonSpectator(BasicNewsRecipe):
+    # Recipe metadata
+    title = u'Hamilton Spectator'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    __author__ = u'Eric Coolman'
+    publisher = u'thespec.com'
+    description = u'Ontario Canada Newspaper'
+    category = u'News, Ontario, Canada'
+    remove_javascript = True
+    use_embedded_content   = False
+    no_stylesheets = True
+    language = 'en_CA'
+    encoding = 'utf-8'
+
+    # (section title, RSS url) pairs, one per section of thespec.com
+    feeds          = [
+        (u'Top Stories',u'http://www.thespec.com/rss?query=/&assetType=Article'),
+        (u'All News',u'http://www.thespec.com/rss?query=/news&assetType=Article'),
+        (u'Local',u'http://www.thespec.com/rss?query=/local&assetType=Article'),
+        (u'Ontario',u'http://www.thespec.com/rss?query=/ontario&assetType=Article'),
+        (u'Canada',u'http://www.thespec.com/rss?query=/canada&assetType=Article'),
+        (u'World News',u'http://www.thespec.com/rss?query=/world&assetType=Article'),
+        (u'Business',u'http://www.thespec.com/rss?query=/business&assetType=Article'),
+        (u'Crime',u'http://www.thespec.com/rss?query=/crime&assetType=Article'),
+        (u'All Sports',u'http://www.thespec.com/rss?query=/sports&assetType=Article'),
+        (u'Ticats',u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'),
+        (u'Bulldogs',u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'),
+        (u'High School Sports',u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'),
+        (u'Local Sports',u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'),
+        # double quotes keep the apostrophe; 'What''s On' would silently
+        # concatenate to "Whats On"
+        (u"What's On",u'http://www.thespec.com/rss?query=/whatson&assetType=Article'),
+        (u'Arts and Entertainment',u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'),
+        (u'Books',u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'),
+        (u'Movies',u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'),
+        (u'Music',u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'),
+        (u'Restaurant Reviews',u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'),
+        (u'Opinion',u'http://www.thespec.com/rss?query=/opinion&assetType=Article'),
+        (u'Opinion Columns',u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'),
+        (u'Cartoons',u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'),
+        (u'Letters',u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'),
+        (u'Editorial',u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'),
+        (u'Community',u'http://www.thespec.com/rss?query=/community&assetType=Article'),
+        (u'Education',u'http://www.thespec.com/rss?query=/community/education&assetType=Article'),
+        (u'Faith',u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'),
+        (u'Contests',u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'),
+        (u'Living',u'http://www.thespec.com/rss?query=/living&assetType=Article'),
+        (u'Food',u'http://www.thespec.com/rss?query=/living/food&assetType=Article'),
+        (u'Health and Fitness',u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'),
+        (u'Your Home',u'http://www.thespec.com/rss?query=/living/home&assetType=Article'),
+        (u'Travel',u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'),
+        (u'Family and Parenting',u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'),
+        (u'Style',u'http://www.thespec.com/rss?query=/living/style&assetType=Article')
+    ]
+
--- a/recipes/heise_online.recipe
+++ b/recipes/heise_online.recipe
@ -1,11 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
 class AdvancedUserRecipe(BasicNewsRecipe):

-    title = 'heise online'
+    title = 'Heise-online'
    description = 'News vom Heise-Verlag'
    __author__ = 'schuster'
+    masthead_url = 'http://www.heise.de/icons/ho/heise_online_logo.gif'
+    publisher   = 'Heise Zeitschriften Verlag GmbH & Co. KG'
    use_embedded_content   = False
    language = 'de'
    oldest_article = 2
@ -14,11 +14,10 @@ class AdvancedUserRecipe(BasicNewsRecipe):
    remove_empty_feeds = True
    timeout = 5
    no_stylesheets = True
-    encoding = 'utf-8'


    remove_tags_after = dict(name ='p', attrs={'class':'editor'})
-    remove_tags = [{'class':'navi_top_container'},
+    remove_tags = [dict(id='navi_top_container'),
                            dict(id='navi_bottom'),
                            dict(id='mitte_rechts'),
                            dict(id='navigation'),
@ -29,27 +28,31 @@ class AdvancedUserRecipe(BasicNewsRecipe):
                            dict(id='seiten_navi'),
                            dict(id='adbottom'),
                            dict(id='sitemap'),
-                            dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
-                ]
+                            dict(name='div', attrs={'id':'sitemap'}),
+                            dict(name='ul', attrs={'class':'erste_zeile'}),
+                            dict(name='ul', attrs={'class':'zweite_zeile'}),
+                            dict(name='div', attrs={'class':'navi_top_container'})]

    feeds =  [
                   ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-                   ('iX', 'http://www.heise.de/ix/news/news.rdf'),
-                      ('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
-                   ('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-                   ('Security', 'http://www.heise.de/security/news/news-atom.xml'),
-                   ('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-                   ('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
-                   ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+                   ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
                   ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-                   ('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
-                   ('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
+                   ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
+                   ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+                   ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+                   ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
+                   ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+                   ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
+                   ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
+                   ('iX', 'http://www.heise.de/ix/news/news.rdf'),
+                   ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
                   ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
                   ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
                   ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-                   ('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
-                   ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
-             ]
+                   ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+                   ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')]

    def print_version(self, url):
        return url + '?view=print'
+
+
--- a/recipes/hindustan_times.recipe
+++ b/recipes/hindustan_times.recipe
@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import urllib, re

 class HindustanTimes(BasicNewsRecipe):
    title          = u'Hindustan Times'
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
            'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
 ]

+    def get_article_url(self, article):
+        '''
+        HT uses a variant of the feedportal RSS ad display mechanism
+
+        The real address is first looked for in a bookmark link inside the
+        article summary.  Failing that, the feed URL is opened and the
+        address is decoded from the second to last path segment of the URL
+        the request ends up at.
+        '''
+        try:
+            s = article.summary
+            return urllib.unquote(
+                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
+        except Exception:
+            # Best effort only: no usable summary or no bookmark link in it,
+            # so decode the redirect target below instead.
+            pass
+        url = BasicNewsRecipe.get_article_url(self, article)
+        res = self.browser.open_novisit(url)
+        url = res.geturl().split('/')[-2]
+        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
+                'www.'}
+        # Decode all "0X" codes in a single left to right pass.  Replacing
+        # them one after another in dict order could turn '0A' into '0'
+        # first and then misread that zero plus the following letter as
+        # another code.  Unknown codes are left as they are.
+        return re.sub(r'0[A-Z]',
+                lambda m: encoding.get(m.group(0), m.group(0)), url)
+

--- a/recipes/histmag.recipe
+++ b/recipes/histmag.recipe
@ -4,56 +4,20 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, matek09, matek09@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class Histmag(BasicNewsRecipe):
+    title          = u'Histmag'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    cover_url='http://histmag.org/grafika/loga/histmag-logo-2-300px.png'
+    __author__ = 'matek09'
+    description = u"Artykuly historyczne i publicystyczne"
+    encoding = 'utf-8'
+    #preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
+    no_stylesheets = True
+    language = 'pl'
+    remove_javascript = True
+    keep_only_tags=[dict(id='article')]
+    remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'})]

-	title = u'Histmag'
-	__author__ = 'matek09'
-	description = u"Artykuly historyczne i publicystyczne"
-	encoding = 'utf-8'
-	no_stylesheets = True
-	language = 'pl'
-	remove_javascript = True
-	#max_articles_per_feed = 1
-	remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'article'}))
-	remove_tags_after = dict(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
-	#keep_only_tags =[]
-	#keep_only_tags.append(dict(name = 'h2'))
-	#keep_only_tags.append(dict(name = 'p'))
-
-	remove_tags =[]
-	remove_tags.append(dict(name = 'p', attrs = {'class' : 'podpis'}))
-	remove_tags.append(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
-	remove_tags.append(dict(name = 'img', attrs = {'src' : 'style/buttons/wesprzyjnas-1.jpg'}))
-
-	preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),
-						(re.compile(r'<span>'), lambda match: '<br><br><span>')]
-	extra_css = '''
-					.left {font-size: x-small}
-					.right {font-size: x-small}
-				'''
-
-	def find_articles(self, soup):
-		articles = []
-		for div in soup.findAll('div', attrs={'class' : 'text'}):
-			articles.append({
-				'title' : self.tag_to_string(div.h3.a),
-				'url'   : 'http://www.histmag.org/' + div.h3.a['href'],
-				'date'  : self.tag_to_string(div.next('p')).split('|')[0],
-				'description' : self.tag_to_string(div.next('p', podpis=False)),
-				})
-		return articles
-
-	def parse_index(self):
-		soup = self.index_to_soup('http://histmag.org/?arc=4&dx=0')
-		feeds = []
-		feeds.append((u"Artykuly historyczne", self.find_articles(soup)))
-		soup = self.index_to_soup('http://histmag.org/?arc=5&dx=0')
-		feeds.append((u"Artykuly publicystyczne", self.find_articles(soup)))
-		soup = self.index_to_soup('http://histmag.org/?arc=1&dx=0')
-		feeds.append((u"Wydarzenia", self.find_articles(soup)))
-
-		return feeds
-
-
+    feeds          = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'), (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'), (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'), (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'), (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]
--- a/recipes/historia_pl.recipe
+++ b/recipes/historia_pl.recipe
@ -8,6 +8,15 @@ class Historia_org_pl(BasicNewsRecipe):
    category       = 'history'
    language       = 'pl'
    oldest_article = 8
+    remove_empty_feeds=True
    max_articles_per_feed = 100

-    feeds          = [(u'Artykuły', u'http://www.historia.org.pl/index.php?format=feed&type=rss')]
+    # One (title, url) pair per section feed of historia.org.pl.
+    feeds          = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'),
+        (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'),
+        (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'),
+        (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'),
+        (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'),
+        (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'),
+        (u'Rekonstrukcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'),
+        (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'),
+        (u'Konkursy', u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')]
--- a/recipes/hvg.recipe
+++ b/recipes/hvg.recipe
@ -1,44 +1,58 @@
-# -*- coding: utf-8 -*-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
+################################################################################
+#Description:	  http://hvg.hu/ RSS channel
+#Author: 	  Bigpapa (bigpapabig@hotmail.com)
+#Date:	  2011.12.20. - V1.1
+################################################################################

-class HVG(BasicNewsRecipe):
-    title                 = 'HVG.HU'
-    __author__            = u'István Papp'
-    description           = u'Friss hírek a HVG-től'
-    timefmt               = ' [%Y. %b. %d., %a.]'
-    oldest_article        = 4
-    language              = 'hu'
+from calibre.web.feeds.news import BasicNewsRecipe

-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'utf8'
-    publisher             = 'HVG Online'
-    category              = u'news, hírek, hvg'
-    extra_css             = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps    = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    remove_tags_before    = dict(id='pg-content')
-    remove_javascript     = True
-    remove_empty_feeds    = True
+class hvg(BasicNewsRecipe):
+    title          = u'HVG'
+    __author__     = 'Bigpapa'
+    language = 'hu'
+    oldest_article = 5		# Maximum age, in days, of the oldest article to download.
+    max_articles_per_feed = 5	# Maximum number of articles per feed stored in the e-book.
+    no_stylesheets = True
+    encoding = 'utf8'
+    extra_css = ' h2 { font:bold 28px} '

-    feeds = [
-              (u'Itthon', u'http://hvg.hu/rss/itthon')
-             ,(u'Világ', u'http://hvg.hu/rss/vilag')
-             ,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
-             ,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
-             ,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
-             ,(u'Karrier', u'http://hvg.hu/rss/karrier')
-             ,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
-             ,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
-             ,(u'Kultúra', u'http://hvg.hu/rss/kultura')
-             ,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
-             ,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
-             ,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
-             ,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
-             ,(u'Sport', u'http://hvg.hu/rss/sport')
-            ]
+    remove_attributes = ['style','font', 'href']

-    def print_version(self, url):
-        return url.replace ('#rss', '/print')
+    keep_only_tags    = [
+		dict(name='div', attrs={'id':['pg-content']})
+	]

+    remove_tags = [ 
+	dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
+	dict(name='table', attrs={'class':['banner2', 'monocle']}),
+	dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
+	dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
+	dict(name='h3', attrs={'class':['hthree']}),
+	dict(name='ul', attrs={'class':['defaultul']}),
+	dict(name='form', attrs={'id':['commentForm']}),
+	dict(name='h6', attrs={'class':['hthree']}),
+	dict(name='h6', attrs={'class':['more2']}),
+	dict(name='img', attrs={'class':['framed']}),
+	dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
+
+
+
+	]
+
+    feeds          = [
+#	(u'\xd6sszes', 'http://hvg.hu/rss'),
+	(u'Itthon', 'http://hvg.hu/rss/itthon'),
+	(u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
+	(u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
+	(u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
+	(u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
+	(u'Karrier', 'http://hvg.hu/rss/karrier'),
+	(u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
+	(u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
+	(u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
+	(u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
+	(u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
+	(u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
+	(u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
+	(u'Sport', 'http://hvg.hu/rss/sport')
+]
--- a/recipes/icons/biolog_pl.png
+++ b/recipes/icons/biolog_pl.png
--- a/recipes/icons/blues.png
+++ b/recipes/icons/blues.png
--- a/recipes/icons/computerworld_pl.png
+++ b/recipes/icons/computerworld_pl.png
--- a/recipes/icons/descopera_org.png
+++ b/recipes/icons/descopera_org.png
--- a/recipes/icons/dziennik_pl.png
+++ b/recipes/icons/dziennik_pl.png
--- a/recipes/icons/formulaas.png
+++ b/recipes/icons/formulaas.png
--- a/recipes/icons/infra_pl.png
+++ b/recipes/icons/infra_pl.png
--- a/recipes/icons/kosmonauta_pl.png
+++ b/recipes/icons/kosmonauta_pl.png
--- a/recipes/icons/mlody_technik_pl.png
+++ b/recipes/icons/mlody_technik_pl.png
--- a/recipes/icons/moneynews.png
+++ b/recipes/icons/moneynews.png
--- a/recipes/icons/novilist_novine_hr.png
+++ b/recipes/icons/novilist_novine_hr.png
--- a/recipes/icons/novilist_portal_hr.png
+++ b/recipes/icons/novilist_portal_hr.png
--- a/recipes/icons/rionegro.png
+++ b/recipes/icons/rionegro.png
--- a/recipes/icons/skylife.png
+++ b/recipes/icons/skylife.png
--- a/recipes/icons/zaman.png
+++ b/recipes/icons/zaman.png
--- a/recipes/ideal_almeria.recipe
+++ b/recipes/ideal_almeria.recipe
@ -0,0 +1,68 @@
+# encoding: utf-8 -*-
+
+__license__     = 'GPL v3'
+__author__      = 'Josemi Liébana <office at josemi-liebana.com>'
+__copyright__   = 'Josemi Liébana'
+__version__     = 'v0.1'
+__date__        = '5 January 2012'
+
+
+'''
+www.ideal.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Ideal(BasicNewsRecipe):
+    # Recipe metadata (also fed into conversion_options below)
+    title                 = u'Ideal (Edición Almería)'
+    __author__            = u'Josemi Liébana'
+    description           = u'Noticias de Almería y el resto del mundo'
+    publisher             = 'Ideal'
+    category              = u'News, Politics, Spain, Almería'
+    publication_type      = 'Newspaper'
+
+    # Download and parsing settings
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'es'
+    remove_empty_feeds    = True
+
+    # Presentation
+    masthead_url          = u'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
+    cover_url             = u'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
+    extra_css             = u' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
+
+    # Built from the attributes above, so it has to come after them
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+
+    # Keep the title element plus the listed article classes, drop all <ul>
+    keep_only_tags = [
+        dict(attrs={'id':'title'}),
+        dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']}),
+    ]
+
+    remove_tags = [dict(name='ul')]
+
+    remove_attributes = ['width','height']
+
+    # (section title, RSS url) pairs
+    feeds = [
+        (u'Última Hora', u'http://www.ideal.es/almeria/rss/feeds/ultima.xml'),
+        (u'Portada', u'http://www.ideal.es/almeria/portada.xml'),
+        (u'Local', u'http://www.ideal.es/almeria/rss/feeds/granada.xml'),
+        (u'Deportes', u'http://www.ideal.es/almeria/rss/feeds/deportes.xml'),
+        (u'Sociedad', u'http://www.ideal.es/almeria/rss/feeds/sociedad.xml'),
+        (u'Cultura', u'http://www.ideal.es/almeria/rss/feeds/cultura.xml'),
+        (u'Economía', u'http://www.ideal.es/almeria/rss/feeds/economia.xml'),
+        (u'Costa', u'http://www.ideal.es/almeria/rss/feeds/costa.xml'),
+        (u'Puerta Purchena', u'http://www.ideal.es/almeria/rss/feeds/puerta_purchena.xml'),
+        (u'Andalucía', u'http://www.ideal.es/almeria/rss/feeds/andalucia.xml'),
+        (u'España', u'http://www.ideal.es/almeria/rss/feeds/espana.xml'),
+        (u'Mundo', u'http://www.ideal.es/almeria/rss/feeds/internacional.xml'),
+        (u'Vivir', u'http://www.ideal.es/almeria/rss/feeds/vivir.xml'),
+        (u'Opinión', u'http://www.ideal.es/almeria/rss/feeds/opinion.xml'),
+        (u'Televisión', u'http://www.ideal.es/almeria/rss/feeds/television.xml'),
+        (u'Contraportada', u'http://www.ideal.es/almeria/rss/feeds/contraportada.xml'),
+    ]
+
--- a/recipes/ideal_granada.recipe
+++ b/recipes/ideal_granada.recipe
@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+__license__     = 'GPL v3'
+__author__      = 'Josemi Liébana <office at josemi-liebana.com>'
+__copyright__   = 'Josemi Liébana'
+__version__     = 'v0.1'
+__date__        = '5 January 2012'
+
+
+'''
+www.ideal.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Ideal(BasicNewsRecipe):
+    title = u'Ideal (Edición Granada)'
+    __author__ = u'Josemi Liébana'
+    description = u'Noticias de Granada y el resto del mundo'
+    publisher = 'Ideal'
+    category = 'News, Politics, Spain, Granada'
+    language = 'es'
+    publication_type = 'Newspaper'
+    masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
+    cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
+    encoding = 'cp1252'
+    oldest_article = 2  # days
+    max_articles_per_feed = 200
+    use_embedded_content = False
+    remove_empty_feeds = True
+    no_stylesheets = True
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
+    # Book metadata forwarded as conversion options, taken from the fields above.
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+    # Keep only the element with id "title" and elements carrying one of these classes.
+    keep_only_tags = [
+        dict(attrs={'id': 'title'}),
+        dict(attrs={'class': ['overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}),
+    ]
+    # Drop every <ul> element.
+    remove_tags = [dict(name='ul')]
+    # Attributes to strip from the markup.
+    remove_attributes = ['width', 'height']
+    # (section title, RSS feed URL) pairs.
+    feeds = [
+        (u'Última Hora', u'http://www.ideal.es/granada/rss/feeds/ultima.xml'),
+        (u'Portada', u'http://www.ideal.es/granada/portada.xml'),
+        (u'Local', u'http://www.ideal.es/granada/rss/feeds/granada.xml'),
+        (u'Deportes', u'http://www.ideal.es/granada/rss/feeds/deportes.xml'),
+        (u'Sociedad', u'http://www.ideal.es/granada/rss/feeds/sociedad.xml'),
+        (u'Cultura', u'http://www.ideal.es/granada/rss/feeds/cultura.xml'),
+        (u'Economía', u'http://www.ideal.es/granada/rss/feeds/economia.xml'),
+        (u'Costa', u'http://www.ideal.es/granada/rss/feeds/costa.xml'),
+        (u'La Carrera', u'http://www.ideal.es/granada/rss/feeds/la_carrera.xml'),
+        (u'Puerta Real', u'http://www.ideal.es/granada/rss/feeds/puerta_real.xml'),
+        (u'Andalucía', u'http://www.ideal.es/granada/rss/feeds/andalucia.xml'),
+        (u'España', u'http://www.ideal.es/granada/rss/feeds/espana.xml'),
+        (u'Mundo', u'http://www.ideal.es/granada/rss/feeds/internacional.xml'),
+        (u'Vivir', u'http://www.ideal.es/granada/rss/feeds/vivir.xml'),
+        (u'Opinión', u'http://www.ideal.es/granada/rss/feeds/opinion.xml'),
+        (u'Televisión', u'http://www.ideal.es/granada/rss/feeds/television.xml'),
+        (u'Contraportada', u'http://www.ideal.es/granada/rss/feeds/contraportada.xml'),
+    ]
+
--- a/recipes/ideal_jaen.recipe
+++ b/recipes/ideal_jaen.recipe
@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+__license__     = 'GPL v3'
+__author__      = 'Josemi Liébana <office at josemi-liebana.com>'
+__copyright__   = 'Josemi Liébana'
+__version__     = 'v0.1'
+__date__        = '5 January 2012'
+
+
+'''
+www.ideal.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Ideal(BasicNewsRecipe):
+    title = u'Ideal (Edición Jaén)'
+    __author__ = u'Josemi Liébana'
+    description = u'Noticias de Jaén y el resto del mundo'
+    publisher = 'Ideal'
+    category = u'News, Politics, Spain, Jaén'
+    language = 'es'
+    publication_type = 'Newspaper'
+    masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
+    cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'  # NOTE(review): Granada cover image in the Jaén recipe - confirm
+    encoding = 'cp1252'
+    oldest_article = 2  # days
+    max_articles_per_feed = 200
+    use_embedded_content = False
+    remove_empty_feeds = True
+    no_stylesheets = True
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
+    # Book metadata forwarded as conversion options, taken from the fields above.
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+    # Keep only the element with id "title" and elements carrying one of these classes.
+    keep_only_tags = [
+        dict(attrs={'id': 'title'}),
+        dict(attrs={'class': ['overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}),
+    ]
+    # Drop every <ul> element.
+    remove_tags = [dict(name='ul')]
+    # Attributes to strip from the markup.
+    remove_attributes = ['width', 'height']
+    # (section title, RSS feed URL) pairs. NOTE(review): 'Local' points at granada.xml - looks copy-pasted, confirm.
+    feeds = [
+        (u'Última Hora', u'http://www.ideal.es/jaen/rss/feeds/ultima.xml'),
+        (u'Portada', u'http://www.ideal.es/jaen/portada.xml'),
+        (u'Local', u'http://www.ideal.es/jaen/rss/feeds/granada.xml'),
+        (u'Deportes', u'http://www.ideal.es/jaen/rss/feeds/deportes.xml'),
+        (u'Sociedad', u'http://www.ideal.es/jaen/rss/feeds/sociedad.xml'),
+        (u'Cultura', u'http://www.ideal.es/jaen/rss/feeds/cultura.xml'),
+        (u'Economía', u'http://www.ideal.es/jaen/rss/feeds/economia.xml'),
+        (u'Costa', u'http://www.ideal.es/jaen/rss/feeds/costa.xml'),
+        (u'Andalucía', u'http://www.ideal.es/jaen/rss/feeds/andalucia.xml'),
+        (u'España', u'http://www.ideal.es/jaen/rss/feeds/espana.xml'),
+        (u'Mundo', u'http://www.ideal.es/jaen/rss/feeds/internacional.xml'),
+        (u'Vivir', u'http://www.ideal.es/jaen/rss/feeds/vivir.xml'),
+        (u'Opinión', u'http://www.ideal.es/jaen/rss/feeds/opinion.xml'),
+        (u'Televisión', u'http://www.ideal.es/jaen/rss/feeds/television.xml'),
+        (u'Contraportada', u'http://www.ideal.es/jaen/rss/feeds/contraportada.xml'),
+    ]
+
--- a/recipes/iht.recipe
+++ b/recipes/iht.recipe
@ -1,63 +1,30 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Derry FitzGerald'
-'''
-iht.com
-'''
-import re
-
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ptempfile import PersistentTemporaryFile

+class NYTimesGlobal(BasicNewsRecipe):  # supersedes the InternationalHeraldTribune recipe that lived in this file
+    title          = u'NY Times Global'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    use_embedded_content = False

-class InternationalHeraldTribune(BasicNewsRecipe):
-    title          = u'The International Herald Tribune'
-    __author__     = 'Derry FitzGerald'
-    language = 'en'
-
-    oldest_article = 1
-    max_articles_per_feed = 30
    no_stylesheets = True
+    auto_cleanup = True

-    remove_tags    = [dict(name='div', attrs={'class':['footer','header']}),
-                      dict(name=['form'])]
-    preprocess_regexps = [
-            (re.compile(r'<!-- webtrends.*', re.DOTALL),
-             lambda m:'</body></html>')
-                          ]
-    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'

-    remove_empty_feeds = True
-    
    feeds          = [
-                      (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
-                      (u'Business', u'http://www.iht.com/rss/business.xml'),
-                      (u'Americas', u'http://www.iht.com/rss/america.xml'),
-                      (u'Europe', u'http://www.iht.com/rss/europe.xml'),
-                      (u'Asia', u'http://www.iht.com/rss/asia.xml'),
-                      (u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
-                      (u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
-                      (u'Technology', u'http://www.iht.com/rss/technology.xml'),
-                      (u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
-                      (u'Sports', u'http://www.iht.com/rss/sports.xml'),
-                      (u'Culture', u'http://www.iht.com/rss/arts.xml'),
-                      (u'Style and Design', u'http://www.iht.com/rss/style.xml'),
-                      (u'Travel', u'http://www.iht.com/rss/travel.xml'),
-                      (u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
-                      (u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
-                      (u'Properties', u'http://www.iht.com/rss/properties.xml')
-                    ]
-    temp_files = []
-    articles_are_obfuscated = True
-    
-    masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
-    
-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url)
-        response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
-        html = response1.read()
-        
-        self.temp_files.append(PersistentTemporaryFile('_iht.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-        return self.temp_files[-1].name
+        ('NYTimes',
+         'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
+        ('NYTimes global',
+         'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
+        ('World',
+         'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
+        ('U.S.',
+         'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
+        ('Business',
+         'http://feeds.nytimes.com/nyt/rss/Business'),
+        ('Sports',
+         'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
+        ('Technology',
+         'http://feeds.nytimes.com/nyt/rss/Technology'),
+    ]
--- a/recipes/iktibas.recipe
+++ b/recipes/iktibas.recipe
@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BasicUserRecipe1324739406(BasicNewsRecipe):
+    __author__ = 'asalet_r'
+    title = u'\u0130ktibas'
+    language = 'tr'
+    # Fetch limits: articles up to 7 days old, at most 20 per feed.
+    max_articles_per_feed = 20
+    oldest_article = 7
+    auto_cleanup = True
+    # (section title, RSS feed URL) pairs.
+    feeds = [(u'\u0130ktibas', u'http://www.iktibasdergisi.com/rss/rss.xml')]
--- a/recipes/independent.recipe
+++ b/recipes/independent.recipe
@ -1,27 +1,26 @@
 # adapted from old recipe by Darko Miletic <darko.miletic at gmail.com>

-import string, re
-from calibre import strftime
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString


 class TheIndependentNew(BasicNewsRecipe):
-    
+
    # flag to enable/disable article graphics on business pages/some others
    # eg http://www.independent.co.uk/news/world/europe/berlusconi-departure-fails-to-calm-the-markets-6259682.html
    # -max dimensions can be altered using the .pictureContainer img selector in the css
    _FETCH_ARTICLE_GRAPHICS = True
-    
+
    #Flag to enable/disable image fetching (not business)
    _FETCH_IMAGES = True
-    
-    
+
+
     #used for converting rating to stars
    _STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
    _NO_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star_grey.png'
-   
-   
+
+
    title                   = u'The Independent'
    __author__              = 'Will'
    description             = 'The latest in UK News and World News from The \
@ -40,28 +39,30 @@ class TheIndependentNew(BasicNewsRecipe):
    encoding                = 'utf-8'
    remove_tags             =[
                               dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
-                               dict(attrs={'class' : ['autoplay','openBiogPopup']})
+                               dict(attrs={'class' : ['autoplay','openBiogPopup']}),
+                               dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
+                               dict(attrs={'style' : re.compile('.*')}),
                             ]
-   
+
    keep_only_tags          =[dict(attrs={'id':'main'})]
    recursions = 0
-    
+
    # fixes non compliant html nesting and 'marks' article graphics links
    preprocess_regexps      = [
                                (re.compile('<span class="storyTop ">(?P<nested>.*?)</span>', re.DOTALL),
                                lambda match: '<div class="storyTop">' + match.group('nested') + '</div>'),
                                (re.compile('(<strong>.*?[Cc]lick.*?<a.*?((HERE)|([Hh]ere)).*?</strong>)', re.DOTALL),
                                lambda match: '<div class="article-graphic">' + match.group(0) + '</div>'),
-                              ] 
-    
-    
+                              ]
+
+
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
-                        }  
-                        
+                        }
+
    extra_css             = """
                               h1{font-family: Georgia,serif }
                               body{font-family: Verdana,Arial,Helvetica,sans-serif}
@ -81,124 +82,133 @@ class TheIndependentNew(BasicNewsRecipe):
                               .articleContent {display: block; clear:left;}
                               .storyTop{}
                               .pictureContainer img { max-width: 400px; max-height: 400px;}
-                            """                     
-    
+                            """
+
    oldest_article = 1
    max_articles_per_feed = 100
-    
+
    _processed_urls = []
-    
-    
+
+
    def get_article_url(self, article):
        url = super(self.__class__,self).get_article_url(article)
-        
+
        title = article.get('title', None)
        if title and re.search("^Video:",title):
            return None
-        
-        #remove duplicates 
+
+        #remove duplicates
        if not (url in self._processed_urls):
            self._processed_urls.append(url)
        else:
            url = None
        return url

+    def populate_article_metadata(self, article, soup, first):
+        # ToC thumbnail: use the first <img> that carries a src, skipping <img> tags without one.
+        img = soup.find('img', attrs={'src': True}) if first else None
+        if img is not None and hasattr(self, 'add_toc_thumbnail'):  # hook may be absent on this calibre
+            self.add_toc_thumbnail(article, img['src'])
+
    def preprocess_html(self, soup):
-        
+
        #remove 'advertorial articles'
        strapline = soup.find('div',attrs={'class' : re.compile('.*strapLine.*')})
        if strapline:
            for para in strapline.findAll('p'):
                if len(para.contents) and isinstance(para.contents[0],NavigableString) \
                and para.contents[0] == 'ADVERTORIAL FEATURE':
-                    return None                          
-        
+                    return None
+
        items_to_extract = []
-        
+        slideshow_elements = []
+
        for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
            remove = True
            pattern = re.compile('((articleContent)|(title))$')
            if (pattern.search(item['class'])) is not None:
                remove = False
-            
+
            # corrections
            # story content always good
-            pattern = re.compile('storyContent')    
+            pattern = re.compile('storyContent')
            if (pattern.search(item['class'])) is not None:
                remove = False
-            
+
            #images
-            pattern = re.compile('slideshow')    
+            pattern = re.compile('slideshow')
            if (pattern.search(item['class'])) is not None:
                if self._FETCH_IMAGES:
                    remove = False
+                    slideshow_elements.append(item)
                else:
                    remove = True
-            
+
            #social widgets always bad
-            pattern = re.compile('socialwidget')    
+            pattern = re.compile('socialwidget')
            if (pattern.search(item['class'])) is not None:
                remove = True
-            
+
            if remove:
                items_to_extract.append(item)
-                
+
        for item in items_to_extract:
-            item.extract()  
-            
-        items_to_extract = []                
-        
-        if self._FETCH_IMAGES:        
-            for item in soup.findAll('a',attrs={'href' : re.compile('.*')}):
-                if item.img is not None:
-                    #use full size image
-                    img = item.findNext('img')
-        
-                    img['src'] = item['href']
-        
-                    #insert caption if available
-                    if img['title'] is not None and (len(img['title']) > 1):
-                        tag = Tag(soup,'h3')
-                        text = NavigableString(img['title'])
-                        tag.insert(0,text)
-                    
-                        #picture before text
-                        img.extract()
-                        item.insert(0,img)
-                        item.insert(1,tag)
-        
-                    # remove link
-                    item.name = "div"
-                    item["class"]='image'
-                    del item["href"]
-                
-                
+            item.extract()
+
+        items_to_extract = []
+
+        if self._FETCH_IMAGES:
+            for element in slideshow_elements:
+                for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
+                    if item.img is not None:
+                        #use full size image
+                        img = item.findNext('img')
+
+                        img['src'] = item['href']
+
+                        #insert caption if available
+                        if img.get('title') and (len(img['title']) > 1):
+                            tag = Tag(soup,'h3')
+                            text = NavigableString(img['title'])
+                            tag.insert(0,text)
+
+                            #picture before text
+                            img.extract()
+                            item.insert(0,img)
+                            item.insert(1,tag)
+
+                        # remove link
+                        item.name = "div"
+                        item["class"]='image'
+                        del item["href"]
+
+
        #remove empty subtitles
        """
        currently the subtitle is located in first paragraph after
        sibling <h3 class="subtitle"> tag. This may be 'fixed' at
-        some point. 
-        """       
+        some point.
+        """
        subtitle = soup.find('h3',attrs={'class' : 'subtitle'})
        if subtitle is not None:
            subtitleText = subtitle.findNext('p')
            if subtitleText is not None:
-                if len(subtitleText.contents[0]) <= 1 : 
+                if len(subtitleText.contents[0]) <= 1 :
                    subtitleText.extract()
                    subtitle.extract()
-                       
-        
+
+
        #replace rating numbers with stars
        for item in soup.findAll('div',attrs={ 'class' : 'starRating'}):
            if item is not None:
                soup2 = self._insertRatingStars(soup,item)
            if soup2 is not None:
                soup = soup2
-               
-       
+
+
        #remove empty paragraph tags in storyTop which can leave a space
        #between first paragraph and rest of story
-        nested_content = False       
+        nested_content = False
        storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
        for item in storyTop.findAll('p'):
            for nested in item:
@ -207,19 +217,19 @@ class TheIndependentNew(BasicNewsRecipe):
                    break
            if not nested_content and item.contents is not None and len(item.contents[0]) <= 1 :
                items_to_extract.append(item)
-                
+
        for item in items_to_extract:
-            item.extract()  
-            
-        items_to_extract = [] 
-        
-        
+            item.extract()
+
+        items_to_extract = []
+
+
        #remove line breaks immediately next to tags with default margins
        #to prevent double line spacing and narrow columns of text
        storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
-        self._remove_undesired_line_breaks_from_tag(storyTop,soup)          
-        
-                                          
+        self._remove_undesired_line_breaks_from_tag(storyTop,soup)
+
+
        #replace article graphics link with the graphics themselves
        if self._FETCH_ARTICLE_GRAPHICS:
            items_to_insert = []
@ -231,20 +241,20 @@ class TheIndependentNew(BasicNewsRecipe):
                    if isinstance(child,Tag):
                        if str(child.name) == 'a':
                            items_to_insert.extend(self._get_article_graphic(strong,child['href'],soup))
-                
+
            for item in items_to_insert:
-                item[0].replaceWith(item[1])               
-        
+                item[0].replaceWith(item[1])
+
        for item in items_to_extract:
-            item.extract()      
-        
+            item.extract()
+
        return soup
-        
-        
+
+
    def _get_article_graphic(self,old_item,url,soup):
-        
+
        items_to_insert = []
-        
+
        if re.search('\.jpg$',str(url)):
            div = Tag(soup,'div')
            div['class'] = 'pictureContainer'
@ -254,20 +264,23 @@ class TheIndependentNew(BasicNewsRecipe):
            div.insert(0,img)
            items_to_insert.append((old_item,div,))
            return items_to_insert
-            
+
        soup2 = self.index_to_soup(url)
        for item in soup2.findAll('div',attrs={'class' : re.compile("widget picture article.*")}):
            items_to_insert.append((old_item,item),)
        return items_to_insert
-    
-    
+
+
    def _insertRatingStars(self,soup,item):
-        if item.contents is None:
+        if item.contents is None or len(item.contents) < 1:
            return
        rating = item.contents[0]
-        if not rating.isdigit():
-            return None
-        rating = int(item.contents[0])    
+
+        try:
+            rating = float(item.contents[0])
+        except:
+            print 'Could not convert decimal rating to star: malformatted float.'
+            return
        for i in range(1,6):
            star = Tag(soup,'img')
            if i <= rating:
@ -277,26 +290,26 @@ class TheIndependentNew(BasicNewsRecipe):
            star['alt'] = 'star number ' +  str(i)
            item.insert(i,star)
        #item.contents[0] = NavigableString('(' + str(rating) + ')')
-        item.contents[0] = '' 
-        
+        item.contents[0] = ''
+
    def postprocess_html(self,soup, first_fetch):
        #find broken images and remove captions
        items_to_extract = []
        for item in soup.findAll('div', attrs={'class' : 'image'}):
            img = item.findNext('img')
-            if img is not None and img['src'] is not None:
-                # broken images still point to remote url       
-                pattern = re.compile('http://www.independent.co.uk.*')    
+            if img and img.get('src'):
+                # broken images still point to remote url
+                pattern = re.compile('http://www.independent.co.uk.*')
                if pattern.match(img["src"]) is not None:
                    caption = img.findNextSibling('h3')
                    if caption is not None:
                        items_to_extract.append(caption)
                    items_to_extract.append(img)
-        
+
        for item in items_to_extract:
-            item.extract()              
+            item.extract()
        return soup
-        
+
    def _recurisvely_linearise_tag_tree(
        self,
        item,
@ -311,25 +324,25 @@ class TheIndependentNew(BasicNewsRecipe):
        if not (isinstance(item,Tag)):
            return linearised
        for nested in item:
-            linearised.append(nested)  
+            linearised.append(nested)
            linearised = self._recurisvely_linearise_tag_tree(nested,linearised, count)
        return linearised
-    
-    
+
+
    def _get_previous_tag(self,current_index, tag_tree):
        if current_index == 0:
            return None
        else:
            return tag_tree[current_index - 1]
-    
-    
+
+
    def _get_next_tag(self,current_index, tag_tree):
        if current_index < len(tag_tree) - 1:
            return tag_tree[current_index + 1]
        else:
            return None
-    
-    
+
+
    def _list_match(self,test_str, list_regex):
        for regex in list_regex:
            match = re.match(regex, test_str)
@ -338,24 +351,24 @@ class TheIndependentNew(BasicNewsRecipe):
        return False

    def _remove_undesired_line_breaks_from_tag(self,parent,soup):
-        
+
        if parent is None:
            return
-        
-         
+
+
        tag_tree = self._recurisvely_linearise_tag_tree(parent)
        items_to_remove = []
-       
-        
+
+
        for item in tag_tree:
            if item == u'\n':
               items_to_remove.append(item)
               continue;
-               
+
        for item in items_to_remove:
            tag_tree.remove(item)
-            
-        
+
+
        spaced_tags = [r'p', r'h\d', r'blockquote']
        tags_to_extract = []
        tags_to_replace = []
@ -363,41 +376,41 @@ class TheIndependentNew(BasicNewsRecipe):
            if isinstance(tag, Tag):
                if str(tag) == '<br />':
                    previous_tag = self._get_previous_tag(i, tag_tree)
-        
+
                    if isinstance(previous_tag, Tag):
                        previous_tag_is_spaced = previous_tag is not None\
                             and self._list_match(str(previous_tag.name),
                                spaced_tags)
                    else:
                        previous_tag_is_spaced = False
-        
+
                    next_tag = self._get_next_tag(i, tag_tree)
-        
+
                    if isinstance(next_tag, Tag):
                        next_tag_is_spaced = next_tag is not None\
                             and self._list_match(str(next_tag.name), spaced_tags)
                    else:
                        next_tag_is_spaced = False
-        
+
                    if previous_tag_is_spaced or next_tag_is_spaced or i == 0\
                         or i == len(tag_tree) - 1:
                        tags_to_extract.append(tag)
                    else:
                        tags_to_replace.append((tag,NavigableString(' '),))
-                       
-        
+
+
        for pair in tags_to_replace:
-            pair[0].replaceWith(pair[1])       
+            pair[0].replaceWith(pair[1])
        for tag in tags_to_extract:
            tag.extract()
-                                                                            
+
    feeds = [
        (u'News - UK',
         u'http://www.independent.co.uk/news/uk/?service=rss'),
        (u'News - World',
         u'http://www.independent.co.uk/news/world/?service=rss'),
        (u'News - Business',
-         u'http://www.independent.co.uk/news/business/?service=rss'), 
+         u'http://www.independent.co.uk/news/business/?service=rss'),
        (u'News - People',
         u'http://www.independent.co.uk/news/people/?service=rss'),
        (u'News - Science',
@ -497,4 +510,4 @@ class TheIndependentNew(BasicNewsRecipe):
        (u'IndyBest',
         u'http://www.independent.co.uk/extras/indybest/?service=rss'),
        ]
-        
+
--- a/recipes/indy_star.recipe
+++ b/recipes/indy_star.recipe
@ -1,16 +1,20 @@
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe

-class AdvancedUserRecipe1234144423(BasicNewsRecipe):
-    title          = u'Indianapolis Star'
-    oldest_article = 5
-    language = 'en'
+class IndianapolisStar(BasicNewsRecipe):
+	__author__ = 'Owen Kelly'
+	title = u'Indianapolis Star'
+	language = 'en'
+	cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
+	oldest_article = 10  # days
+	max_articles_per_feed = 100
+	auto_cleanup = True
+	feeds = [  # (section title, RSS feed URL) pairs
+		(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss'),
+		(u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss'),
+		(u'Business Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss'),
+		(u'Politics and Government', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS05&template=rss'),
+		(u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'),
+		(u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]

-    __author__     = 'Owen Kelly'
-    max_articles_per_feed = 100
-
-    cover_url  = u'http://www2.indystar.com/frontpage/images/today.jpg'
-    
-    feeds          = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
-
-    def print_version(self, url):
-        return url + '&template=printart'
+	def print_version(self, url):
+		return ''.join((url, '&template=printart'))  # presumably selects the site's print-friendly article template
--- a/recipes/infra_pl.recipe
+++ b/recipes/infra_pl.recipe
@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class INFRA(BasicNewsRecipe):
+    # Recipe for the INFRA news service; articles come from the www.infra.org.pl RSS feed below.
+    title = u'INFRA'
+    __author__ = 'fenuks'
+    description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
+    category = 'UFO'
+    language = 'pl'
+    cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
+    oldest_article = 7  # days
+    max_articles_per_feed = 100
+    no_stylesheets = True  # attribute name must be spelled exactly like this to be honoured
+    remove_tags_before = dict(name='h2', attrs={'class':'contentheading'})
+    remove_tags_after = dict(attrs={'class':'pagenav'})
+    remove_tags = [dict(attrs={'class':'pagenav'})]
+    feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
--- a/recipes/izdiham.com.recipe
+++ b/recipes/izdiham.com.recipe
@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BasicUserRecipe1324158549(BasicNewsRecipe):
+    """Feed-only recipe for izdiham.com."""
+    title = u'izdiham.com'
+    __author__ = 'asalet_r'
+    language = 'tr'
+
+    # Article age and per-feed count limits.
+    max_articles_per_feed = 20
+    oldest_article = 7
+
+    auto_cleanup = True
+
+    feeds = [
+        (u'\u0130zdiham', u'http://www.izdiham.com/index.php/feed'),
+    ]
--- a/recipes/japan_news.recipe
+++ b/recipes/japan_news.recipe
@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NewsOnJapan(BasicNewsRecipe):
+    """Feed-only recipe for the newsonjapan.com ``rss/top.xml`` feed."""
+    title = u'News On Japan'
+    __author__ = 'Krittika Goyal'
+    language = 'en'
+
+    # Age limit is expressed in days.
+    oldest_article = 1
+    max_articles_per_feed = 25
+
+    use_embedded_content = False
+    auto_cleanup = True
+    no_stylesheets = True
+
+    feeds = [('News', 'http://newsonjapan.com/rss/top.xml')]
--- a/recipes/klip_me.recipe
+++ b/recipes/klip_me.recipe
@ -0,0 +1,72 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1299694372(BasicNewsRecipe):
+    """Recipe that downloads the items listed on a Klip.me account's pages.
+
+    The entries in ``feeds`` are Klip.me listing pages rather than RSS feeds;
+    ``parse_index`` scrapes them for item links.
+    """
+    title                 = u'Klipme'
+    __author__            = 'Ken Sun'
+    publisher             = 'Klip.me'
+    category              = 'info, custom, Klip.me'
+    oldest_article        = 365
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    remove_javascript     = True
+    remove_tags           = [
+        dict(name='div', attrs={'id':'text_controls_toggle'}),
+        dict(name='script'),
+        dict(name='div', attrs={'id':'text_controls'}),
+        dict(name='div', attrs={'id':'editing_controls'}),
+        dict(name='div', attrs={'class':'bar bottom'}),
+    ]
+    use_embedded_content  = False
+    needs_subscription    = True
+    INDEX                 = u'http://www.klip.me'
+    LOGIN                 = INDEX + u'/fav/signin?callback=/fav'
+
+    feeds = [
+        (u'Klip.me unread', u'http://www.klip.me/fav'),
+        # Label matches the ``s=starred`` filter in the URL.
+        (u'Klip.me starred', u'http://www.klip.me/fav?s=starred'),
+    ]
+
+    def get_browser(self):
+        """Return a browser, submitting the sign-in form when a username is set."""
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None:
+            br.open(self.LOGIN)
+            br.select_form(nr=0)
+            br['Email'] = self.username
+            if self.password is not None:
+                br['Passwd'] = self.password
+            br.submit()
+        return br
+
+    def parse_index(self):
+        """Scrape every listing page and return ``[(feed title, articles), ...]``.
+
+        Each article dict carries only a ``url``; the title is filled in later
+        by ``populate_article_metadata``.
+        """
+        totalfeeds = []
+        for feedtitle, feedurl in self.get_feeds():
+            self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll('table',attrs={'class':['item','item new']}):
+                atag = item.a
+                if atag and atag.has_key('href'):
+                    articles.append({'url': atag['href']})
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
+    def print_version(self, url):
+        """Prefix the item link with the site root (``INDEX``)."""
+        return self.INDEX + url
+
+    def populate_article_metadata(self, article, soup, first):
+        """Take the article title from the page's ``<title>`` element."""
+        article.title  = soup.find('title').contents[0].strip()
+
+    def postprocess_html(self, soup, first_fetch):
+        """Prepend the page title as an ``<h1>`` to every element with id "story"."""
+        stories = soup.findAll(attrs={"id" : "story"})
+        if stories:
+            # The title is only looked up when there is something to insert into.
+            heading = '<h1>'+soup.find('title').contents[0].strip()+'</h1>'
+            for story in stories:
+                story.insert(0, heading)
+        return soup
+
+
--- a/recipes/kopalniawiedzy.recipe
+++ b/recipes/kopalniawiedzy.recipe
@ -1,79 +1,79 @@
 __license__   = 'GPL v3'
-__copyright__ = '2011, Attis <attis@attis.one.pl>'
+__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
 __version__ = 'v. 0.1'

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class KopalniaWiedzy(BasicNewsRecipe):
-		title          = u'Kopalnia Wiedzy'
-		publisher      = u'Kopalnia Wiedzy'
-		description    = u'Ciekawostki ze świata nauki i techniki'
-		encoding       = 'utf-8'
-		__author__     = 'Attis'
-		language       = 'pl'
-		oldest_article = 7
-		max_articles_per_feed = 100
-		INDEX          = u'http://kopalniawiedzy.pl/'
-		remove_javascript     = True    
-		no_stylesheets        = True
-		
-		remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
-		remove_tags_after = dict(attrs={'class':'ad-square'})
-		keep_only_tags    = [dict(name="div", attrs={'id':'articleContent'})]
-		extra_css      = '.topimage {margin-top: 30px}'
-		
-		preprocess_regexps = [
-				(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
-				lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
-				(re.compile(u'<br  /><br  />'),
-				lambda match: '<br\/>')
-			]
-		
-		feeds = [
-			(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
-			(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
-			(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
-			(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
-			(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
-			(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
-		]
-		
-		def is_link_wanted(self, url, tag):
-			return tag['class'] == 'next'
-			
-		def remove_beyond(self, tag, next):
-				while tag is not None and getattr(tag, 'name', None) != 'body':
-						after = getattr(tag, next)
-						while after is not None:
-								ns = getattr(tag, next)
-								after.extract()
-								after = ns
-						tag = tag.parent
-		
-		def append_page(self, soup, appendtag, position):
-				pager = soup.find('a',attrs={'class':'next'})
-				if pager:
-					nexturl = self.INDEX + pager['href']
-					soup2 = self.index_to_soup(nexturl)
-					texttag = soup2.find('div', attrs={'id':'articleContent'})
-					
-					tag = texttag.find(attrs={'class':'pages'})
-					self.remove_beyond(tag, 'nextSibling')
-					
-					newpos = len(texttag.contents)
-					self.append_page(soup2,texttag,newpos)
+        # Identification.
+        title = u'Kopalnia Wiedzy'
+        publisher = u'Kopalnia Wiedzy'
+        __author__ = 'Attis & Tomasz Długosz'
+        description = u'Ciekawostki ze świata nauki i techniki'
+        language = 'pl'
+        encoding = 'utf-8'
+
+        # Site root; append_page joins relative pager links onto it.
+        INDEX = u'http://kopalniawiedzy.pl/'
+
+        # Fetch limits and clean-up switches.
+        oldest_article = 7
+        max_articles_per_feed = 100
+        no_stylesheets = True
+        remove_javascript = True

-					appendtag.insert(position,texttag)
+        remove_tags    = [
+            {'name':'p', 'attrs': {'class': 'keywords'}},
+            {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}},
+            {'name':'div', 'attrs': {'class':'article-time-and-cat'}},
+            {'name':'p', 'attrs': {'class':'tags'}},
+        ]
+        remove_tags_after = dict(attrs={'class':'ad-square'})
+        keep_only_tags    = [dict(name="div", attrs={'class':'article-text text-small'})]
+        extra_css      = '.topimage {margin-top: 30px}'
+
+        preprocess_regexps = [
+                # Replace the lightbox link around an image with the bare image,
+                # tagged with the class that extra_css styles.
+                (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
+                lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
+                # Collapse a double line break into a single one.  The replacement
+                # must not contain a backslash: '\/' is not a Python escape, so the
+                # backslash would be emitted verbatim into the HTML.
+                (re.compile(u'<br  /><br  />'),
+                lambda match: '<br/>')
+            ]
+
+        feeds = [
+            (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
+            (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
+            (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
+            (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
+            (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
+            (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
+        ]
+
+        def is_link_wanted(self, url, tag):
+            """Return True only for links whose ``class`` attribute is ``next``.
+
+            Uses ``get`` so that a link without a ``class`` attribute is
+            rejected instead of raising ``KeyError``.
+            """
+            return tag.get('class') == 'next'
+
+        def remove_beyond(self, tag, next):
+                """Extract the nodes reached through attribute *next*, for *tag* and each ancestor.
+
+                *next* is an attribute name (``append_page`` passes
+                ``'nextSibling'``).  The walk climbs via ``tag.parent`` and stops
+                when *tag* is None or is named ``body``.
+                """
+                while tag is not None and getattr(tag, 'name', None) != 'body':
+                        after = getattr(tag, next)
+                        while after is not None:
+                                # The successor is re-read from `tag` itself (not from
+                                # `after`) before extracting; the loop ends once that
+                                # attribute of `tag` is None.
+                                ns = getattr(tag, next)
+                                after.extract()
+                                after = ns
+                        tag = tag.parent
+
+        def append_page(self, soup, appendtag, position):
+            """Fetch the following page(s) of an article and insert them into *appendtag*.
+
+            Looks for an ``a`` element with class ``next`` in *soup*.  When one
+            exists, the linked page is downloaded, everything after its
+            ``pages`` element is removed, further pages are appended
+            recursively, and the page's content div is inserted into
+            *appendtag* at index *position*.
+            """
+            pager = soup.find('a',attrs={'class':'next'})
+            if not pager:
+                return
+
+            nexturl = self.INDEX + pager['href']
+            soup2 = self.index_to_soup(nexturl)
+            texttag = soup2.find('div', attrs={'id':'articleContent'})
+            if texttag is None:
+                # Nothing to splice in; skip instead of failing on None below.
+                # NOTE(review): keep_only_tags selects class
+                # 'article-text text-small' while this lookup still uses the
+                # 'articleContent' id - confirm which one the site serves.
+                return
+
+            tag = texttag.find(attrs={'class':'pages'})
+            self.remove_beyond(tag, 'nextSibling')
+
+            newpos = len(texttag.contents)
+            self.append_page(soup2,texttag,newpos)
+
+            appendtag.insert(position,texttag)


-		def preprocess_html(self, soup): 
-				self.append_page(soup, soup.body, 3)
-				
-				for item in soup.findAll('div',attrs={'class':'pages'}):
-					item.extract()
-					
-				for item in soup.findAll('p', attrs={'class':'wykop'}):
-					item.extract()
-					
-				return soup
+        def preprocess_html(self, soup):
+            """Append follow-up pages to the body, then drop pager and 'wykop' elements."""
+            self.append_page(soup, soup.body, 3)
+
+            # (tag name, class) pairs to remove, processed in this order.
+            unwanted = (('div', 'pages'), ('p', 'wykop'))
+            for tag_name, css_class in unwanted:
+                for node in soup.findAll(tag_name, attrs={'class':css_class}):
+                    node.extract()
+
+            return soup
--- a/recipes/kosmonauta_pl.recipe
+++ b/recipes/kosmonauta_pl.recipe
@ -0,0 +1,14 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Kosmonauta(BasicNewsRecipe):
+    """Feed-only recipe for the Kosmonauta.net RSS feed."""
+    title = u'Kosmonauta.net'
+    __author__ = 'fenuks'
+    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
+    category = 'astronomy'
+    language = 'pl'
+    cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
+
+    # Article age and per-feed count limits.
+    max_articles_per_feed = 100
+    oldest_article = 7
+    no_stylesheets = True
+
+    feeds = [
+        (u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html'),
+    ]
--- a/recipes/la_razon_bo.recipe
+++ b/recipes/la_razon_bo.recipe
@ -1,10 +1,9 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.la-razon.com
 '''

-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaRazon_Bol(BasicNewsRecipe):
@ -16,19 +15,17 @@ class LaRazon_Bol(BasicNewsRecipe):
    oldest_article        = 1
    max_articles_per_feed = 200
    no_stylesheets        = True
-    encoding              = 'cp1252'
+    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'es_BO'
    publication_type      = 'newspaper'
-    delay                 = 1
    remove_empty_feeds    = True
-    cover_url             = strftime('http://www.la-razon.com/portadas/%Y%m%d_LaRazon.jpg')
-    masthead_url          = 'http://www.la-razon.com/imagenes/logo.jpg'
-    extra_css             = """ body{font-family: Arial,Helvetica,sans-serif }
-                                img{margin-bottom: 0.4em}
-                                .noticia-titulo{font-family: Georgia,"Times New Roman",Times,serif}
-                                .lead{font-weight: bold; font-size: 0.8em}
-                                """
+    masthead_url          = 'http://www.la-razon.com/static/LRZRazon/images/lrz-logo.png'
+    extra_css             = """ body{font-family: Georgia,"Times New Roman",Times,serif}
+                                img{margin-bottom: 0.4em; display: block}
+                                .meta{font-size: small; font-family: Arial,Helvetica,sans-serif}
+                            """
+    INDEX = 'http://www.la-razon.com/'

    conversion_options = {
                          'comment'   : description
@ -37,28 +34,37 @@ class LaRazon_Bol(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    keep_only_tags    = [dict(name='div', attrs={'class':['noticia-titulo','noticia-desarrollo']})]
-    remove_tags       = [dict(name=['meta','link','form','iframe','embed','object'])]
+    keep_only_tags    = [dict(name='div', attrs={'class':['pg-hd', 'pg-bd']})]
+    remove_tags       = [
+                          dict(name=['meta','link','form','iframe','embed','object'])
+                         ,dict(name='div', attrs={'class':'bd'})
+                        ]
    remove_attributes = ['width','height']

    feeds = [
-              (u'Editorial'     , u'http://www.la-razon.com/rss_editorial.php' )
-             ,(u'Opinión'       , u'http://www.la-razon.com/rss_opinion.php'   )
-             ,(u'Nacional'      , u'http://www.la-razon.com/rss_nacional.php'  )
-             ,(u'Economia'      , u'http://www.la-razon.com/rss_economia.php'  )
-             ,(u'Ciudades'      , u'http://www.la-razon.com/rss_ciudades.php'  )
-             ,(u'Sociedad'      , u'http://www.la-razon.com/rss_sociedad.php'  )
-             ,(u'Mundo'         , u'http://www.la-razon.com/rss_sociedad.php'  )
-             ,(u'La Revista'    , u'http://www.la-razon.com/rss_larevista.php' )
-             ,(u'Sociales'      , u'http://www.la-razon.com/rss_sociales.php'  )
-             ,(u'Mia'           , u'http://www.la-razon.com/rss_mia.php'       )
-             ,(u'Marcas'        , u'http://www.la-razon.com/rss_marcas.php'    )
-             ,(u'Escape'        , u'http://www.la-razon.com/rss_escape.php'    )
-             ,(u'El Financiero' , u'http://www.la-razon.com/rss_financiero.php')
-             ,(u'Tendencias'    , u'http://www.la-razon.com/rss_tendencias.php')
+              (u'Editorial'     , u'http://www.la-razon.com/rss/opinion/editorial/'     )
+             ,(u'Nacional'      , u'http://www.la-razon.com/rss/nacional/'              )
+             ,(u'Economia'      , u'http://www.la-razon.com/rss/economia/'              )
+             ,(u'Ciudades'      , u'http://www.la-razon.com/rss/ciudades/'              )
+             ,(u'Sociedad'      , u'http://www.la-razon.com/rss/sociedad/'              )
+             ,(u'Mundo'         , u'http://www.la-razon.com/rss/mundo/'                 )
+             ,(u'La Revista'    , u'http://www.la-razon.com/rss/la_revista/'            )
+             ,(u'Sociales'      , u'http://www.la-razon.com/rss/sociales/'              )
+             ,(u'Mia'           , u'http://www.la-razon.com/rss/suplementos/mia/'       )
+             ,(u'Marcas'        , u'http://www.la-razon.com/rss/marcas/'                )
+             ,(u'Escape'        , u'http://www.la-razon.com/rss/suplementos/escape/'    )
+             ,(u'El Financiero' , u'http://www.la-razon.com/rss/suplementos/financiero/')
+             ,(u'Tendencias'    , u'http://www.la-razon.com/rss/suplementos/tendencias/')
            ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
+
+    def get_cover_url(self):
+        """Return the ``src`` of the front-page lightbox image, or None.
+
+        The image is looked up on the site index (``INDEX``).  None is
+        returned when the lightbox div, its ``img`` or the ``src`` attribute
+        is missing, instead of raising on the missing element.
+        NOTE(review): assumes the caller treats None as "no cover" - confirm.
+        """
+        soup = self.index_to_soup(self.INDEX)
+        lightbox = soup.find('div', attrs = {'class' : 'lightbox lightbox-frontpage'})
+        if lightbox is None or lightbox.img is None:
+            return None
+        return lightbox.img.get('src')
+
+
--- a/recipes/la_republica.recipe
+++ b/recipes/la_republica.recipe
@ -1,13 +1,12 @@
 __license__   = 'GPL v3'
 __author__    = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
 __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
-description   = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
+description   = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'

 '''
 http://www.repubblica.it/
 '''

-import re
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe

@ -32,12 +31,6 @@ class LaRepubblica(BasicNewsRecipe):
                              """
                           
    remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
-
-    preprocess_regexps = [
-        (re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
-        (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE), lambda match: '<head><title>'),
-        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda match: '</title></head>')
-    ]
    
    def get_article_url(self, article):
        link = BasicNewsRecipe.get_article_url(self, article)
@ -73,15 +66,15 @@ class LaRepubblica(BasicNewsRecipe):
    remove_tags        = [
                            dict(name=['object','link','meta','iframe','embed']),
                            dict(name='span',attrs={'class':'linkindice'}),
-                            dict(name='div', attrs={'class':'bottom-mobile'}),
-                            dict(name='div', attrs={'id':['rssdiv','blocco']}),
-                            dict(name='div', attrs={'class':'utility'}),
+                            dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
+                            dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
+                            dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
                            dict(name='div', attrs={'class':'generalbox'}),
                            dict(name='ul', attrs={'id':'hystory'})
                         ]

    feeds          = [
-                       (u'Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
+                       (u'Homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
                       (u'Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
                       (u'Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
                       (u'Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
@ -110,3 +103,5 @@ class LaRepubblica(BasicNewsRecipe):
            del item['style']           
        return soup
                      
+    def preprocess_raw_html(self, raw, url):
+        """Replace everything before ``</head>`` with a bare ``<html><head>``.
+
+        When *raw* contains no ``</head>`` it is returned unchanged;
+        ``str.find`` returns -1 in that case, and slicing from -1 would keep
+        only the last character of the page.
+        """
+        head_end = raw.find('</head>')
+        if head_end == -1:
+            return raw
+        return '<html><head>' + raw[head_end:]
--- a/recipes/lega_nerd.recipe
+++ b/recipes/lega_nerd.recipe
@ -0,0 +1,14 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1326135232(BasicNewsRecipe):
+    """Feed-only recipe for the Lega Nerd FeedBurner feed."""
+    title = u'Lega Nerd'
+    __author__ = 'faber1971'
+    description = 'nerd / geek culture, pc, comics, music, culture'
+    language = 'it'
+
+    # Article age and per-feed count limits.
+    max_articles_per_feed = 100
+    oldest_article = 7
+    auto_cleanup = True
+
+    feeds = [(u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd')]
+
+
+# Module-level metadata, defined outside the class.
+__version__ = 'v1.0'
+__date__ = '9, January 2011'
--- a/recipes/letsgetcritical.recipe
+++ b/recipes/letsgetcritical.recipe
@ -0,0 +1,94 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LetsGetCritical(BasicNewsRecipe):
+    """Recipe that scrapes the category listing pages of letsgetcritical.org.
+
+    ``parse_index`` walks each category in ``CATEGORIES`` page by page and
+    collects the linked articles; links pointing back to the site itself are
+    skipped.
+    """
+    title          = u"Let's Get Critical"
+    description    = 'Curation / aggregation of criticisms of the arts and culture '
+    language = 'en'
+    __author__     = 'barty on mobileread.com forum'
+    max_articles_per_feed = 100
+    no_stylesheets = False
+    timefmt        = ' [%a, %d %b, %Y]'
+    oldest_article = 365
+    auto_cleanup   = True
+    INDEX          = 'http://www.letsgetcritical.org'
+    CATEGORIES     = [
+        # comment out categories you don't want
+        # (user friendly name, system name, max number of articles to load)
+        ('Architecture','architecture',30),
+        ('Art','art',30),
+        ('Books','books',30),
+        ('Design','design',30),
+        ('Digital','digital',30),
+        ('Food','food',30),
+        ('Movies','movies',30),
+        ('Music','music',30),
+        ('Television','television',30),
+        ('Other articles','',10)
+        ]
+
+    def parse_index(self):
+        """Return ``[(category name, [article dict, ...]), ...]``.
+
+        Each article dict has ``title``, ``url``, ``description`` and ``date``.
+        A URL is listed only once across all categories, and categories that
+        yield no articles are omitted.
+        """
+        self.cover_url = 'http://www.letsgetcritical.org/wp-content/themes/lets_get_critical/images/lgc.jpg'
+        feeds = []
+        seen_urls = set()
+        # Group 2 is the host name of the article URL without a leading "www.".
+        regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
+
+        for cat_name, tag, max_articles in self.CATEGORIES:
+
+            tagurl = '/category/'+tag.lower() if tag else ''
+            self.log('Reading category:', cat_name)
+
+            articles = []
+            pageno = 1
+
+            # The page cap bounds the crawl even if the site never runs out
+            # of listing pages.
+            while len(articles) < max_articles and pageno < 100:
+
+                if pageno > 1:
+                    page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno)
+                else:
+                    page = self.INDEX + tagurl
+                pageno += 1
+
+                self.log('\tReading page:', page)
+                try:
+                    soup = self.index_to_soup(page)
+                except Exception:
+                    # Any fetch failure ends this category; articles collected
+                    # so far are kept.  KeyboardInterrupt and SystemExit are
+                    # no longer swallowed.
+                    break
+
+                posts = soup.findAll('div',attrs={'class':'post_multi'})
+                if not posts:
+                    break
+
+                for post in posts:
+                    dt = post.find('div',attrs={'class':'title'})
+                    atag = dt.find('a') if dt is not None else None
+                    if atag is None or not atag.get('href'):
+                        # Post without a usable title link: nothing to download.
+                        continue
+                    url = atag['href']
+                    # skip promotionals and duplicate
+                    if url.startswith(('http://letsgetcritical', '/')) or url in seen_urls:
+                        continue
+                    seen_urls.add(url)
+                    title = self.tag_to_string(atag)
+                    self.log('\tFound article:', title)
+                    self.log('\t', url)
+                    desc = post.find('blockquote')
+                    desc = self.tag_to_string(desc) if desc else ''
+                    m = regex.match( url)
+                    if m:
+                        desc = "[%s] %s" %  (m.group(2), desc)
+                    date = ''
+                    p = post.previousSibling
+                    # navigate up sibling to find date
+                    while p:
+                        # Only nodes that expose attribute lookup are tested;
+                        # get() tolerates siblings without a class attribute,
+                        # where p['class'] would raise KeyError.
+                        lookup = getattr(p, 'get', None)
+                        if lookup is not None and lookup('class') == 'singledate':
+                            date = self.tag_to_string(p)
+                            break
+                        p = p.previousSibling
+                    articles.append({'title':title,'url':url,'description':desc,'date':date})
+                    if len(articles) >= max_articles:
+                        break
+
+            if articles:
+                feeds.append((cat_name, articles))
+
+        return feeds
+
+
--- a/recipes/los_tiempos_bo.recipe
+++ b/recipes/los_tiempos_bo.recipe
@ -41,7 +41,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
    keep_only_tags    = [dict(name='div', attrs={'id':'articulo'})]
    remove_tags       = [
                          dict(name=['meta','link','form','iframe','embed','object','hr'])
-                         ,dict(attrs={'class':['caja_fonts sin_border_bot','pub']})
+                         ,dict(attrs={'class':['caja_fonts sin_border_bot','pub','twitter-share-button']})
                        ]
    remove_attributes = ['width','height']

--- a/recipes/lwn_weekly.recipe
+++ b/recipes/lwn_weekly.recipe
@ -14,8 +14,11 @@ class WeeklyLWN(BasicNewsRecipe):
    description = 'Weekly summary of what has happened in the free software world.'
    __author__ = 'Davide Cavalca'
    language = 'en'
+    site_url = 'http://lwn.net'

-    cover_url = 'http://lwn.net/images/lcorner.png'
+    extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'
+
+    cover_url = site_url + '/images/lcorner.png'
    #masthead_url = 'http://lwn.net/images/lcorner.png'
    publication_type = 'magazine'

@ -43,11 +46,29 @@ class WeeklyLWN(BasicNewsRecipe):
            br.submit()
        return br

+    def print_version(self, url):
+        """Turn an article link into its absolute, printable URL.
+
+        Drops any ``#fragment``, prefixes ``site_url`` when the link does not
+        already start with it, and appends ``?format=printable`` unless the
+        URL already ends with that parameter.
+        """
+        suffix = '?format=printable'
+        base = self.site_url
+
+        # Everything from the first '#' onwards is discarded.
+        url = url.partition('#')[0]
+
+        if not url.startswith(base):
+            url = base + url
+
+        if not url.endswith(suffix):
+            url = url + suffix
+
+        return url
+
+
    def parse_index(self):
        if self.username is not None and self.password is not None:
-            index_url = 'http://lwn.net/current/bigpage?format=printable'
+            index_url = self.print_version('/current/bigpage')
        else:
-            index_url = 'http://lwn.net/free/bigpage?format=printable'
+            index_url = self.print_version('/free/bigpage')
        soup = self.index_to_soup(index_url)
        body = soup.body

@ -56,19 +77,19 @@ class WeeklyLWN(BasicNewsRecipe):
        url_re = re.compile('^/Articles/')

        while True:
-            tag_title = body.findNext(name='p', attrs={'class':'SummaryHL'})
+            tag_title = body.findNext(attrs={'class':'SummaryHL'})
            if tag_title == None:
                break

-            tag_section = tag_title.findPrevious(name='p', attrs={'class':'Cat1HL'})
+            tag_section = tag_title.findPrevious(attrs={'class':'Cat1HL'})
            if tag_section == None:
                section = 'Front Page'
            else:
                section = tag_section.string

-            tag_section2 = tag_title.findPrevious(name='p', attrs={'class':'Cat2HL'})
+            tag_section2 = tag_title.findPrevious(attrs={'class':'Cat2HL'})
            if tag_section2 != None:
-                if tag_section2.findPrevious(name='p', attrs={'class':'Cat1HL'}) == tag_section:
+                if tag_section2.findPrevious(attrs={'class':'Cat1HL'}) == tag_section:
                    section = "%s: %s" %(section, tag_section2.string)

            if section not in articles.keys():
@ -94,9 +115,10 @@ class WeeklyLWN(BasicNewsRecipe):
            if tag_url == None:
                break

+
            article = dict(
                title=self.tag_to_string(tag_title),
-                url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
+                url=tag_url['href'],
                description='', content='', date='')
            articles[section].append(article)

--- a/Show More
+++ b/Show More