Merge from John Store

commit 5223b9f519 by Charles Haley, 2011-12-03 09:36:48 +01:00
386 changed files with 142414 additions and 108191 deletions

View File

@ -19,6 +19,433 @@
# new recipes:
# - title:
- version: 0.8.28
date: 2011-11-25
new features:
- title: "Get Books: Add litres.ru store"
- title: "Change the algorithm that generates title sort strings to strip leading articles from both english and the current language set for the calibre user interface. In addition, in the edit metadata dialog, calibre will use the book's language when calculating the sort string. This behavior can be adjusted via Preferences->Tweaks."
tickets: [886763]
- title: "Driver for Cybook Odyssey."
tickets: [893457]
- title: "Irex driver: Put books into the top level directory instead of into /ebooks or /Books."
tickets: [883616]
bug fixes:
- title: "Have downloaded periodicals recognized when transferred via USB to the Kindle Fire"
- title: "MOBI Output: Fix underline and strikethrough properties declared on parents not being rendered on child tags."
tickets: [894245]
- title: "Template language: Fix regression that broke ordering of items when formatting a list"
- title: "Conversion pipeline: When removing obsolete <font> tags convert them to <div> instead of <span> if they contain block level tags."
tickets: [892525]
- title: "When downloading metadata, fix the case normalization of double-barelled author names."
tickets: [893257]
- title: "Template language: Fix regression that broke using general program mode in save to disk templates"
- title: "calibredb: Fix use of ranges when specifying ids for the remove command"
- title: "Apple driver: Add ids for iPhone 4S. More robust against iTunes automation errors when adding artwork."
tickets: [892468]
- title: "Fix encoding of comments incorrectly detected when downloading metadata from ozon.ru"
- title: "Fix calibre not getting list of books on the Kindle Fire"
improved recipes:
- El Mundo
- BBC
- NIN Online
- ABC Australia
- Salon.com
- Expansion (Spanish)
- The Week
- Heise Online
new recipes:
- title: Give me something to read and Let's get Critical
author: Barty
- title: Worldcrunch
author: Krittika Goyal
- version: 0.8.27
date: 2011-11-18
new features:
- title: "Drivers for the Kindle Fire and the Nook Tablet"
tickets: [890918]
- title: "Conversion: Add an option under Look & Feel to remove specified style information (CSS) from the document during conversion."
tickets: [871384]
- title: "Add an option in the bulk metadata edit dialog to restore the pre-conversion files for many books with a single click."
tickets: [886116]
- title: "Jobs list: Add the ability to search for and to hide jobs, useful if you have run a lot of jobs and the list is getting crowded."
tickets: [883734]
- title: "Book jacket generation: Add ability to customize the book jacket template and add custom columns into the jacket."
tickets: [889912]
- title: "MOBI Input: Performance improvement when viewing/converting a file with a lot of links"
bug fixes:
- title: "Fix regression in 0.8.26 that broke disabling the update of particular fields during a bulk metadata download."
tickets: [889696]
- title: "Get Books: Fix DRM status for legimi"
- title: "When parsing for lxml via BeatifulSoup, use the calibre modified copy of BeautifulSoup (more robust)."
tickets: [889890]
- title: "HTML Input: Handle double encoded URLs in img tags"
tickets: [889323]
improved recipes:
- Various Polish recipes
- Academia Catavencu
- El Periodico de Aragon
- Weblogs SL
- Folha de Sao Paulo (subscription)
new recipes:
- title: News on Japan
author: Krittika Goyal
- title: Formula AS
author: Silviu Cotoara
- title: Various Turkish news sources
author: Osman Kaysan
- title: Infra.pl and Spider's Web
author: fenuks
- version: 0.8.26
date: 2011-11-12
new features:
- title: "Tweak to control sorting of date type columns. You can choose to have them sorted only by displayed fields"
- title: "Driver for the Trekstor 3.0"
- title: "Performance improvements when evaluating templates, and in particular general program mode templates"
bug fixes:
- title: "ODT Input: When converting to EPUB improve handling of large images placed inside small frames, to prevent them from obscuring text."
tickets: [860272,884759]
- title: "EPUB Input: Automatically strip entries of type application/text from the spine. Apparently there are EPUB production tools out there that create them."
tickets: [884792]
- title: "Keep the startup splash screen visible until the GUI has fully completed initializing."
tickets: [885827]
- title: "ODT Input: Fix handling of span tags containing only whitespace."
tickets: [887311]
- title: "On windows when changing title or author via the main book list, handle the case of one of the books files being open in another program more gracefully."
tickets: [880585]
- title: "When adding a format to an existing book record, ensure that no changes are made to the database until after the file operations have succeeded."
- title: "Fix bug that prevented configuring which fields to download metadata for when adding books by ISBN"
tickets: [856076]
- title: "Fix Japanese characters not being crrectly displayed on index pages in news downloads for the SONY T1"
tickets: [888029]
- title: "Get Books: Fix booleans in search expressions not working in non-English calibre versions"
tickets: [887554]
- title: "Fix a bug in the support for hours/minutes/seconds in datetime format strings"
tickets: [887412]
- title: "Treat an author_sort value of 'Unknown' the same way as unknown authors are treated in template processing"
- title: "Detect SD card in Kobo Vox"
- title: "Amazon metadata download: Workaround for change in Amazon website causing some books to have incorrect ratings downloaded"
improved recipes:
- Metro NL
- The Independent
- Frankfurter Rundschau
- L'Espresso
- Il Giornale
- Berlingske.dk
- Sueddeutsche Zeitung
new recipes:
- title: Techtarget
author: Julio Map
- version: 0.8.25
date: 2011-11-06
new features:
- title: "Drivers for the LG Optimus 2X, HTC Incredible S, Samsung Stratosphere and the Kobo Vox"
tickets: [886558, 885058, 884762, 884039]
- title: "Get books: Add ebookpoint.pl store"
- title: "Support hour/minute/seconds in datetime format strings in the template language and in tweaks"
bug fixes:
- title: "Fix Book detils preferences showing custom columns even after they have been deleted"
tickets: [884799]
- title: "Replace use of insecure tempfile in the bundled rtf2xml library."
tickets: [885245]
- title: "Remove the suid mount helper used on linux and bsd, as it proved impossible to make it secure."
description: "This means that if you are on BSD or an older linux distribution, without support for udisks, device detection will no longer work in calibre. You will have to either mount the devices by hand before starting calibre, or stick with version 0.8.24 (the vulnerability in the mount helper is a privilege escalation, which is relatively harmless on the vast majority of single user systems)."
tickets: [885027]
- title: "Do not error out if there is an invalid regex for title sort set in tweaks"
- title: "Content server: Fix another place where --url-prefix was forgotten"
tickets: [885332]
- title: "HTML Input: Limit memory consumption when converting HTML files that link to large binary files."
tickets: [884821]
- title: "T1 driver: Workaround for T1 showing error messages when opening some news downloads on the device"
- title: "Kobo driver: Fix longstanding bug that would prevent re-adding a epub that has been previously deleted from the Kobo using calibre"
- title: "Fix partial cover search not resuming after pressing back in the metadata download dialog"
tickets: [875196]
- title: "T1 driver: Fix auto refresh covers option"
- title: "Content server: Do not show tracebacks in HTML output when not running in develop mode"
- title: "Textile output; Fix out of memory issue when dealing with large margins."
improved recipes:
- The Independent
- Die Zeit subscription version
- NIN online
- Science News
- Updated Daily Mirror
- Science AAAS
new recipes:
- title: b365 Realitatea and Catavencii
author: Silviu Cotoara
- title: Various Greek news sources
author: Stelios
- title: Real world economics blog
author: Julio Map
- version: 0.8.24
date: 2011-10-27
new features:
- title: "Kobo: Add support for fetching annotations from the kobo reader."
description: "Right click the send to device button in calibre with your kobo connected and choose fetch annotations. The annotations are placed into the comments of the corresponding books in the calibre library. This feature is still experimental."
type: major
- title: "Preserve the set of selected books in the library view when a device is connected, fixing a long standing annoyance"
bug fixes:
- title: "Prevent changing of device metadata management option while a device is connected."
tickets: [874118]
- title: "Book details panel: Show tooltip only when hovering over cover, not the rest of the book information, as it makes it hard to read."
tickets: [876454]
- title: "MOBI Output: Fix use of list elements as link anchors caused links to always point to start of list."
tickets: [879391]
- title: "RB Output: Fix calibre generated rb files not being opened by the RocketBook."
tickets: [880930]
- title: "FB2 Input: Dont choke on FB2 files that have empty embedded content tags."
tickets: [880904]
- title: "ODT Input: CSS rationalization should not fail with non ascii class names"
- title: "Fix creating new library using the copy structure option incorrectly setting all text type columns to be like the tags column"
- title: "E-book viewer: Don't choke on windows installs with a non UTF-8 filesystem encoding."
tickets: [879740]
improved recipes:
- Novaya Gazeta
- El Universal (Venezuela)
- The Australian (subscription enabled)
- Metro NL
- The Scotsman
- Japan Times
new recipes:
- title: Silicon Republic
author: Neil Grogan
- title: Calibre Blog
author: Krittika Goyal
- version: 0.8.23
date: 2011-10-21
new features:
- title: "Drivers for T-Mobile Move, new Pandigital Novel, New Onyx Boox and Freescale MX 515"
- title: "SONY T1 driver: Support for periodicals and better timezone detection"
- title: "Add a remove cover entry to the right click menu of the cover display in the right panel"
tickets: [874689]
bug fixes:
- title: "Amazon metadata download: Fix for change in Amazon website that broke downloading metadata."
tickets: [878395]
- title: "MOBI metadata: When reading titles from MOBI files only use the title in the PDB header if there is no long title in the EXTH header"
tickets: [875243]
- title: "Fix regression that broke use of complex custom columns in save to disk templates."
tickets: [877366]
- title: "Fix regression that broke reading metadata from CHM files"
- title: "Fix a bug that broke conversion of some zipped up HTML files with non ascii filenames on certain windows installs."
tickets: [873288]
- title: "RTF Input: Fix bug in handling of paragraph separators."
tickets: [863735]
- title: "Fix a regression that broke downloading certain periodicals for the Kindle."
tickets: [875595]
- title: "Fix regression that broke updating of covers inside ebook files when saving to disk"
- title: "Fix regression breaking editing the 'show in tag browser' checkbox in custom column setup editing"
- title: "Fix typo that broke stopping selected jobs in 0.8.22"
improved recipes:
- Columbus Dispatch
- Ming Pao
- La Republica
- Korea Times
- USA Today
- CNN
- Liberation
- El Pais
- Helsingin Sanomat
new recipes:
- title: Kyugyhang, Hankyoreh and Hankyoreh21
author: Seongkyoun Yoo
- title: English Kathimerini
author: Thomas Scholl
- title: Various French news sources
author: Aurelien Chabot
- version: 0.8.22
date: 2011-10-14
new features:
- title: "Input plugin for OCR-ed DJVU files (i.e. .djvu files that contain text. Only the text is converted)"
type: major
- title: "Driver for the SONY PRS T1"
- title: "Add a 'Back' button to the metadata download dialog while downloading covers, so that you can go back and select a different match if you dont lke the covers, instead of having to re-do the entire download."
tickets: [855055]
- title: "Add an option in Preferences->Saving to disk to not show files in file browser after saving to disk"
- title: "Get Books: Add the amazon.fr store. Remove leading 'by' from author names. Fix encoding issues with non English titles/names"
- title: "Driver for Onyx BOOX A61S/X61S"
tickets: [872741]
- title: "Kobo: Add support for uploading new covers to the device without converting the ePub. You can just resend the book to have the cover updated"
- title: "Make it a little harder to ignore the fact that there are multiple toolbars when customizing toolbars"
tickets: [864589]
bug fixes:
- title: "MOBI Input: Remove invalid tags of the form <xyz: >"
tickets: [872883]
- title: "calibredb add_format does not refresh running calibre instance"
tickets: [872961]
- title: "Conversion pipeline: Translate <font face> to CSS font-family"
tickets: [871388]
- title: "When sending email add a Date: header so that amavis does not consider the emails to be spam"
- title: "Fix for the problem where setting the restriction to an empty current search clears the restriction box but does not clear the restriction."
tickets: [871921]
- title: "Fix generation of column coloring rules for date/time columns"
- title: "Fix plugboard problem where customizations to formats accepted by a device were ignored."
- title: "Enable adding of various actions to the toolbar when device is connected (they had been erroneously marked as being non-addable)"
- title: "Fixable content in library check is not hidden after repair"
tickets: [864096]
- title: "Catalog generation: Handle a corrupted thumbnail cache."
- title: "Do not error out when user clicks stop selected job with no job selected."
tickets: [863766]
improved recipes:
- automatiseringgids
- CNET
- Geek and Poke
- Gosc Niedzielny
- Dilbert
- Economist
- Ming Pao
- Metro UK
- Heise Online
- FAZ.net
- Houston Chronicle
- Slate
- Descopera
new recipes:
- title: WoW Insider
author: Krittika Goyal
- title: Merco Press and Penguin news
author: Russell Phillips
- title: Defense News
author: Darko Miletic
- title: Revista Piaui
author: Eduardo Simoes
- title: Dark Horizons
author: Jaded
- title: Various polish news sources
author: fenuks
- version: 0.8.21
date: 2011-09-30

recipes/20minutes.recipe Normal file
View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
20minutes.fr
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Minutes(BasicNewsRecipe):

    title = '20 minutes'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'cp1252'
    publisher = '20minutes.fr'
    category = 'Actualités, France, Monde'
    language = 'fr'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
    '''

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['mn-section-heading']}),
        dict(name='a', attrs={'href':['#commentaires']}),
        dict(name='div', attrs={'class':['mn-right']}),
        dict(name='div', attrs={'class':['mna-box']}),
        dict(name='div', attrs={'class':['mna-comment-call']}),
        dict(name='div', attrs={'class':['mna-tools']}),
        dict(name='div', attrs={'class':['mn-trilist']})
    ]

    keep_only_tags = [dict(id='mn-article')]
    remove_tags_after = dict(name='div', attrs={'class':['mna-body','mna-signature']})

    feeds = [
        ('France', 'http://www.20minutes.fr/rss/actu-france.xml'),
        ('International', 'http://www.20minutes.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'),
        ('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'),
        ('Economie', 'http://www.20minutes.fr/rss/economie.xml'),
        ('Politique', 'http://www.20minutes.fr/rss/politique.xml'),
        (u'Médias', 'http://www.20minutes.fr/rss/media.xml'),
        ('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'),
        ('People', 'http://www.20minutes.fr/rss/people.xml'),
        ('Culture', 'http://www.20minutes.fr/rss/culture.xml'),
        ('Sport', 'http://www.20minutes.fr/rss/sport.xml'),
        ('Paris', 'http://www.20minutes.fr/rss/paris.xml'),
        ('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'),
        ('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
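A minimal standalone sketch (editor's illustration, not part of this commit) of what preprocess_html() above does: it deletes inline style attributes so that the recipe's extra_css controls the formatting. It assumes calibre's bundled copy of BeautifulSoup is importable, i.e. that it is run inside a calibre environment.

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '<div style="color:red"><p style="font-size:8px">Actu</p></div>'
    soup = BeautifulSoup(html)
    for item in soup.findAll(style=True):
        del item['style']  # the same deletion preprocess_html() performs
    print(soup)            # -> <div><p>Actu</p></div>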

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
-__copyright__ = '2010, Dean Cording'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
abc.net.au/news
'''
@ -8,7 +8,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class ABCNews(BasicNewsRecipe):
    title = 'ABC News'
-   __author__ = 'Dean Cording'
    __author__ = 'Pat Stapleton, Dean Cording'
    description = 'News from Australia'
    masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
    cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
@ -23,7 +23,9 @@ class ABCNews(BasicNewsRecipe):
    category = 'News, Australia, World'
    language = 'en_AU'
    publication_type = 'newsportal'
-   preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments' : description
        ,'tags' : category
@ -32,23 +34,23 @@ class ABCNews(BasicNewsRecipe):
        ,'linearize_tables': False
    }
-   keep_only_tags = dict(id='article')
    keep_only_tags = [dict(attrs={'class':['article section']})]
-   remove_tags = [dict(attrs={'class':['related', 'tags']}),
-       dict(id='statepromo')
-   ]
    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
        'inline-content story left', 'inline-content map left contracted', 'published',
        'story-map', 'statepromo', 'topics', ]})]
    remove_attributes = ['width','height']
    feeds = [
-       ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
-       ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
-       ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
-       ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
-       ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
-       ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
-       ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
-       ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
-       ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
-       ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
        ('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
        ('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
        ('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
        ('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
        ('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
        ('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
        ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
        ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
        ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
        ('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
    ]
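A quick standalone check (editor's illustration, not part of this commit) of the new map-link regex above. The pattern deliberately anchors on maps.google so that other 'inline-caption' anchors, which are used for some image captions, survive; the sample HTML below is made up.

    import re

    pat = re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL)
    html = ('<p>Flood warnings issued.</p>'
            '<a class="inline-caption" href="http://maps.google.com/maps?q=-27.5,153">View map</a>'
            '<a class="inline-caption" href="http://example.com/photo">Photo gallery</a>')
    print(pat.sub('', html))
    # -> <p>Flood warnings issued.</p><a class="inline-caption" href="http://example.com/photo">Photo gallery</a>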

View File

@ -10,27 +10,15 @@ class autogids(BasicNewsRecipe):
    publisher = 'AutomatiseringGids'
    category = 'Nieuws, IT, Nederlandstalig'
    simultaneous_downloads = 5
-   #delay = 1
-   timefmt = ' [%A, %d %B, %Y]'
-   #timefmt = ''
    timefmt = ' [%a, %d %B, %Y]'
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'newspaper'
    encoding = 'utf-8'
-   cover_url = 'http://www.automatiseringgids.nl/siteimg/header_logo.gif'
    cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'
-   keep_only_tags = [dict(id=['content'])]
    keep_only_tags = [dict(name='div', attrs={'class':['content']})]
-   extra_css = '.artikelheader {font-size:0.8em; color: #666;} .artikelintro {font-weight:bold} div.imgArticle {float: right; margin: 0 0em 1em 1em; display: block; position: relative; } \
-       h2 { margin: 0 0 0.5em; min-height: 30px; font-size: 1.5em; letter-spacing: -0.2px; margin: 0 0 0.5em; color: black; font-weight: bold; line-height: 1.2em; padding: 4px 3px 0; }'
-   remove_tags = [dict(name='div', attrs={'id':['loginbox','reactiecollapsible','reactiebox']}),
-       dict(name='div', attrs={'class':['column_a','column_c','bannerfullsize','reactieheader','reactiecollapsible','formulier','artikel_headeroptions']}),
-       dict(name='ul', attrs={'class':['highlightlist']}),
-       dict(name='input', attrs={'type':['button']}),
-       dict(name='div', attrs={'style':['display:block; width:428px; height:30px; float:left;']}),
-   ]
    preprocess_regexps = [
        (re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
         lambda match: ''),

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
b365.realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class b365Realitatea(BasicNewsRecipe):

    title = u'b365 Realitatea'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'b365 Realitatea'
    description = u'b365 Realitatea'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Romania,Bucuresti'
    encoding = 'utf-8'
    cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'

    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class':'newsArticle'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'date'})
        , dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})
        , dict(name='div', attrs={'class':'related_posts'})
        , dict(name='div', attrs={'id':'RelevantiWidget'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':'RelevantiWidget'})
    ]

    feeds = [
        (u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

View File

@ -1,61 +1,648 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
##
## Title: BBC News, Sport, and Blog Calibre Recipe
## Contact: mattst - jmstanfield@gmail.com
##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright: mattst - jmstanfield@gmail.com
##
## Written: November 2011
## Last Edited: 2011-11-19
##
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = 'mattst - jmstanfield@gmail.com'
'''
-news.bbc.co.uk
BBC News, Sport, and Blog Calibre Recipe
'''
# Import the regular expressions module.
import re
# Import the BasicNewsRecipe class which this class extends.
from calibre.web.feeds.recipes import BasicNewsRecipe
-class BBC(BasicNewsRecipe):
class BBCNewsSportBlog(BasicNewsRecipe):
-title = 'BBC News'
-__author__ = 'Darko Miletic, Starson17'
-description = 'News from UK. '
-oldest_article = 2
-max_articles_per_feed = 100
-no_stylesheets = True
-#delay = 1
-use_embedded_content = False
-encoding = 'utf8'
-publisher = 'BBC'
-category = 'news, UK, world'
-language = 'en_GB'
-publication_type = 'newsportal'
-extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-conversion_options = {
-'comments' : description
-,'tags' : category
-,'language' : language
-,'publisher' : publisher
-,'linearize_tables': True
#
# **** IMPORTANT USERS READ ME ****
#
# First select the feeds you want then scroll down below the feeds list
# and select the values you want for the other user preferences, like
# oldest_article and such like.
#
#
# Select the BBC rss feeds which you want in your ebook.
# Selected feeds have NO '#' at their start, de-selected feeds begin with a '#'.
#
# Eg. ("News Home", "http://feeds.bbci.co.uk/... - include feed.
# Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
#
# There are 68 feeds below which constitute the bulk of the available rss
# feeds on the BBC web site. These include 5 blogs by editors and
# correspondents, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
# Wales, Scotland Business), and 7 Welsh language feeds.
#
# Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
# so if "oldest_article = 1.5" (only articles published in the last 36 hours)
# you may get some 'empty feeds' which will not then be included in the ebook.
#
# The 15 feeds currently selected below are simply my default ones.
#
# Note: With all 68 feeds selected, oldest_article set to 2,
# max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
# the ebook creation took 29 minutes on my speedy 100 mbps net connection,
# fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
# More realistically with 15 feeds selected, oldest_article set to 1.5,
# max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
# it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
#
# Select / de-select the feeds you want in your ebook.
#
feeds = [
("News Home", "http://feeds.bbci.co.uk/news/rss.xml"),
("UK", "http://feeds.bbci.co.uk/news/uk/rss.xml"),
("World", "http://feeds.bbci.co.uk/news/world/rss.xml"),
#("England", "http://feeds.bbci.co.uk/news/england/rss.xml"),
#("Scotland", "http://feeds.bbci.co.uk/news/scotland/rss.xml"),
#("Wales", "http://feeds.bbci.co.uk/news/wales/rss.xml"),
#("N. Ireland", "http://feeds.bbci.co.uk/news/northern_ireland/rss.xml"),
#("Africa", "http://feeds.bbci.co.uk/news/world/africa/rss.xml"),
#("Asia", "http://feeds.bbci.co.uk/news/world/asia/rss.xml"),
#("Europe", "http://feeds.bbci.co.uk/news/world/europe/rss.xml"),
#("Latin America", "http://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
#("Middle East", "http://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
("US & Canada", "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
("Politics", "http://feeds.bbci.co.uk/news/politics/rss.xml"),
("Science/Environment", "http://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
("Technology", "http://feeds.bbci.co.uk/news/technology/rss.xml"),
("Magazine", "http://feeds.bbci.co.uk/news/magazine/rss.xml"),
("Entertainment/Arts", "http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
#("Health", "http://feeds.bbci.co.uk/news/health/rss.xml"),
#("Education/Family", "http://feeds.bbci.co.uk/news/education/rss.xml"),
("Business", "http://feeds.bbci.co.uk/news/business/rss.xml"),
("Special Reports", "http://feeds.bbci.co.uk/news/special_reports/rss.xml"),
("Also in the News", "http://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
#("Newsbeat", "http://www.bbc.co.uk/newsbeat/rss.xml"),
#("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
("Blog: Nick Robinson (Political Editor)", "http://feeds.bbci.co.uk/news/correspondents/nickrobinson/rss.sxml"),
#("Blog: Mark D'Arcy (Parliamentary Correspondent)", "http://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
#("Blog: Robert Peston (Business Editor)", "http://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
#("Blog: Stephanie Flanders (Economics Editor)", "http://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
("Blog: Rory Cellan-Jones (Technology correspondent)", "http://feeds.bbci.co.uk/news/correspondents/rorycellanjones/rss.sxml"),
("Sport Front Page", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
#("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
#("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
#("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
#("Rugby League", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml"),
#("Tennis", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml"),
#("Golf", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml"),
#("Motorsport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml"),
#("Boxing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml"),
#("Athletics", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml"),
#("Snooker", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml"),
#("Horse Racing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml"),
#("Cycling", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml"),
#("Disability Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml"),
#("Other Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml"),
#("Olympics 2012", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/olympics_2012/rss.xml"),
#("N. Ireland Politics", "http://feeds.bbci.co.uk/news/northern_ireland/northern_ireland_politics/rss.xml"),
#("Scotland Politics", "http://feeds.bbci.co.uk/news/scotland/scotland_politics/rss.xml"),
#("Scotland Business", "http://feeds.bbci.co.uk/news/scotland/scotland_business/rss.xml"),
#("E. Scotland, Edinburgh & Fife", "http://feeds.bbci.co.uk/news/scotland/edinburgh_east_and_fife/rss.xml"),
#("W. Scotland & Glasgow", "http://feeds.bbci.co.uk/news/scotland/glasgow_and_west/rss.xml"),
#("Highlands & Islands", "http://feeds.bbci.co.uk/news/scotland/highlands_and_islands/rss.xml"),
#("NE. Scotland, Orkney & Shetland", "http://feeds.bbci.co.uk/news/scotland/north_east_orkney_and_shetland/rss.xml"),
#("South Scotland", "http://feeds.bbci.co.uk/news/scotland/south_scotland/rss.xml"),
#("Central Scotland & Tayside", "http://feeds.bbci.co.uk/news/scotland/tayside_and_central/rss.xml"),
#("Wales Politics", "http://feeds.bbci.co.uk/news/wales/wales_politics/rss.xml"),
#("NW. Wales", "http://feeds.bbci.co.uk/news/wales/north_west_wales/rss.xml"),
#("NE. Wales", "http://feeds.bbci.co.uk/news/wales/north_east_wales/rss.xml"),
#("Mid. Wales", "http://feeds.bbci.co.uk/news/wales/mid_wales/rss.xml"),
#("SW. Wales", "http://feeds.bbci.co.uk/news/wales/south_west_wales/rss.xml"),
#("SE. Wales", "http://feeds.bbci.co.uk/news/wales/south_east_wales/rss.xml"),
#("Newyddion - News in Welsh", "http://feeds.bbci.co.uk/newyddion/rss.xml"),
#("Gwleidyddiaeth", "http://feeds.bbci.co.uk/newyddion/gwleidyddiaeth/rss.xml"),
#("Gogledd-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/gogledd-ddwyrain/rss.xml"),
#("Gogledd-Orllewin", "http://feeds.bbci.co.uk/newyddion/gogledd-orllewin/rss.xml"),
#("Canolbarth", "http://feeds.bbci.co.uk/newyddion/canolbarth/rss.xml"),
#("De-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/de-ddwyrain/rss.xml"),
#("De-Orllewin", "http://feeds.bbci.co.uk/newyddion/de-orllewin/rss.xml"),
]
# **** SELECT YOUR USER PREFERENCES ****
# Title to use for the ebook.
#
title = 'BBC News'
# A brief description for the ebook.
#
description = u'BBC web site ebook created using rss feeds.'
# The max number of articles which may be downloaded from each feed.
# I've never seen more than about 70 articles in a single feed in the
# BBC feeds.
#
max_articles_per_feed = 100
# The max age of articles which may be downloaded from each feed. This is
# specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
# half days). My default of 1.5 days is the last 36 hours, the point at
# which I've decided 'news' becomes 'old news', but be warned this is not
# so good for the blogs, technology, magazine, etc., and sports feeds.
# You may wish to extend this to 2-5 but watch out ebook creation time will
# increase as well. Setting this to 30 will get everything (AFAICT) as long
# as max_articles_per_feed remains set high (except for 'Click' which is
# v. low volume and its currently oldest article is 4th Feb 2011).
#
oldest_article = 1.5
# Number of simultaneous downloads. 20 is consistently working fine on the
# BBC News feeds with no problems. Speeds things up from the default of 5.
# If you have a lot of feeds and/or have increased oldest_article above 2
# then you may wish to try increasing simultaneous_downloads to 25-30,
# Or, of course, if you are in a hurry. [I've not tried beyond 20.]
#
simultaneous_downloads = 20
# Timeout for fetching files from the server in seconds. The default of
# 120 seconds, seems somewhat excessive.
#
timeout = 30
# The format string for the date shown on the ebook's first page.
# List of all values: http://docs.python.org/library/time.html
# Default in news.py has a leading space so that's mirrored here.
# As with 'feeds' select/de-select by adding/removing the initial '#',
# only one timefmt should be selected, here's a few to choose from.
#
timefmt = ' [%a, %d %b %Y]' # [Fri, 14 Nov 2011] (Calibre default)
#timefmt = ' [%a, %d %b %Y %H:%M]' # [Fri, 14 Nov 2011 18:30]
#timefmt = ' [%a, %d %b %Y %I:%M %p]' # [Fri, 14 Nov 2011 06:30 PM]
#timefmt = ' [%d %b %Y]' # [14 Nov 2011]
#timefmt = ' [%d %b %Y %H:%M]' # [14 Nov 2011 18.30]
#timefmt = ' [%Y-%m-%d]' # [2011-11-14]
#timefmt = ' [%Y-%m-%d-%H-%M]' # [2011-11-14-18-30]
#
# **** IMPORTANT ****
#
# DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
#
# DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
#
# I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :)
#
# **** IMPORTANT ****
#
# Author of this recipe.
__author__ = 'mattst'
# Specify English as the language of the RSS feeds (ISO-639 code).
language = 'en_GB'
# Set tags.
tags = 'news, sport, blog'
# Set publisher and publication type.
publisher = 'BBC'
publication_type = 'newspaper'
# Disable stylesheets from site.
no_stylesheets = True
# Specifies an override encoding for sites that have an incorrect charset
# specified. Default of 'None' says to auto-detect. Some other BBC recipes
# use 'utf8', which works fine (so use that if necessary) but auto-detecting
# with None is working fine, so stick with that for robustness.
encoding = None
# Sets whether a feed has full articles embedded in it. The BBC feeds do not.
use_embedded_content = False
# Removes empty feeds - why keep them!?
remove_empty_feeds = True
# Create a custom title which fits nicely in the Kindle title list.
# Requires "import time" above class declaration, and replacing
# title with custom_title in conversion_options (right column only).
# Example of string below: "BBC News - 14 Nov 2011"
#
# custom_title = "BBC News - " + time.strftime('%d %b %Y')
'''
# Conversion options for advanced users, but don't forget to comment out the
# current conversion_options below. Avoid setting 'linearize_tables' as that
# plays havoc with the 'old style' table based pages.
#
conversion_options = { 'title' : title,
'comments' : description,
'tags' : tags,
'language' : language,
'publisher' : publisher,
'authors' : publisher,
'smarten_punctuation' : True
}
'''
-}
-keep_only_tags = [
-dict(name='div', attrs={'class':['layout-block-a layout-block']})
-,dict(attrs={'class':['story-body','storybody']})
-]
-remove_tags = [
-dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
-'story-feature wide ', 'story-feature narrow']}),
-dict(id=['hypertab', 'comment-form']),
-]
conversion_options = { 'smarten_punctuation' : True }

# Specify extra CSS - overrides ALL other CSS (IE. Added last).
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
.introduction, .first { font-weight: bold; } \
.cross-head { font-weight: bold; font-size: 125%; } \
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
.story-date, .published { font-size: 80%; } \
table { width: 100%; } \
td img { display: block; margin: 5px auto; } \
ul { padding-top: 10px; } \
ol { padding-top: 10px; } \
li { padding-top: 5px; padding-bottom: 5px; } \
h1 { text-align: center; font-size: 175%; font-weight: bold; } \
h2 { text-align: center; font-size: 150%; font-weight: bold; } \
h3 { text-align: center; font-size: 125%; font-weight: bold; } \
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
-remove_attributes = ['width','height']
-feeds = [
-('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
-('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
-('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
-('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
-('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
-('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
-('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
-('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
-('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
-('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
-('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
-('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
-('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
-]
# Remove various tag attributes to improve the look of the ebook pages.
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
# Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
# cause a section of the ebook to start in an unsightly fashion or, more
# frequently, a "<br />" will muck up the formatting of a correspondent's byline.
# "<br />" and "<br clear/>" are far more frequently used on the table formatted
# style of pages, and really spoil the look of the ebook pages.
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
# Create regular expressions for tag keeping and removal to make the matches more
# robust against minor changes and errors in the HTML, Eg. double spaces, leading
# and trailing spaces, missing hyphens, and such like.
# Python regular expression ('re' class) page: http://docs.python.org/library/re.html
# ***************************************
# Regular expressions for keep_only_tags:
# ***************************************
# The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
# page which contains the main text of the article. Match storybody variants: 'storybody',
# 'story-body', 'story body','storybody ', etc.
storybody_reg_exp = '^.*story[_ -]*body.*$'
# The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
# and published date. This is one level above the usual news pages which have the title
# and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
# resulting in a lot of extra things to be removed by remove_tags.
blq_content_reg_exp = '^.*blq[_ -]*content.*$'
# The BBC has an alternative page design structure, which I suspect is an out-of-date
# design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
# (travel), and in some sport pages. These alternative pages are table based (which is
# why I think they are an out-of-date design) and account for - I'm guesstimating - less
# than 1% of all articles. They use a table class 'storycontent' to hold the article
# and like blq_content (above) have required lots of extra removal by remove_tags.
story_content_reg_exp = '^.*story[_ -]*content.*$'
# Keep the sections of the HTML which match the list below. The HTML page created by
# Calibre will fill <body> with those sections which are matched. Note that the
# blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
# it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
# will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
# all). If they are the other way around in keep_only_tags then blq_content_reg_exp
# will end up being discarded.
keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
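# (Editor's illustration, not part of this commit.) The loose storybody pattern
# above matches every class-name variant seen on BBC pages, Eg. each of these
# returns a match object:
#   re.match(storybody_reg_exp, 'storybody', re.IGNORECASE)
#   re.match(storybody_reg_exp, 'story-body', re.IGNORECASE)
#   re.match(storybody_reg_exp, 'story body', re.IGNORECASE)
#   re.match(storybody_reg_exp, 'storybody ', re.IGNORECASE)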
# ************************************
# Regular expressions for remove_tags:
# ************************************
# Regular expression to remove share-help and variant tags. The share-help class
# is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
# twitter, email. Removed to avoid page clutter.
share_help_reg_exp = '^.*share[_ -]*help.*$'
# Regular expression to remove embedded-hyper and variant tags. This class is used to
# display links to other BBC News articles on the same/similar subject.
embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
# Regular expression to remove hypertabs and variant tags. This class is used to
# display a tab bar at the top of an article which allows the user to switch to
# an article (viewed on the same page) providing further info., 'in depth' analysis,
# an editorial, a correspondent's blog entry, and such like. The ability to handle
# a tab bar of this nature is currently beyond the scope of this recipe and
# possibly of Calibre itself (not sure about that - TO DO - check!).
hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
# Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
# 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
# This class is used to add additional info. boxes, or small lists, outside of
# the main story. TO DO: Work out a way to incorporate these neatly.
story_feature_reg_exp = '^.*story[_ -]*feature.*$'
# Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
# 'videoInStoryC'. This class is used to embed video.
video_reg_exp = '^.*video.*$'
# Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
# This class is used to embed audio.
audio_reg_exp = '^.*audio.*$'
# Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
# This class is used to embed a photo slideshow. See also 'slideshow' below.
picture_gallery_reg_exp = '^.*picture.*$'
# Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
# This class is used to embed a slideshow (not necessarily photo) but both
# 'slideshow' and 'pictureGallery' are used for slideshows.
slideshow_reg_exp = '^.*slide[_ -]*show.*$'
# Regular expression to remove social-links and variant tags. This class is used to
# display links to a BBC bloggers main page, used in various columnist's blogs
# (Eg. Nick Robinson, Robert Peston).
social_links_reg_exp = '^.*social[_ -]*links.*$'
# Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
# 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
# removed by 'story-feature' removal (as they are usually within them), but
# not always. The quotation removed is always (AFAICT) in the article text
# as well but a 2nd copy is placed in a quote tag to draw attention to it.
# The quote class tags may or may not appear in div's.
quote_reg_exp = '^.*quote.*$'
# Regular expression to remove hidden and variant tags, Eg. 'hidden'.
# The purpose of these is unclear, they seem to be an internal link to a
# section within the article, but the text of the link (Eg. 'Continue reading
# the main story') never seems to be displayed anyway. Removed to avoid clutter.
# The hidden class tags may or may not appear in div's.
hidden_reg_exp = '^.*hidden.*$'
# Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
# Used on the site to display text about registered users entering comments.
comment_reg_exp = '^.*comment.*$'
# Regular expression to remove form and variant tags, Eg. 'comment-form'.
# Used on the site to allow registered BBC users to fill in forms, typically
# for entering comments about an article.
form_reg_exp = '^.*form.*$'
# Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
#<div class="story-actions"> Used on sports pages for 'email' and 'print'.
story_actions_reg_exp = '^.*story[_ -]*actions.*$'
#<div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
# social networking links).
bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
#<div id="secondary-content" class="content-group">
# NOTE: Don't remove class="content-group" that is needed.
# Used on sports pages to link to 'similar stories'.
secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
#<div id="featured-content" class="content-group">
# NOTE: Don't remove class="content-group" that is needed.
# Used on sports pages to link to pages like 'tables', 'fixtures', etc.
featured_content_reg_exp = '^.*featured[_ -]*content.*$'
#<div id="navigation">
# Used on sports pages to link to pages like 'tables', 'fixtures', etc.
# Used sometimes instead of "featured-content" above.
navigation_reg_exp = '^.*navigation.*$'
#<a class="skip" href="#blq-container-inner">Skip to top</a>
# Used on sports pages to link to the top of the page.
skip_reg_exp = '^.*skip.*$'
# Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
# which are the alternative table design based pages. The purpose of some of these
# is not entirely clear from the pages (which are a total mess!).
# Remove mapping based tags, Eg. <map id="world_map">
# The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
map_reg_exp = '^.*map.*$'
# Remove social bookmarking variation, called 'socialBookMarks'.
social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
# Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
# Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
# alongside 'socialBookMarks' whenever that appears. I am removing it as well
# under the assumption that it can appear alone as well.
sharesb_reg_exp = '^.*sharesb.*$'
# Remove class 'o'. The worst named user created css class of all time. The creator
# should immediately be fired. I've seen it used to hold nothing at all but with
# 20 or so empty lines in it. Also to hold a single link to another article.
# Whatever it was designed to do it is not wanted by this recipe. Exact match only.
o_reg_exp = '^o$'
# Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
# use two reg expressions to make removing this (and variants) robust.
promo_top_reg_exp = '^.*promotopbg.*$'
promo_bottom_reg_exp = '^.*promobottombg.*$'
# Remove 'nlp', provides heading for link lists. Requires an exact match due to
# risk of matching those letters in something needed, unless I see a variation
# of 'nlp' used at a later date.
nlp_reg_exp = '^nlp$'
# Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
# has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
# matching those letters in something needed.
mva_or_mvb_reg_exp = '^mv[ab]$'
# Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
mvtb_reg_exp = '^mvtb$'
# Remove 'blq-toplink', class to provide a link to the top of the page.
blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
# Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
# Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
# use two reg expressions to make removing this (and variants) robust.
prods_services_01_reg_exp = '^.*servicev4.*$'
prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
# Remove -what I think is- some kind of navigation tools helper class, though I am
# not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
# frequently and it is not wanted. Have decided to use two reg expressions to make
# removing this (and variants) robust.
blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
# Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
# need removing - I have no clue what it does other than it contains links.
# Whatever it is - it is not part of the article and is not wanted.
puffbox_reg_exp = '^.*puffbox.*$'
# Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
sibtbg_reg_exp = '^.*sibtbg.*$'
# Remove 'storyextra' - links to relevant articles and external sites.
storyextra_reg_exp = '^.*story[_ -]*extra.*$'
remove_tags = [ dict(name='div', attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)})
]
# Uses url to create and return the 'printer friendly' version of the url.
# In other words the 'print this page' address of the page.
#
# There are 3 types of urls used in the BBC site's rss feeds. There is just
# 1 type for the standard news while there are 2 used for sports feed urls.
# Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
# there is a major story of interest to 'everyone'. So even if no BBC sports
# feeds are added to 'feeds' the logic of this method is still needed to avoid
# blank / missing / empty articles which have an index title and then no body.
def print_version(self, url):
# Handle sports page urls type 01:
if (url.find("go/rss/-/sport1/") != -1):
temp_url = url.replace("go/rss/-/", "")
# Handle sports page urls type 02:
elif (url.find("go/rss/int/news/-/sport1/") != -1):
temp_url = url.replace("go/rss/int/news/-/", "")
# Handle regular news page urls:
else:
temp_url = url.replace("go/rss/int/news/-/", "")
# Always add "?print=true" to the end of the url.
print_url = temp_url + "?print=true"
return print_url
# Remove articles in feeds based on a string in the article title or url.
#
# Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
# thread, in post with title: "Remove articles from feed", see url:
# http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
# Many thanks and all credit to Starson17.
#
# Starson17's code has obviously been altered to suit my requirements.
    def parse_feeds(self):

        # Call parent's method.
        feeds = BasicNewsRecipe.parse_feeds(self)

        # Loop through all feeds.
        for feed in feeds:

            # Loop through all articles in feed (over a copy, since articles
            # may be removed while iterating).
            for article in feed.articles[:]:

                # Match key words and remove article if there's a match.

                # Most BBC rss feed video only 'articles' use upper case 'VIDEO'
                # as a title prefix. Just match upper case 'VIDEO', so that
                # articles like 'Video game banned' won't be matched and removed.
                if 'VIDEO' in article.title:
                    feed.articles.remove(article)

                # Most BBC rss feed audio only 'articles' use upper case 'AUDIO'
                # as a title prefix. Just match upper case 'AUDIO', so that
                # articles like 'Hi-Def audio...' won't be matched and removed.
                elif 'AUDIO' in article.title:
                    feed.articles.remove(article)

                # Most BBC rss feed photo slideshow 'articles' use 'In Pictures',
                # 'In pictures', and 'in pictures', somewhere in their title.
                # Match any case of that phrase.
                elif 'IN PICTURES' in article.title.upper():
                    feed.articles.remove(article)

                # As above, but user contributed pictures. Match any case.
                elif 'YOUR PICTURES' in article.title.upper():
                    feed.articles.remove(article)

                # 'Sportsday Live' articles contain a constantly and dynamically
                # updated 'running commentary' during a live sporting event.
                # Match any case.
                elif 'SPORTSDAY LIVE' in article.title.upper():
                    feed.articles.remove(article)

                # Sometimes 'Sportsday Live' (above) becomes 'Live - Sport Name'.
                # These are matched below using 'Live - ', because removing all
                # articles with 'live' in their titles would remove some articles
                # that are in fact not live sports pages. Match any case.
                elif 'LIVE - ' in article.title.upper():
                    feed.articles.remove(article)

                # 'Quiz of the week' is a Flash player weekly news quiz. Match only
                # the 'Quiz of the' part in anticipation of monthly and yearly
                # variants. Match any case.
                elif 'QUIZ OF THE' in article.title.upper():
                    feed.articles.remove(article)

                # Remove articles with 'scorecards' in the url. These are BBC sports
                # pages which just display a cricket scorecard. The pages have a mass
                # of table and css entries to display the scorecards nicely. They
                # could probably be made to work with this recipe, but sorting out
                # all the css would be a formatting nightmare.
                elif 'scorecards' in article.url:
                    feed.articles.remove(article)

        return feeds

# End of class and file.
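
# A hedged sketch (not part of the recipe above): any further title filter
# would follow the same pattern inside parse_feeds, e.g. for a hypothetical
# 'PODCAST' title prefix:
#
#     elif 'PODCAST' in article.title.upper():
#         feed.articles.remove(article)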


@@ -1,4 +1,3 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
@@ -18,11 +17,17 @@ class Berlingske_dk(BasicNewsRecipe):
     no_stylesheets = True
     remove_empty_feeds = True
     use_embedded_content = False
+    remove_javascript = True
     publication_type = 'newspaper'
     encoding = 'utf8'
     language = 'da'
-    masthead_url = 'http://www.berlingske.dk/sites/all/themes/bm/img/layout/masthead_bg.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h1,.manchet,.byline{font-family: Cambria,Georgia,Times,"Times New Roman",serif } '
+    auto_cleanup = True
+    extra_css = '''
+        .manchet {color:#888888;}
+        .dateline {font-size: x-small; color:#444444;}
+        .manchet,.dateline { font-family: Cambria,Georgia,Times,"Times New Roman",serif }
+        .body {font-family: Arial,Helvetica,sans-serif }
+    '''

     conversion_options = {
         'comment' : description
@@ -32,18 +37,14 @@ class Berlingske_dk(BasicNewsRecipe):
     }

     feeds = [
-        (u'Breaking news', u'http://www.berlingske.dk/breaking/rss')
-        ,(u'Seneste nyt', u'http://www.berlingske.dk/seneste/rss')
-        ,(u'Topnyheder', u'http://www.berlingske.dk/top/rss')
-        ,(u'Danmark', u'http://www.berlingske.dk/danmark/seneste/rss')
-        ,(u'Verden', u'http://www.berlingske.dk/verden/seneste/rss')
-        ,(u'Klima', u'http://www.berlingske.dk/klima/seneste/rss')
-        ,(u'Debat', u'http://www.berlingske.dk/debat/seneste/rss')
-        ,(u'Koebenhavn', u'http://www.berlingske.dk/koebenhavn/seneste/rss')
-        ,(u'Politik', u'http://www.berlingske.dk/politik/seneste/rss')
-        ,(u'Kultur', u'http://www.berlingske.dk/kultur/seneste/rss')
+        (u'Breaking news', u'http://www.b.dk/breaking/rss')
+        ,(u'Seneste nyt', u'http://www.b.dk/seneste/rss')
+        ,(u'Topnyheder', u'http://www.b.dk/top/rss')
+        ,(u'Danmark', u'http://www.b.dk/danmark/seneste/rss')
+        ,(u'Verden', u'http://www.b.dk/verden/seneste/rss')
+        ,(u'Klima', u'http://www.b.dk/klima/seneste/rss')
+        ,(u'Debat', u'http://www.b.dk/debat/seneste/rss')
+        ,(u'Koebenhavn', u'http://www.b.dk/koebenhavn/seneste/rss')
+        ,(u'Politik', u'http://www.b.dk/politik/seneste/rss')
+        ,(u'Kultur', u'http://www.b.dk/kultur/seneste/rss')
     ]
-
-    keep_only_tags = [dict(attrs={'class':['first','pt-article']})]
-    remove_tags = [dict(name=['object','link','base','iframe','embed'])]

recipes/biamag.recipe (new file)

@@ -0,0 +1,38 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
bianet.com.tr
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Radikal_tr(BasicNewsRecipe):
    title                 = 'BiaMag'
    __author__            = 'Osman Kaysan'
    description           = 'Independent News from Turkey'
    publisher             = 'BiaMag'
    category              = 'news, politics, Turkey'
    oldest_article        = 15
    max_articles_per_feed = 120
    masthead_url          = 'http://bianet.org/images/biamag_logo.gif'
    language              = 'tr'
    no_stylesheets        = True

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
        ,'linearize_tables': True
        ,'remove_paragraph_spacing': True,
    }

    remove_tags_before = dict(name='div', attrs={'class':'manset'})
    remove_tags = [dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}),
                   dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})

    feeds = [(u'BiaMag', u'http://www.bianet.org/biamag.rss')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

recipes/biamag_en.recipe (new file)

@@ -0,0 +1,38 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
bianet.com.tr
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Radikal_tr(BasicNewsRecipe):
    title                 = 'Bianet-English'
    __author__            = 'Osman Kaysan'
    description           = 'Independent News Network from Turkey(English)'
    publisher             = 'Bianet'
    category              = 'news, politics, Turkey'
    oldest_article        = 7
    max_articles_per_feed = 150
    masthead_url          = 'http://bianet.org/images/english_logo.gif'
    language              = 'en_TR'
    no_stylesheets        = True

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
        ,'linearize_tables': True
        ,'remove_paragraph_spacing': True,
    }

    remove_tags_before = dict(name='div', attrs={'class':'manset'})
    remove_tags = [dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}),
                   dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})

    feeds = [(u'Bianet-English', u'http://www.bianet.org/english.rss')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

recipes/bianet.recipe (new file)

@@ -0,0 +1,38 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
bianet.com.tr
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Radikal_tr(BasicNewsRecipe):
    title                 = 'Bianet'
    __author__            = 'Osman Kaysan'
    description           = 'Independent News from Turkey'
    publisher             = 'Bianet'
    category              = 'news, politics, Turkey'
    oldest_article        = 7
    max_articles_per_feed = 120
    masthead_url          = 'http://bianet.org/images/bianet_logo.gif'
    language              = 'tr'
    no_stylesheets        = True

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
        ,'linearize_tables': True
        ,'remove_paragraph_spacing': True,
    }

    remove_tags_before = dict(name='div', attrs={'class':'manset'})
    remove_tags = [dict(name='ul', attrs={'class':['altul']}), dict(name='div', attrs={'id':['habermenu']}),
                   dict(name='div', attrs={'class':['mail']}), dict(name='div', attrs={'class':['from']})]
    remove_tags_after = dict(name='div', attrs={'id':'habermenu'})

    feeds = [(u'Bianet', u'http://bianet.org/bianet.rss')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@@ -0,0 +1,50 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class Birgun(BasicNewsRecipe):
    title                 = u'Birgün Gazetesi'
    __author__            = u'Osman Kaysan'
    oldest_article        = 7
    max_articles_per_feed = 150
    use_embedded_content  = False
    description           = 'Birgun gazatesi haberleri, kose yazarlari'
    publisher             = 'Birgün'
    category              = 'news,haberler,turkce,gazete,birgun'
    language              = 'tr'
    no_stylesheets        = True
    publication_type      = 'newspaper'

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
        ,'linearize_tables': True
        ,'remove_paragraph_spacing': True,
    }

    cover_img_url = 'http://www.birgun.net/i/birgun.png'
    masthead_url  = 'http://www.birgun.net/i/birgun.png'

    remove_attributes  = ['width','height']
    remove_tags_before = dict(name='h2', attrs={'class':'storyHeadline'})
    #remove_tags_after = dict(name='div', attrs={'class':'toollinks'})
    remove_tags_after  = dict(name='tr', attrs={'valign':'top'})
    remove_tags = [dict(name='div', attrs={'id':'byLine'}), dict(name='div', attrs={'class':'toollinks'}),
                   dict(name='div', attrs={'class':'main-lead'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
                   dict(name='a', attrs={'class':'addthis_button'})]
    remove_empty_feeds = True

    feeds = [
        (u'Güncel', u'http://www.birgun.net/actuels.xml')
        ,(u'Köşe Yazarları', u'http://www.birgun.net/writer.xml')
        ,(u'Politika', u'http://www.birgun.net/politics.xml')
        ,(u'Ekonomi', u'http://www.birgun.net/economic.xml')
        ,(u'Çalışma Yaşamı', u'http://www.birgun.net/workers.xml')
        ,(u'Dünya', u'http://www.birgun.net/worlds.xml')
        ,(u'Yaşam', u'http://www.birgun.net/lifes.xml')
    ]

@@ -110,8 +110,10 @@ class BrandEins(BasicNewsRecipe):
         selected_issue = issue_map[selected_issue_key]
         url = selected_issue.get('href', False)
         # Get the title for the magazin - build it out of the title of the cover - take the issue and year;
-        self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
+        # self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
+        # Get the alternative title for the magazin - build it out of the title of the cover - without the issue and year;
         url = 'http://brandeins.de/'+url
+        self.timefmt = ' ' + selected_issue_key[4:] + '/' + selected_issue_key[:4]

         # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
         titles_and_articles = self.brand_eins_parse_issue(url)
@@ -163,4 +165,3 @@ class BrandEins(BasicNewsRecipe):
             current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
         titles_and_articles.append([chapter_title, current_articles])
         return titles_and_articles

@@ -10,49 +10,39 @@ http://www.buffalonews.com/RSS/
 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1298680852(BasicNewsRecipe):
+class BuffaloNews(BasicNewsRecipe):
     title = u'Buffalo News'
     oldest_article = 2
     language = 'en'
-    __author__ = 'ChappyOnIce'
+    __author__ = 'ChappyOnIce, Krittika Goyal'
     max_articles_per_feed = 20
     encoding = 'utf-8'
     masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
-    remove_javascript = True
-    extra_css = 'body {text-align: justify;}\n \
-        p {text-indent: 20px;}'
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['main-content-left']})
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'id':['commentCount']}),
-        dict(name='div', attrs={'class':['story-list-links']})
-    ]
-
-    remove_tags_after = dict(name='div', attrs={'class':['body storyContent']})
-
-    feeds = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
+    auto_cleanup = True
+    remove_empty_feeds = True
+
+    feeds = [
+        (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'),
         (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'),
         (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944')
     ]

@@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe

class CalibreBlog(BasicNewsRecipe):
    title      = u'Calibre Blog'
    language   = 'en'
    __author__ = 'Krittika Goyal'

    oldest_article        = 1000  # days
    max_articles_per_feed = 5
    use_embedded_content  = False

    no_stylesheets = True
    auto_cleanup   = True

    feeds = [
        ('Article',
         'http://blog.calibre-ebook.com/feeds/posts/default'),
    ]

recipes/capital_gr.recipe (new file)

@@ -0,0 +1,35 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class Capital(BasicNewsRecipe):
    title       = 'Capital.gr'
    __author__  = 'Stelios'
    description = 'Financial News from Greece'
    #max_articles_per_feed = 100
    oldest_article = 3
    publisher   = 'Capital.gr'
    category    = 'news, GR'
    language    = 'el'
    encoding    = 'windows-1253'
    cover_url   = 'http://files.capital.gr/images/caplogo.gif'
    no_stylesheets       = True
    use_embedded_content = False
    remove_empty_feeds   = True

    keep_only_tags = [
        dict(name='h1'),
        dict(name='p'),
        dict(name='span', attrs={'id' : ["textbody"]})
    ]

    # 3 posts seemed to have utf8 encoding
    feeds = [
        (u'\u039F\u039B\u0395\u03A3 \u039F\u0399 \u0395\u0399\u0394\u0397\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-1'),
        (u'\u0395\u03A0\u0399\u03A7\u0395\u0399\u03A1\u0397\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-2'),
        (u'\u0391\u0393\u039F\u03A1\u0395\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-3'),
        (u'\u039F\u0399\u039A\u039F\u039D\u039F\u039C\u0399\u0391', 'http://www.capital.gr/news/newsrss.asp?s=-4'),
        (u'\u03A7\u03A1\u0397\u039C. \u0391\u039D\u0391\u039A\u039F\u0399\u039D\u03A9\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-6'),
        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u039C\u0395 \u0391\u03A0\u039F\u03A8\u0397', 'http://www.capital.gr/articles/articlesrss.asp?catid=4'),
        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A3\u0399\u03A9\u03A0\u0397\u03A4\u0397\u03A1\u0399\u039F', 'http://www.capital.gr/articles/articlesrss.asp?catid=6'),
        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', 'http://www.capital.gr/articles/articlesrss.asp?catid=8'),
        #(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A4\u0395\u03A7\u039D\u039F\u039B\u039F\u0393\u0399\u0391', 'http://www.capital.gr/news/newsrss.asp?s=-8') not working for now
    ]

recipes/catavencii.recipe (new file)

@@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencii.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Catavencii(BasicNewsRecipe):
    title       = u'Ca\u0163avencii'
    __author__  = u'Silviu Cotoar\u0103'
    publisher   = u'Ca\u0163avencii'
    description = u'Ca\u0163avencii'
    oldest_article = 5
    language    = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category    = 'Ziare,Romania'
    encoding    = 'utf-8'
    cover_url   = 'http://www.simonatache.ro/wp-content/uploads/2011/06/catavencii-logo.png'

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id':'content'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'breadcrumbs'})
        , dict(name='span', attrs={'class':'info'})
        , dict(name='div', attrs={'id':'social-media-article'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':'social-media-article'})
    ]

    feeds = [
        (u'\u0218tiri', u'http://www.catavencii.ro/rss')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@@ -4,16 +4,16 @@
 __license__   = 'GPL v3'
 __copyright__ = u'2011, Silviu Cotoar\u0103'
 '''
-catavencu.ro
+academiacatavencu.info
 '''
 from calibre.web.feeds.news import BasicNewsRecipe

-class Catavencu(BasicNewsRecipe):
+class AcademiaCatavencu(BasicNewsRecipe):
     title       = u'Academia Ca\u0163avencu'
     __author__  = u'Silviu Cotoar\u0103'
     description = 'Tagma cum laude'
-    publisher   = 'Catavencu'
+    publisher   = u'Ca\u0163avencu'
     oldest_article = 5
     language    = 'ro'
     max_articles_per_feed = 100
@@ -21,32 +21,31 @@ class Catavencu(BasicNewsRecipe):
     use_embedded_content = False
     category = 'Ziare'
     encoding = 'utf-8'
-    cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
+    cover_url = 'http://www.academiacatavencu.info/images/logo.png'

     conversion_options = {
         'comments'  : description
         ,'tags'     : category
         ,'language' : language
         ,'publisher': publisher
     }

     keep_only_tags = [
-        dict(name='ul', attrs={'class':'articles'})
+        dict(name='h1', attrs={'class':'art_title'}),
+        dict(name='div', attrs={'class':'art_text'})
     ]

     remove_tags = [
-        dict(name='div', attrs={'class':['tools']})
-        , dict(name='div', attrs={'class':['share']})
-        , dict(name='div', attrs={'class':['category']})
-        , dict(name='div', attrs={'id':['comments']})
+        dict(name='div', attrs={'class':['desp_m']})
+        , dict(name='div', attrs={'id':['tags']})
     ]

     remove_tags_after = [
-        dict(name='div', attrs={'id':'comments'})
+        dict(name='div', attrs={'class':['desp_m']})
     ]

     feeds = [
-        (u'Feeds', u'http://catavencu.ro/feed/rss')
+        (u'Feeds', u'http://www.academiacatavencu.info/rss.xml')
     ]

     def preprocess_html(self, soup):

@@ -27,7 +27,7 @@ class CGM(BasicNewsRecipe):
                 del item['style']
         ad=soup.findAll('a')
         for r in ad:
-            if 'http://www.hustla.pl' in r['href']:
+            if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:
                 r.extract()
         gallery=soup.find('div', attrs={'class':'galleryFlash'})
         if gallery:

@@ -5,8 +5,8 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 Changelog:
 2011-09-24
 Changed cover (drMerry)
-'''
-'''
+2011-10-13
+Updated Cover (drMerry)
 news.cnet.com
 '''
@@ -24,7 +24,7 @@ class CnetNews(BasicNewsRecipe):
     encoding = 'cp1252'
     use_embedded_content = False
     language = 'en'
+    cover_url = 'http://reviews.cnet.com/i/ff/wp/logo_cnet.gif'

     conversion_options = {
         'comment' : description
         , 'tags' : category

@@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
     #match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
     max_articles_per_feed = 25

+    extra_css = '''
+        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
+        .cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+        .cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+        .cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+        .cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
+    '''
+
     preprocess_regexps = [
         (re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
         (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
@@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):

     remove_tags = [
         {'class':['cnn_strybtntools', 'cnn_strylftcntnt',
             'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
-            'cnn_strycntntrgt', 'hed_side', 'foot']},
+            'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
+        {'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
+            'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
+        {'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
+            'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
+        {'style':['display:none']},
         dict(id=['ie_column']),
     ]
@@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
         ans = BasicNewsRecipe.get_article_url(self, article)
         return ans.partition('?')[0]
+
+    def get_masthead_url(self):
+        masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(masthead)
+        except:
+            self.log("\nCover unavailable")
+            masthead = None
+        return masthead

@@ -14,67 +14,43 @@ class ColumbusDispatchRecipe(BasicNewsRecipe):
     use_embedded_content = False
     remove_empty_feeds = True
     oldest_article = 1.2
-    max_articles_per_feed = 100
+    use_embedded_content = False
     no_stylesheets = True
-    remove_javascript = True
-    encoding = 'utf-8'
-    # Seems to work best, but YMMV
-    simultaneous_downloads = 2
+    auto_cleanup = True
+    #auto_cleanup_keep = '//div[@id="story-photos"]'

     # Feeds from http://www.dispatch.com/live/content/rss/index.html
-    feeds = []
-    feeds.append((u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml'))
-    feeds.append((u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml'))
-    feeds.append((u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml'))
-    feeds.append((u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml'))
-    feeds.append((u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml'))
-    feeds.append((u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml'))
-    feeds.append((u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml'))
-    feeds.append((u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml'))
-    feeds.append((u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml'))
-    feeds.append((u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml'))
-    feeds.append((u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml'))
-    feeds.append((u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml'))
-    feeds.append((u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml'))
-    feeds.append((u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml'))
-    feeds.append((u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml'))
-    feeds.append((u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml'))
-    feeds.append((u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml'))
-    feeds.append((u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml'))
-    feeds.append((u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml'))
-    feeds.append((u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml'))
-    feeds.append((u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml'))
-    feeds.append((u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml'))
-    feeds.append((u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml'))
-    feeds.append((u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml'))
-    feeds.append((u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml'))
-    feeds.append((u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml'))
-    feeds.append((u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml'))
-    feeds.append((u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml'))
-    feeds.append((u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml'))
-    feeds.append((u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml'))
-    #feeds.append((u'', u''))
-
-    keep_only_tags = []
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'colhed'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'hed'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subhed'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'date'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'byline'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'srcline'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'body'}))
-
-    remove_tags = []
-    remove_tags.append(dict(name = 'div', attrs = {'id': 'middle-story-ad-container'}))
-
-    extra_css = '''
-        body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
-        a {text-decoration: none; color: blue;}
-        div.colhed {font-weight: bold;}
-        div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
-        div.subhed {font-size: large;}
-        div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
-        div.byline, div.srcline {font-size: small; color: #696969;}
-    '''
+    feeds = [
+        ('Local',
+            'http://www.dispatch.com/content/syndication/news_local-state.xml'),
+        ('National',
+            'http://www.dispatch.com/content/syndication/news_national.xml'),
+        ('Business',
+            'http://www.dispatch.com/content/syndication/news_business.xml'),
+        ('Editorials',
+            'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
+        ('Columnists',
+            'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
+        ('Life and Arts',
+            'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
+        ('OSU Sports',
+            'http://www.dispatch.com/content/syndication/sports_osu.xml'),
+        ('Auto Racing',
+            'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
+        ('Outdoors',
+            'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
+        ('Bengals',
+            'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
+        ('Indians',
+            'http://www.dispatch.com/content/syndication/sports_indians.xml'),
+        ('Clippers',
+            'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
+        ('Crew',
+            'http://www.dispatch.com/content/syndication/sports_crew.xml'),
+        ('Reds',
+            'http://www.dispatch.com/content/syndication/sports_reds.xml'),
+        ('Blue Jackets',
+            'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
+    ]

@@ -0,0 +1,51 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
#from calibre import __appname__
from calibre.utils.magick import Image

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Cosmopolitan UK'
    description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
    __author__ = 'Dave Asbury'
    # greyscale code by Starson
    cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 20
    remove_empty_feeds = True
    remove_javascript = True

    preprocess_regexps = [
        (re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]

    language = 'en_GB'
    masthead_url = 'http://www.cosmopolitan.co.uk/cm/cosmopolitanuk/site_images/header/cosmouk_logo_home.gif'

    keep_only_tags = [
        dict(attrs={'class' : ['dateAuthor', 'publishDate']}),
        dict(name='div', attrs={'id' : ['main_content']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
        dict(name='div', attrs={'class' : 'blog_module_about_the_authors'}),
        dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
        dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
    ]

    feeds = [
        (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'),
        (u'Men', u'http://cosmopolitan.co.uk/men/rss/'),
        (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'),
        (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'),
        (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'),
        (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'),
        (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]

    def postprocess_html(self, soup, first):
        # Process all the images, converting each to greyscale in place.
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup

@@ -1,10 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     title = u'The Daily Mirror'
     description = 'News as provide by The Daily Mirror -UK'

     __author__ = 'Dave Asbury'
+    # last updated 30/10/11
     language = 'en_GB'

     cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@@ -12,26 +13,30 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

-    oldest_article = 1
-    max_articles_per_feed = 100
+    oldest_article = 2
+    max_articles_per_feed = 30
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
+    extra_css = '''
+        body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+    '''

     keep_only_tags = [
-        dict(name='h1'),
-        dict(attrs={'class':['article-attr']}),
-        dict(name='div', attrs={'class' : [ 'article-body', 'crosshead']})
-    ]
+        dict(name='div',attrs={'id' : 'body-content'})
+    ]
+
+    remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]

     remove_tags = [
-        dict(name='div', attrs={'class' : ['caption', 'article-resize']}),
-        dict( attrs={'class':'append-html'})
-    ]
+        dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
+        dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
+        dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
+        dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
+    ]
+
+    preprocess_regexps = [
+        (re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]

     feeds = [
@@ -43,10 +48,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
         ,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
         ,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
         ,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
         ,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
         ,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
         ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')

         # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
     ]

@@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe

class DailyWritingTips(BasicNewsRecipe):
    title      = u'Daily Writing Tips'
    language   = 'en_GB'
    __author__ = 'NotTaken'

    oldest_article        = 7  # days
    max_articles_per_feed = 40
    use_embedded_content  = True
    no_stylesheets        = True
    auto_cleanup          = False
    encoding              = 'utf-8'

    feeds = [
        ('Latest tips',
         'http://feeds2.feedburner.com/DailyWritingTips'),
    ]

@@ -8,11 +8,7 @@ class DallasNews(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False

-    remove_tags_before = dict(name='h1')
-    keep_only_tags = {'class':lambda x: x and 'article' in x}
-    remove_tags = [
-        {'class':['DMNSocialTools', 'article ', 'article first ', 'article premium']},
-    ]
+    auto_cleanup = True

     feeds = [
         ('Local News',

@@ -0,0 +1,62 @@
__license__   = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.defensenews.com
'''
from calibre.web.feeds.news import BasicNewsRecipe

class DefenseNews(BasicNewsRecipe):
    title                 = 'Defense News'
    __author__            = 'Darko Miletic'
    description           = 'Find late-breaking defense news from the leading defense news weekly'
    publisher             = 'Gannett Government Media Corporation'
    category              = 'defense news, defence news, defense, defence, defence budget, defence policy'
    oldest_article        = 31
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .info{font-size: small; color: gray}
    """

    conversion_options = {
        'comment'     : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [
        dict(name=['meta','link'])
        ,dict(attrs={'class':['toolbar','related','left','right']})
    ]
    remove_tags_before = dict(attrs={'class':'storyWrp'})
    remove_tags_after  = dict(attrs={'class':'middle'})
    remove_attributes  = ['lang']

    feeds = [
        (u'Europe'               , u'http://www.defensenews.com/rss/eur/')
        ,(u'Americas'            , u'http://www.defensenews.com/rss/ame/')
        ,(u'Asia & Pacific rim'  , u'http://www.defensenews.com/rss/asi/')
        ,(u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/')
        ,(u'Air'                 , u'http://www.defensenews.com/rss/air/')
        ,(u'Land'                , u'http://www.defensenews.com/rss/lan/')
        ,(u'Naval'               , u'http://www.defensenews.com/rss/sea/')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

@@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
     max_articles_per_feed = 100
     use_embedded_content = False
     no_stylesheets = True
-    language = 'de_ES'
+    language = 'de'
     publication_type = 'newsportal'
     remove_empty_feeds = True
     masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'

@@ -46,7 +46,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
         dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
         dict(name = 'div', attrs = {'class' : 'uniBox'}),
         dict(name = 'object', attrs = {}),
-        dict(name = 'h3', attrs = {})
+        dict(name = 'h3', attrs = {}),
+        dict(attrs={'class':'twitter-share-button'})
     ]

     preprocess_regexps = [
@@ -58,3 +59,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
             (r'\s*</', lambda match: '</'),
         ]
     ]
+
+    def skip_ad_pages(self, soup):
+        if 'Advertisement' in soup.title:
+            nexturl = soup.find('a')['href']
+            return self.index_to_soup(nexturl, raw=True)

@@ -2,6 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 http://www.dilbert.com
+DrMerry added cover Image 2011-11-12
 '''

 from calibre.web.feeds.recipes import BasicNewsRecipe
@@ -9,7 +10,7 @@ import re

 class DilbertBig(BasicNewsRecipe):
     title = 'Dilbert'
-    __author__ = 'Darko Miletic and Starson17'
+    __author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
     description = 'Dilbert'
     reverse_article_order = True
     oldest_article = 15
@@ -20,6 +21,7 @@ class DilbertBig(BasicNewsRecipe):
     publisher = 'UNITED FEATURE SYNDICATE, INC.'
     category = 'comic'
     language = 'en'
+    cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'

     conversion_options = {
         'comments' : description

@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
                    ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
         dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
         dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -56,6 +54,19 @@ class Economist(BasicNewsRecipe):
         return br
     '''

+    def get_cover_url(self):
+        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
+        div = soup.find('div', attrs={'class':lambda x: x and
+            'print-cover-links' in x})
+        a = div.find('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            url = 'http://www.economist.com' + url
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'class':'cover-content'})
+        img = div.find('img', src=True)
+        return img.get('src')
+
     def parse_index(self):
         return self.economist_parse_index()

@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
                    ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
         dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
         dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -40,6 +38,18 @@ class Economist(BasicNewsRecipe):
     # downloaded with connection reset by peer (104) errors.
     delay = 1

+    def get_cover_url(self):
+        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
+        div = soup.find('div', attrs={'class':lambda x: x and
+            'print-cover-links' in x})
+        a = div.find('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            url = 'http://www.economist.com' + url
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'class':'cover-content'})
+        img = div.find('img', src=True)
+        return img.get('src')
+
     def parse_index(self):
         try:

@@ -0,0 +1,58 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup

class Ekathimerini(BasicNewsRecipe):
    title        = 'ekathimerini'
    __author__   = 'Thomas Scholl'
    description  = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher    = 'Kathimerini'
    category     = 'news, GR'
    language     = 'en_GR'
    encoding     = 'windows-1253'
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1
    keep_only_tags = [dict(name='td', attrs={'class':'news'})]

    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        for article in idx.findAll('item'):
            cat = u''
            cat_elem = article.find('subcat')
            if cat_elem:
                cat = self.tag_to_string(cat_elem)

            if cat == category:
                desc_html = self.tag_to_string(article.find('description'))
                description = self.tag_to_string(BeautifulSoup(desc_html))

                a = {
                    'title': self.tag_to_string(article.find('title')),
                    'url': self.tag_to_string(article.find('link')),
                    'description': description,
                    'date' : self.tag_to_string(article.find('pubdate')),
                }
                yield a

    def parse_index(self):
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        cats = list(set([self.tag_to_string(subcat) for subcat in idx.findAll('subcat')]))
        cats.sort()

        feeds = [(u'News', list(self.find_articles(idx, u'')))]

        for cat in cats:
            feeds.append((cat.capitalize(), list(self.find_articles(idx, cat))))

        return feeds

    def print_version(self, url):
        return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')
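
    # For illustration only: the rewrite above applied to a hypothetical
    # article path (the id below is made up, not a real ekathimerini url):
    #   http://www.ekathimerini.com/4dcgi/_w_articles_politics_1_123456
    #   -> http://www.ekathimerini.com/4Dcgi/4dcgi/_w_articles_politics_1_123456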

@@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True

-    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
+    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]

     extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '

@@ -5,12 +5,11 @@
 __license__     = 'GPL v3'
 __copyright__   = '04 December 2010, desUBIKado'
 __author__      = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__     = 'v0.07'
-__date__        = '06, February 2011'
+__version__     = 'v0.08'
+__date__        = '13, November 2011'
 '''
 elperiodicodearagon.com
 '''

-import re
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -20,13 +19,13 @@ class elperiodicodearagon(BasicNewsRecipe):
     description = u'Noticias desde Aragon'
     publisher   = u'elperiodicodearagon.com'
     category    = u'news, politics, Spain, Aragon'
-    oldest_article = 2
+    oldest_article = 1
     delay = 0
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     language = 'es'
-    encoding = 'utf8'
+    encoding = 'iso-8859-1'
     remove_empty_feeds = True
     remove_javascript = True
@@ -39,61 +38,30 @@ class elperiodicodearagon(BasicNewsRecipe):
     }

     feeds = [
-        (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
-        (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
-        (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
-        (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
-        (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
-        (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
-        (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
-        (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
-        (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
-        (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
+        (u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
+        (u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
+        (u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
+        (u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
+        (u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
+        (u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
+        (u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
+        (u'CAI Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
+        (u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
+        (u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
+        (u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
+        (u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
+        (u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
+        (u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
+        (u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
+        (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml')
     ]

-    extra_css = '''
-        h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
-        h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
-        h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
-        .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
-        img{margin-bottom: 0.4em}
-    '''
-
     remove_attributes = ['height','width']

-    keep_only_tags = [dict(name='div', attrs={'id':'contenidos'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'Noticia'})]

-    # Quitar toda la morralla
-    remove_tags = [dict(name='ul', attrs={'class':'herramientasDeNoticia'}),
-                   dict(name='span', attrs={'class':'MasInformacion '}),
-                   dict(name='span', attrs={'class':'MasInformacion'}),
-                   dict(name='div', attrs={'class':'Middle'}),
-                   dict(name='div', attrs={'class':'MenuCabeceraRZaragoza'}),
-                   dict(name='div', attrs={'id':'MenuCabeceraRZaragoza'}),
-                   dict(name='div', attrs={'class':'MenuEquipo'}),
-                   dict(name='div', attrs={'class':'TemasRelacionados'}),
-                   dict(name='div', attrs={'class':'GaleriaEnNoticia'}),
-                   dict(name='div', attrs={'class':'Recorte'}),
-                   dict(name='div', attrs={'id':'NoticiasenRecursos'}),
-                   dict(name='div', attrs={'id':'NoticiaEnPapel'}),
-                   dict(name='p', attrs={'class':'RecorteEnNoticias'}),
-                   dict(name='div', attrs={'id':'Comparte'}),
-                   dict(name='div', attrs={'id':'CajaComparte'}),
-                   dict(name='a', attrs={'class':'EscribirComentario'}),
-                   dict(name='a', attrs={'class':'AvisoComentario'}),
-                   dict(name='div', attrs={'class':'CajaAvisoComentario'}),
-                   dict(name='div', attrs={'class':'navegaNoticias'}),
-                   dict(name='div', attrs={'class':'Mensaje'}),
-                   dict(name='div', attrs={'id':'PaginadorDiCom'}),
-                   dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
-                   dict(name='div', attrs={'id':'CintilloComentario'}),
-                   dict(name='div', attrs={'id':'EscribeComentario'}),
-                   dict(name='div', attrs={'id':'FormularioComentario'}),
-                   dict(name='div', attrs={'id':'FormularioNormas'})]

     # Recuperamos la portada de papel (la imagen format=1 tiene mayor resolucion)
     def get_cover_url(self):
@@ -104,23 +72,7 @@ class elperiodicodearagon(BasicNewsRecipe):
             return image['src'].rstrip('format=2') + 'format=1'
         return None

-    # Para quitar espacios entre la noticia y los comentarios (lineas 1 y 2)
-    # El indice no apuntaba correctamente al empiece de la noticia (linea 3)
-    preprocess_regexps = [
-        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
-    ]
+    # Usamos la versión para móviles
+    def print_version(self, url):
+        return url.replace('http://www.elperiodicodearagon.com/', 'http://www.elperiodicodearagon.com/m/')

-    # Para sustituir el video incrustado de YouTube por una imagen
-    def preprocess_html(self, soup):
-        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
-            if video_yt:
-                video_yt.name = 'img'
-                fuente = video_yt['src']
-                fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
-                video_yt['src'] = fuente2 + '/0.jpg'
-
-        return soup

View File

@ -4,7 +4,8 @@ __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
elmundo.es
'''
+import re
+import time
from calibre.web.feeds.news import BasicNewsRecipe

class ElMundo(BasicNewsRecipe):
@ -18,12 +19,15 @@ class ElMundo(BasicNewsRecipe):
    no_stylesheets       = True
    use_embedded_content = False
    encoding             = 'iso8859_15'
+   remove_javascript    = True
+   remove_empty_feeds   = True
    language             = 'es'
    masthead_url         = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
    publication_type     = 'newspaper'
    extra_css            = """
        body{font-family: Arial,Helvetica,sans-serif}
        .metadata_noticia{font-size: small}
+       .pestana_GDP{font-size: small; font-weight:bold}
        h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
        .hora{color: red}
        .update{color: gray}
@ -41,8 +45,11 @@ class ElMundo(BasicNewsRecipe):
    remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
    remove_attributes = ['lang','border']
    remove_tags = [
-       dict(name='div', attrs={'class':['herramientas','publicidad_google']})
-       ,dict(name='div', attrs={'id':'modulo_multimedia' })
+       dict(name='div', attrs={'class':['herramientas','publicidad_google','comenta','col col-2b','apoyos','no-te-pierdas']})
+       ,dict(name='div', attrs={'class':['publicidad publicidad_cuerpo_noticia','comentarios_nav','mensaje_privado','interact']})
+       ,dict(name='div', attrs={'class':['num_comentarios estirar']})
+       ,dict(name='span', attrs={'class':['links_comentar']})
+       ,dict(name='div', attrs={'id':['comentar']})
        ,dict(name='ul', attrs={'class':'herramientas' })
        ,dict(name=['object','link','embed','iframe','base','meta'])
    ]
@ -50,13 +57,31 @@ class ElMundo(BasicNewsRecipe):
    feeds = [
        (u'Portada'          , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml'      )
        ,(u'Deportes'        , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
-       ,(u'Economia'        , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml'     )
-       ,(u'Espana'          , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml'       )
+       ,(u'Econom\xeda'     , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml'     )
+       ,(u'Espa\xf1a'       , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml'       )
        ,(u'Internacional'   , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml')
        ,(u'Cultura'         , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml'      )
-       ,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml'      )
-       ,(u'Comunicacion'    , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
-       ,(u'Television'      , u'http://estaticos.elmundo.es/elmundo/rss/television.xml'   )
+       ,(u'Ciencia/Ecolog\xeda', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml'   )
+       ,(u'Comunicaci\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
+       ,(u'Televisi\xf3n'   , u'http://estaticos.elmundo.es/elmundo/rss/television.xml'   )
+       ,(u'Salud'           , u'http://estaticos.elmundo.es/elmundosalud/rss/portada.xml' )
+       ,(u'Solidaridad'     , u'http://estaticos.elmundo.es/elmundo/rss/solidaridad.xml'  )
+       ,(u'Su vivienda'     , u'http://estaticos.elmundo.es/elmundo/rss/suvivienda.xml'   )
+       ,(u'Motor'           , u'http://estaticos.elmundo.es/elmundomotor/rss/portada.xml' )
+       ,(u'Madrid'          , u'http://estaticos.elmundo.es/elmundo/rss/madrid.xml'       )
+       ,(u'Barcelona'       , u'http://estaticos.elmundo.es/elmundo/rss/barcelona.xml'    )
+       ,(u'Pa\xeds Vasco'   , u'http://estaticos.elmundo.es/elmundo/rss/paisvasco.xml'    )
+       ,(u'Baleares'        , u'http://estaticos.elmundo.es/elmundo/rss/baleares.xml'     )
+       ,(u'Castilla y Le\xf3n', u'http://estaticos.elmundo.es/elmundo/rss/castillayleon.xml')
+       ,(u'Valladolid'      , u'http://estaticos.elmundo.es/elmundo/rss/valladolid.xml'   )
+       ,(u'Valencia'        , u'http://estaticos.elmundo.es/elmundo/rss/valencia.xml'     )
+       ,(u'Alicante'        , u'http://estaticos.elmundo.es/elmundo/rss/alicante.xml'     )
+       ,(u'Castell\xf3n'    , u'http://estaticos.elmundo.es/elmundo/rss/castellon.xml'    )
+       ,(u'Andaluc\xeda'    , u'http://estaticos.elmundo.es/elmundo/rss/andalucia.xml'    )
+       ,(u'Sevilla'         , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_sevilla.xml')
+       ,(u'M\xe1laga'       , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_malaga.xml')
    ]

    def preprocess_html(self, soup):
@ -67,3 +92,34 @@ class ElMundo(BasicNewsRecipe):

    def get_article_url(self, article):
        return article.get('guid', None)

+   preprocess_regexps = [
+       # Para presentar la imagen de los videos incrustados
+       (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+       (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+       (re.compile(r'var video=', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+       # Para que no salga la numeración de comentarios: 1, 2, 3 ...
+       (re.compile(r'<ol>\n<li style="z-index:', re.DOTALL|re.IGNORECASE), lambda match: '<ul><li style="z-index:'),
+       (re.compile(r'</ol>\n<div class="num_comentarios estirar">', re.DOTALL|re.IGNORECASE), lambda match: '</ul><div class="num_comentarios estirar">'),
+   ]

+   # Obtener la imagen de portada
+   def get_cover_url(self):
+       cover = None
+       st = time.localtime()
+       year = str(st.tm_year)
+       month = "%.2d" % st.tm_mon
+       day = "%.2d" % st.tm_mday
+       #http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg
+       cover='http://img.kiosko.net/'+ year + '/' + month + '/' + day +'/es/elmundo.750.jpg'
+       br = BasicNewsRecipe.get_browser()
+       try:
+           br.open(cover)
+       except:
+           self.log("\nPortada no disponible")
+           cover ='http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
+       return cover
@ -56,6 +56,7 @@ class ElUniversal(BasicNewsRecipe):
    ]

    def print_version(self, url):
-       rp,sep,rest = url.rpartition('/')
-       return rp + sep + 'imp_' + rest
+       return url + '-imp'
+
+   def get_article_url(self, article):
+       return article.get('guid', None)
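The two print-URL schemes are easy to compare on a sample link (the path is invented):

    url = 'http://www.eluniversal.com/2011/11/18/ejemplo'
    rp, sep, rest = url.rpartition('/')
    print(rp + sep + 'imp_' + rest)   # old scheme: .../11/18/imp_ejemplo
    print(url + '-imp')               # new scheme: .../11/18/ejemplo-imp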
@ -1,35 +1,43 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
-__author__ = 'Gerardo Diez'
-__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
-description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
-__docformat__ = 'restructuredtext en'
+__copyright__ = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
+__author__ = 'desUBIKado, based on an earlier version by Gerardo Diez'
+__version__ = 'v1.01'
+__date__ = '13, November 2011'
'''
-expansion.es
+http://www.expansion.com/
'''
+
+import time
+import re
from calibre.web.feeds.recipes import BasicNewsRecipe

-class Publico(BasicNewsRecipe):
-   title = u'Expansion.com'
-   __author__ = 'Gerardo Diez'
-   publisher = u'Unidad Editorial Información Económica, S.L.'
-   category = 'finances, catalunya'
-   oldest_article = 1
+class expansion_spanish(BasicNewsRecipe):
+   __author__ = 'Gerardo Diez & desUBIKado'
+   description = 'Financial news from Spain'
+   title = u'Expansion'
+   publisher = u'Unidad Editorial Internet, S.L.'
+   category = 'news, finances, Spain'
+   oldest_article = 2
+   simultaneous_downloads = 10
    max_articles_per_feed = 100
-   simultaneous_downloads = 10
-   cover_url = u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
-   timefmt = '[%A, %d %B, %Y]'
-   encoding = 'latin'
+   timefmt = '[%a, %d %b, %Y]'
+   encoding = 'iso-8859-15'
    language = 'es'
-   remove_javascript = True
-   no_stylesheets = True
+   use_embedded_content = False
+   remove_javascript = True
+   no_stylesheets = True
+   remove_empty_feeds = True

    keep_only_tags = dict(name='div', attrs={'class':['noticia primer_elemento']})
    remove_tags = [
-       dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
-       dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
+       dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
+       dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
        dict(name='span', attrs={'class':['comentarios']}),
        dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
-       dict(name='div', attrs={'id':['comentarios_lectores_listado']})
+       dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
    ]
    feeds = [
        (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
        (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
        (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
        (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
        (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
        (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
-       (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
+       (u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
        (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
-       (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
+       (u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
        (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
        (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
        (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
        (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
-       (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
+       (u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
-       (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
+       (u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
        (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
-       (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
+       (u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
-       (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
+       (u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
        (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
        (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
        (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
        (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
-       (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
-       (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
+       (u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
+       (u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
        (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
-       (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
+       (u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
        (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
        (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
-       (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
+       (u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
        (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
-       (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
+       (u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
        (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
        (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
-       (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
+       (u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
-       (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
+       (u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
    ]

+   # Obtener la imagen de portada
+   def get_cover_url(self):
+       cover = None
+       st = time.localtime()
+       year = str(st.tm_year)
+       month = "%.2d" % st.tm_mon
+       day = "%.2d" % st.tm_mday
+       #http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg
+       cover='http://img5.kiosko.net/'+ year + '/' + month + '/' + day +'/es/expansion.750.jpg'
+       br = BasicNewsRecipe.get_browser()
+       try:
+           br.open(cover)
+       except:
+           self.log("\nPortada no disponible")
+           cover ='http://www.aproahp.org/enlaces/images/diario_expansion.gif'
+       return cover

+   # Para que no salte la publicidad al recuperar la noticia, y que siempre se recupere
+   # la página web, mando la variable "t" con la hora "linux" o "epoch" actual
+   # haciendole creer al sitio web que justo se acaba de ver la publicidad
+   def print_version(self, url):
+       st = time.time()
+       segundos = str(int(st))
+       parametros = '.html?t=' + segundos
+       return url.replace('.html', parametros)

+   _processed_links = []
+
+   def get_article_url(self, article):
+       # Para obtener la url original del artículo a partir de la de "feedsportal"
+       link = article.get('link', None)
+       if link is None:
+           return article
+       if link.split('/')[-1]=="story01.htm":
+           link=link.split('/')[-2]
+           a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
+           b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
+           for i in range(0,len(a)):
+               link=link.replace(a[i],b[i])
+           link="http://"+link
+       # Eliminar artículos duplicados en otros feeds
+       if not (link in self._processed_links):
+           self._processed_links.append(link)
+       else:
+           link = None
+       return link

+   # Un poco de css para mejorar la presentación de las noticias
+   extra_css = '''
+       .entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
+       .fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+   '''

+   # Para presentar la imagen de los videos incrustados
+   preprocess_regexps = [
+       (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+       (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+       (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+   ]
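A quick way to see what the feedsportal de-mangling in get_article_url produces; the mangled tail here is invented purely to show the substitution order, not a real feed item:

    mangled = 'http://rss.feedsportal.com/c/666/f/0/s/0/l/0L0Sexpansion0N0Cexample0Bhtml/story01.htm'
    link = mangled.split('/')[-2]          # take the encoded segment before story01.htm
    a = ['0B','0C','0D','0E','0F','0G','0N','0L0S','0A']
    b = ['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
    for i in range(len(a)):
        link = link.replace(a[i], b[i])
    print('http://' + link)                # -> http://www.expansion.com/example.html

The recipe also remembers every decoded link in _processed_links so the same article arriving via several feeds is downloaded only once.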
@ -8,31 +8,35 @@ class FSP(BasicNewsRecipe):
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
                  u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
-   INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
+   #found this to be the easiest place to find the index page (13-Nov-2011).
+   # searching for the "Indice Geral" link
+   HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
+   masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed = 40
    remove_javascript = True
    needs_subscription = True
-   remove_tags_before = dict(name='b')
+   remove_tags_before = dict(name='p')
    remove_tags = [dict(name='td', attrs={'align':'center'})]
    remove_attributes = ['height','width']
-   masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    # fixes the problem with the section names
    section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
                    'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
                    'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
-                   'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
+                   'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio', \
+                   'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios', \
+                   'veiculos' : u've\xedculos', 'corrida' : 'folha corrida'}

    # this solves the problem with truncated content in Kindle
    conversion_options = {'linearize_tables' : True}

    # this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
    # Indice e Comunicar Erros
-   preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
-                                     re.DOTALL|re.IGNORECASE), lambda match: r''),
-                         (re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
-                                     re.DOTALL|re.IGNORECASE), lambda match: r'')]
+   preprocess_regexps = [(re.compile(r'<!--/NOTICIA-->.*Comunicar Erros</a>',
+                                     re.DOTALL|re.IGNORECASE), lambda match: r'')]

    def get_browser(self):
@ -49,7 +53,25 @@ class FSP(BasicNewsRecipe):

    def parse_index(self):
-       soup = self.index_to_soup(self.INDEX)
+       #Searching for the index page on the HOMEPAGE
+       hpsoup = self.index_to_soup(self.HOMEPAGE)
+       indexref = hpsoup.find('a', href=re.compile('^indices.*'))
+       self.log('--> tag containing the today s index: ', indexref)
+       INDEX = indexref['href']
+       INDEX = 'http://www1.folha.uol.com.br/fsp/'+INDEX
+       self.log('--> INDEX after extracting href and adding prefix: ', INDEX)
+       # ... and taking the opportunity to get the cover image link
+       coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href']
+       if coverurl:
+           self.log('--> tag containing the today s cover: ', coverurl)
+           coverurl = coverurl.replace('htm', 'jpg')
+           coverurl = 'http://www1.folha.uol.com.br/fsp/images/'+coverurl
+           self.log('--> coverurl after extracting href and adding prefix: ', coverurl)
+           self.cover_url = coverurl
+       #soup = self.index_to_soup(self.INDEX)
+       soup = self.index_to_soup(INDEX)
        feeds = []
        articles = []
        section_title = "Preambulo"
@ -68,8 +90,12 @@ class FSP(BasicNewsRecipe):
                self.log('--> new section title: ', section_title)
            if strpost.startswith('<a href'):
                url = post['href']
+               #this bit is kept if they ever go back to the old format (pre Nov-2011)
                if url.startswith('/fsp'):
                    url = 'http://www1.folha.uol.com.br'+url
+               #
+               if url.startswith('http://www1.folha.uol.com.br/fsp'):
+                   #url = 'http://www1.folha.uol.com.br'+url
                    title = self.tag_to_string(post)
                    self.log()
                    self.log('--> post: ', post)
@ -82,15 +108,11 @@ class FSP(BasicNewsRecipe):
        # keeping the front page url
        minha_capa = feeds[0][1][1]['url']

-       # removing the 'Preambulo' section
+       # removing the first section (now called 'top')
        del feeds[0]

-       # creating the url for the cover image
-       coverurl = feeds[0][1][0]['url']
-       coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
-       coverurl = coverurl.replace('01.htm', '.jpg')
-       self.cover_url = coverurl

        # inserting the cover page as the first article (nicer for kindle users)
        feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
        return feeds
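The cover-link rewrite in parse_index is a plain string substitution; a minimal sketch, where 'cp19112011.htm' is a guessed example of what the homepage link looks like, not a verified filename:

    coverurl = 'cp19112011.htm'.replace('htm', 'jpg')
    print('http://www1.folha.uol.com.br/fsp/images/' + coverurl)
    # -> http://www1.folha.uol.com.br/fsp/images/cp19112011.jpg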
recipes/formulaas.recipe (new file, 50 lines):
@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
formula-as.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FormulaAS(BasicNewsRecipe):
title = u'Formula AS'
__author__ = u'Silviu Cotoar\u0103'
publisher = u'Formula AS'
description = u'Formula AS'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Romania'
encoding = 'utf-8'
cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'item padded'})
]
remove_tags = [
dict(name='ul', attrs={'class':'subtitle lower'})
]
remove_tags_after = [
dict(name='ul', attrs={'class':'subtitle lower'}),
dict(name='div', attrs={'class':'item-brief-options'})
]
feeds = [
(u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)
recipes/frandroid.recipe (new file, 10 lines):
@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318572550(BasicNewsRecipe):
title = u'FrAndroid'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'FrAndroid', u'http://feeds.feedburner.com/Frandroid')]
@ -1,35 +1,61 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2010-2011, Christian Schmitt'
+'''
+fr-online.de
+'''
from calibre.web.feeds.recipes import BasicNewsRecipe

-class AdvancedUserRecipe(BasicNewsRecipe):
-   title = u'Frankfurter Rundschau'
-   __author__ = 'schuster'
-   oldest_article = 1
-   max_articles_per_feed = 100
-   no_stylesheets = True
-   use_embedded_content = False
-   language = 'de'
-   remove_javascript = True
-   cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823538/-/logo.png'
-   extra_css = '''
-       h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-       h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-       img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
-       p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-       body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-   '''
-
-   feeds = [(u'Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'),
-       (u'Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'),
-       (u'Meinungen', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'),
-       (u'Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'),
-       (u'Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'),
-       (u'Kultur', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'),
-       (u'Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'),
-       (u'Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'),
-       (u'Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml')
-   ]
+class FROnlineRecipe(BasicNewsRecipe):
+   title = 'Frankfurter Rundschau'
+   __author__ = 'maccs'
+   description = 'Nachrichten aus D und aller Welt'
+   encoding = 'utf-8'
+   masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+   publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
+   category = 'news, germany, world'
+   language = 'de'
+   publication_type = 'newspaper'
+   use_embedded_content = False
+   remove_javascript = True
+   no_stylesheets = True
+   oldest_article = 1           # Increase this number if you're interested in older articles
+   max_articles_per_feed = 50   # Seems a reasonable number to me
+   extra_css = '''
+       body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
+       .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
+       .p--heading-1 {font-weight: bold;}
+       .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
+   '''
+   keep_only_tags = [{'class':'ArticleHeadlineH1'}, {'class':'article_text'}]
+   cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+   cover_margins = (100, 150, '#ffffff')

+   feeds = []
+   feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
+   feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
+   feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
+   feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
+   feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
+   feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
+   feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
+   feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
+   feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
+   feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
+   feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
+   feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
+   feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
+   feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
+   feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
+   feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
+   feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))

    def print_version(self, url):
        return url.replace('index.html', 'view/printVersion/-/index.html')
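A quick check of the print-version rewrite above (the article path is invented):

    url = 'http://www.fr-online.de/politik/beispiel/-/123/456/-/index.html'
    print(url.replace('index.html', 'view/printVersion/-/index.html'))
    # -> http://www.fr-online.de/politik/beispiel/-/123/456/-/view/printVersion/-/index.html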
@ -18,7 +18,7 @@ class FrazPC(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
-   cover_url='http://www.frazpc.pl/images/logo.png'

    feeds = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
@ -33,6 +33,7 @@ class FrazPC(BasicNewsRecipe):
        dict(name='div', attrs={'class':'comments_box'})
    ]
+   remove_tags_after=dict(name='div', attrs={'class':'content'})
    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
    remove_attributes = [ 'width', 'height' ]
@ -16,7 +16,7 @@ class FTDe(BasicNewsRecipe):
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    language = 'de'
-   max_articles_per_feed = 40
+   max_articles_per_feed = 30
    no_stylesheets = True

    remove_tags = [dict(id='navi_top'),
@ -84,18 +84,18 @@ class FTDe(BasicNewsRecipe):
        dict(name='div', attrs={'class':'artikelsplitfaq'})]
    #remove_tags_after = [dict(name='a', attrs={'class':'more'})]

-   feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
-       ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
-       ('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
-       ('Politik', 'http://www.ftd.de/rss2/politik'),
-       ('Karriere_Management', 'http://www.ftd.de/rss2/karriere-management'),
-       ('IT_Medien', 'http://www.ftd.de/rss2/it-medien'),
-       ('Wissen', 'http://www.ftd.de/rss2/wissen'),
-       ('Sport', 'http://www.ftd.de/rss2/sport'),
-       ('Auto', 'http://www.ftd.de/rss2/auto'),
-       ('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
+   feeds = [
+       ('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
+       ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
+       ('Meinungen', 'http://www.ftd.de/rss2/meinungshungrige'),
+       ('Politik', 'http://www.ftd.de/rss2/politik'),
+       ('Management & Karriere', 'http://www.ftd.de/rss2/karriere-management'),
+       ('IT & Medien', 'http://www.ftd.de/rss2/it-medien'),
+       ('Wissen', 'http://www.ftd.de/rss2/wissen'),
+       ('Sport', 'http://www.ftd.de/rss2/sport'),
+       ('Auto', 'http://www.ftd.de/rss2/auto'),
+       ('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
    ]

    def print_version(self, url):
@ -1,35 +1,82 @@
+#!/usr/bin/python
from calibre.web.feeds.news import BasicNewsRecipe
import re
+from calibre.utils.magick import Image, create_canvas

class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    title = u'Geek and Poke'
    __author__ = u'DrMerry'
    description = u'Geek and Poke Cartoons'
+   publisher = u'Oliver Widder'
+   author = u'Oliver Widder, DrMerry (calibre-code), calibre'
    oldest_article = 31
    max_articles_per_feed = 100
    language = u'en'
-   simultaneous_downloads = 5
+   simultaneous_downloads = 1
    #delay = 1
-   timefmt = ' [%A, %d %B, %Y]'
+   timefmt = ' [%a, %d %B, %Y]'
    summary_length = -1
    no_stylesheets = True
+   category = 'News.IT, Cartoon, Humor, Geek'
+   use_embedded_content = False
    cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'blog'
+   masthead_url = None
+   conversion_options = {
+       'comments'  : ''
+       ,'tags'     : category
+       ,'language' : language
+       ,'publisher': publisher
+       ,'author'   : author
+   }

-   preprocess_regexps = [ (re.compile(r'(<p>&nbsp;</p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
-       (re.compile(r'(&nbsp;| )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
-       (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
-   ]
-   extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'
-   remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
-   remove_tags_after = dict(name='div', attrs={'class':'entry-body'})
+   remove_tags_before = dict(name='p', attrs={'class':'content-nav'})
+   remove_tags_after = dict(name='div', attrs={'class':'entry-content'})
+   remove_tags = [dict(name='div', attrs={'class':'entry-footer'}),
+       dict(name='div', attrs={'id':'alpha'}),
+       dict(name='div', attrs={'id':'gamma'}),
+       dict(name='iframe'),
+       dict(name='p', attrs={'class':'content-nav'})]

+   filter_regexps = [(r'feedburner\.com'),
+       (r'pixel.quantserve\.com'),
+       (r'googlesyndication\.com'),
+       (r'yimg\.com'),
+       (r'scorecardresearch\.com')]

+   preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->|<h2[^>]*>[^<]*</h2>[^<]*)', re.DOTALL|re.IGNORECASE),lambda match: ''),
+       (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
+       (re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + match.group(2) + '</h3>'),
+       (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL|re.IGNORECASE),lambda match: '<div id="merryImage"><cite>' + match.group(2) + '</cite><br>' + match.group(1) + '</div>'),
+       (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>'),
+   ]

+   extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'

+   def postprocess_html(self, soup, first):
+       for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+           iurl = tag['src']
+           img = Image()
+           img.open(iurl)
+           #width, height = img.size
+           #print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
+           img.trim(0)
+           left=0
+           top=0
+           border_color='#ffffff'
+           width, height = img.size
+           height_correction = 1.17
+           canvas = create_canvas(width, height*height_correction, border_color)
+           canvas.compose(img, left, top)
+           canvas.save(iurl)
+       return soup

-   feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]
+   feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']
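The postprocess_html above trims the cartoon's uniform border and then pastes it onto a slightly taller white canvas; a minimal standalone sketch using the same calibre.utils.magick calls ('cartoon.jpg' is a placeholder path):

    from calibre.utils.magick import Image, create_canvas
    img = Image()
    img.open('cartoon.jpg')
    img.trim(0)                                    # drop the uniform border first
    w, h = img.size
    canvas = create_canvas(w, h * 1.17, '#ffffff') # 17% taller white canvas
    canvas.compose(img, 0, 0)                      # paste the trimmed image at the top-left
    canvas.save('cartoon.jpg')                     # overwrite in place, as the recipe does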
@ -0,0 +1,90 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GiveMeSomethingToRead(BasicNewsRecipe):
title = u'Give Me Something To Read'
description = 'Curation / aggregation of articles on diverse topics'
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://givemesomethingtoread.com'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('The Arts','arts',25),
('Science','science',30),
('Technology','technology',30),
('Politics','politics',20),
('Media','media',30),
('Crime','crime',15),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/tagged/'+tag
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
headers = soup.findAll('h2')
if len(headers) == 0:
break
for header in headers:
atag = header.find('a')
url = atag['href']
# skip promotionals and duplicates
if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(header)
self.log('\tFound article:', title)
#self.log('\t', url)
desc = header.parent.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = header.parent.previousSibling
# navigate up to find h3, which contains the date
while p:
if hasattr(p,'name') and p.name == 'h3':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
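The pagination scheme parse_index walks is simple enough to print directly; for the 'science' tag it visits:

    INDEX = 'http://givemesomethingtoread.com'
    tagurl = '/tagged/science'
    for pageno in range(1, 4):
        page = "%s%s/page/%d" % (INDEX, tagurl, pageno) if pageno > 1 else INDEX + tagurl
        print(page)
    # -> .../tagged/science, .../tagged/science/page/2, .../tagged/science/page/3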
@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318572445(BasicNewsRecipe):
title = u'Google Mobile Blog'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml')]
@ -12,13 +12,15 @@ class GN(BasicNewsRecipe):
    EDITION = 0

    __author__ = 'Piotr Kontek'
-   title = u'Gość niedzielny'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    temp_files = []
+   simultaneous_downloads = 1
+   masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
+   title = u'Gość niedzielny'

    articles_are_obfuscated = True

@ -63,7 +65,6 @@ class GN(BasicNewsRecipe):
            if img != None:
                a = img.parent
                self.EDITION = a['href']
-               self.title = img['alt']
                self.cover_url = 'http://www.gosc.pl' + img['src']
                if not first:
                    break
@ -94,16 +95,16 @@ class GN(BasicNewsRecipe):
    def find_articles(self, main_block):
        for a in main_block.findAll('div', attrs={'class':'prev_doc2'}):
            art = a.find('a')
            yield {
                'title' : self.tag_to_string(art),
                'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
                'date' : '',
                'description' : ''
            }
        for a in main_block.findAll('div', attrs={'class':'sr-document'}):
            art = a.find('a')
            yield {
                'title' : self.tag_to_string(art),
                'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
                'date' : '',
@ -9,6 +9,7 @@ www.guardian.co.uk
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

class Guardian(BasicNewsRecipe):
@ -16,16 +17,19 @@ class Guardian(BasicNewsRecipe):
    if date.today().weekday() == 6:
        base_url = "http://www.guardian.co.uk/theobserver"
        cover_pic = 'Observer digital edition'
+       masthead_url = 'http://static.guim.co.uk/sys-images/Guardian/Pix/site_furniture/2010/10/19/1287478087992/The-Observer-001.gif'
    else:
        base_url = "http://www.guardian.co.uk/theguardian"
        cover_pic = 'Guardian digital edition'
+       masthead_url = 'http://static.guim.co.uk/static/f76b43f9dcfd761f0ecf7099a127b603b2922118/common/images/logos/the-guardian/titlepiece.gif'

    __author__ = 'Seabound and Sujata Raman'
    language = 'en_GB'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
+   encoding = 'utf-8'

    # List of section titles to ignore
    # For example: ['Sport']
@ -41,6 +45,16 @@ class Guardian(BasicNewsRecipe):
        dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
        dict(name='ul', attrs={'class':["pagination"]}),
        dict(name='ul', attrs={'id':["content-actions"]}),
+       # article history link
+       dict(name='a', attrs={'class':["rollover history-link"]}),
+       # "a version of this article ..." spiel
+       dict(name='div' , attrs = { 'class' : ['section']}),
+       # "about this article" js dialog
+       dict(name='div', attrs={'class':["share-top",]}),
+       # author picture
+       dict(name='img', attrs={'class':["contributor-pic-small"]}),
+       # embedded videos/captions
+       dict(name='span',attrs={'class' : ['inline embed embed-media']}),
        #dict(name='img'),
    ]
    use_embedded_content = False
@ -67,6 +81,13 @@ class Guardian(BasicNewsRecipe):

    def preprocess_html(self, soup):
+       # multiple html sections in soup, useful stuff in the first
+       html = soup.find('html')
+       soup2 = BeautifulSoup()
+       soup2.insert(0,html)
+       soup = soup2

        for item in soup.findAll(style=True):
            del item['style']
@ -75,6 +96,17 @@ class Guardian(BasicNewsRecipe):
        for tag in soup.findAll(name=['ul','li']):
            tag.name = 'div'

+       # removes number next to rating stars
+       items_to_remove = []
+       rating_container = soup.find('div', attrs = {'class': ['rating-container']})
+       if rating_container:
+           for item in rating_container:
+               if isinstance(item, Tag) and str(item.name) == 'span':
+                   items_to_remove.append(item)
+       for item in items_to_remove:
+           item.extract()

        return soup

    def find_sections(self):
@ -119,10 +151,8 @@ class Guardian(BasicNewsRecipe):
        }

    def parse_index(self):
-       try:
-           feeds = []
-           for title, href in self.find_sections():
-               feeds.append((title, list(self.find_articles(href))))
-           return feeds
-       except:
-           raise NotImplementedError
+       feeds = []
+       for title, href in self.find_sections():
+           feeds.append((title, list(self.find_articles(href))))
+       return feeds
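The weekday switch at the top of the class picks the Observer edition on Sundays; a quick check of the convention it relies on:

    from datetime import date
    # Monday is 0 and Sunday is 6 in Python's date.weekday()
    print(date(2011, 12, 4).weekday())   # -> 6: a Sunday, so the Observer URLs are used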
recipes/hankyoreh.recipe (new file, 47 lines):
@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh(BasicNewsRecipe):
title = u'Hankyoreh'
language = 'ko'
description = u'The Hankyoreh News articles'
__author__ = 'Seongkyoun Yoo'
oldest_article = 5
recursions = 1
max_articles_per_feed = 5
no_stylesheets = True
keep_only_tags = [
dict(name='tr', attrs={'height':['60px']}),
dict(id=['fontSzArea'])
]
remove_tags = [
dict(target='_blank'),
dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
dict(name='iframe', attrs={'width':['590']}),
]
remove_tags_after = [
dict(target='_top')
]
feeds = [
('All News','http://www.hani.co.kr/rss/'),
('Politics','http://www.hani.co.kr/rss/politics/'),
('Economy','http://www.hani.co.kr/rss/economy/'),
('Society','http://www.hani.co.kr/rss/society/'),
('International','http://www.hani.co.kr/rss/international/'),
('Culture','http://www.hani.co.kr/rss/culture/'),
('Sports','http://www.hani.co.kr/rss/sports/'),
('Science','http://www.hani.co.kr/rss/science/'),
('Opinion','http://www.hani.co.kr/rss/opinion/'),
('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
('English Edition','http://www.hani.co.kr/rss/english_edition/'),
('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
('Multihani','http://www.hani.co.kr/rss/multihani/'),
('Lead','http://www.hani.co.kr/rss/lead/'),
('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
]
@ -0,0 +1,25 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh21(BasicNewsRecipe):
title = u'Hankyoreh21'
language = 'ko'
description = u'The Hankyoreh21 Magazine articles'
__author__ = 'Seongkyoun Yoo'
oldest_article = 20
recursions = 1
max_articles_per_feed = 120
no_stylesheets = True
remove_javascript = True
keep_only_tags = [
dict(name='font', attrs={'class':'t18bk'}),
dict(id=['fontSzArea'])
]
feeds = [
('Hani21','http://h21.hani.co.kr/rss/'),
]
@ -1,11 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
-import re

class AdvancedUserRecipe(BasicNewsRecipe):
-   title = 'heise online'
+   title = 'Heise-online'
    description = 'News vom Heise-Verlag'
    __author__ = 'schuster'
+   masthead_url = 'http://www.heise.de/icons/ho/heise_online_logo.gif'
+   publisher = 'Heise Zeitschriften Verlag GmbH & Co. KG'
    use_embedded_content = False
    language = 'de'
    oldest_article = 2
@ -14,11 +14,10 @@ class AdvancedUserRecipe(BasicNewsRecipe):
    remove_empty_feeds = True
    timeout = 5
    no_stylesheets = True
-   encoding = 'utf-8'

    remove_tags_after = dict(name ='p', attrs={'class':'editor'})
-   remove_tags = [{'class':'navi_top_container'},
+   remove_tags = [dict(id='navi_top_container'),
        dict(id='navi_bottom'),
        dict(id='mitte_rechts'),
        dict(id='navigation'),
@ -29,27 +28,31 @@ class AdvancedUserRecipe(BasicNewsRecipe):
        dict(id='seiten_navi'),
        dict(id='adbottom'),
        dict(id='sitemap'),
-       dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
-   ]
+       dict(name='div', attrs={'id':'sitemap'}),
+       dict(name='ul', attrs={'class':'erste_zeile'}),
+       dict(name='ul', attrs={'class':'zweite_zeile'}),
+       dict(name='div', attrs={'class':'navi_top_container'})]

    feeds = [
        ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-       ('iX', 'http://www.heise.de/ix/news/news.rdf'),
-       ('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
-       ('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-       ('Security', 'http://www.heise.de/security/news/news-atom.xml'),
-       ('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-       ('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
-       ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+       ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-       ('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
-       ('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
+       ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
+       ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+       ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+       ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
+       ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+       ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
+       ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
+       ('iX', 'http://www.heise.de/ix/news/news.rdf'),
+       ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
        ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
        ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
        ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-       ('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
-       ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
-   ]
+       ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+       ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')]

    def print_version(self, url):
        return url + '?view=print'
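The print layout is selected with a plain query parameter; a quick check (the article path is invented):

    url = 'http://www.heise.de/newsticker/meldung/Beispiel-123456.html'
    print(url + '?view=print')
    # -> http://www.heise.de/newsticker/meldung/Beispiel-123456.html?view=print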
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1298137661(BasicNewsRecipe):
    title = u'Helsingin Sanomat'
    __author__ = 'oneillpt'
    language = 'fi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
@ -11,21 +11,12 @@ class AdvancedUserRecipe1298137661(BasicNewsRecipe):
    conversion_options = {
        'linearize_tables' : True
    }
-   remove_tags = [
-       dict(name='a', attrs={'id':'articleCommentUrl'}),
-       dict(name='p', attrs={'class':'newsSummary'}),
-       dict(name='div', attrs={'class':'headerTools'})
-   ]
+   keep_only_tags = [dict(name='div', attrs={'id':'main-content'}),
+       dict(name='div', attrs={'class':'contentNewsArticle'})]

-   feeds = [(u'Uutiset - HS.fi', u'http://www.hs.fi/uutiset/rss/'), (u'Politiikka - HS.fi', u'http://www.hs.fi/politiikka/rss/'),
+   feeds = [(u'Uutiset - HS.fi', u'http://www.hs.fi/uutiset/rss/')
+       , (u'Politiikka - HS.fi', u'http://www.hs.fi/politiikka/rss/'),
        (u'Ulkomaat - HS.fi', u'http://www.hs.fi/ulkomaat/rss/'), (u'Kulttuuri - HS.fi', u'http://www.hs.fi/kulttuuri/rss/'),
        (u'Kirjat - HS.fi', u'http://www.hs.fi/kulttuuri/kirjat/rss/'), (u'Elokuvat - HS.fi', u'http://www.hs.fi/kulttuuri/elokuvat/rss/')
    ]

-   def print_version(self, url):
-       j = url.rfind("/")
-       s = url[j:]
-       i = s.rfind("?ref=rss")
-       if i > 0:
-           s = s[:i]
-       return "http://www.hs.fi/tulosta" + s
@ -4,56 +4,20 @@ __license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe
-import re

class Histmag(BasicNewsRecipe):
+   title = u'Histmag'
+   oldest_article = 7
+   max_articles_per_feed = 100
+   cover_url='http://histmag.org/grafika/loga/histmag-logo-2-300px.png'
+   __author__ = 'matek09'
+   description = u"Artykuly historyczne i publicystyczne"
+   encoding = 'utf-8'
+   #preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
+   no_stylesheets = True
+   language = 'pl'
+   remove_javascript = True
+   keep_only_tags=[dict(id='article')]
+   remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'})]
+   feeds = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'), (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'), (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'), (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'), (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]

-   title = u'Histmag'
-   __author__ = 'matek09'
-   description = u"Artykuly historyczne i publicystyczne"
-   encoding = 'utf-8'
-   no_stylesheets = True
-   language = 'pl'
-   remove_javascript = True
-   #max_articles_per_feed = 1
-   remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'article'}))
-   remove_tags_after = dict(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
-   #keep_only_tags =[]
-   #keep_only_tags.append(dict(name = 'h2'))
-   #keep_only_tags.append(dict(name = 'p'))
-   remove_tags =[]
-   remove_tags.append(dict(name = 'p', attrs = {'class' : 'podpis'}))
-   remove_tags.append(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
-   remove_tags.append(dict(name = 'img', attrs = {'src' : 'style/buttons/wesprzyjnas-1.jpg'}))
-   preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),
-       (re.compile(r'<span>'), lambda match: '<br><br><span>')]
-   extra_css = '''
-       .left {font-size: x-small}
-       .right {font-size: x-small}
-   '''
-
-   def find_articles(self, soup):
-       articles = []
-       for div in soup.findAll('div', attrs={'class' : 'text'}):
-           articles.append({
-               'title' : self.tag_to_string(div.h3.a),
-               'url' : 'http://www.histmag.org/' + div.h3.a['href'],
-               'date' : self.tag_to_string(div.next('p')).split('|')[0],
-               'description' : self.tag_to_string(div.next('p', podpis=False)),
-           })
-       return articles
-
-   def parse_index(self):
-       soup = self.index_to_soup('http://histmag.org/?arc=4&dx=0')
-       feeds = []
-       feeds.append((u"Artykuly historyczne", self.find_articles(soup)))
-       soup = self.index_to_soup('http://histmag.org/?arc=5&dx=0')
-       feeds.append((u"Artykuly publicystyczne", self.find_articles(soup)))
-       soup = self.index_to_soup('http://histmag.org/?arc=1&dx=0')
-       feeds.append((u"Wydarzenia", self.find_articles(soup)))
-       return feeds
@ -8,6 +8,15 @@ class Historia_org_pl(BasicNewsRecipe):
    category = 'history'
    language = 'pl'
    oldest_article = 8
+   remove_empty_feeds=True
    max_articles_per_feed = 100

-   feeds = [(u'Artykuły', u'http://www.historia.org.pl/index.php?format=feed&type=rss')]
+   feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'),
+       (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'),
+       (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'),
+       (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'),
+       (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'),
+       (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'),
+       (u'Rekonstrukcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'),
+       (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'),
+       (u'Konkursy', u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')]
@ -1,50 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class HunMilNews(BasicNewsRecipe):
title = u'Honvedelem.hu'
oldest_article = 3
description = u'Katonah\xedrek'
language = 'hu'
lang = 'hu'
encoding = 'windows-1250'
category = 'news, military'
no_stylesheets = True
__author__ = 'Devilinside'
max_articles_per_feed = 16
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'class':'cikkoldal_cikk_cim'}),
dict(name='div', attrs={'class':'cikkoldal_cikk_alcim'}),
dict(name='div', attrs={'class':'cikkoldal_datum'}),
dict(name='div', attrs={'class':'cikkoldal_lead'}),
dict(name='div', attrs={'class':'cikkoldal_szoveg'}),
dict(name='img', attrs={'class':'ajanlo_kep_keretes'}),
]
feeds = [(u'Misszi\xf3k', u'http://www.honvedelem.hu/rss_b?c=22'),
(u'Aktu\xe1lis hazai h\xedrek', u'http://www.honvedelem.hu/rss_b?c=3'),
(u'K\xfclf\xf6ldi h\xedrek', u'http://www.honvedelem.hu/rss_b?c=4'),
(u'A h\xf3nap t\xe9m\xe1ja', u'http://www.honvedelem.hu/rss_b?c=6'),
(u'Riport', u'http://www.honvedelem.hu/rss_b?c=5'),
(u'Portr\xe9k', u'http://www.honvedelem.hu/rss_b?c=7'),
(u'Haditechnika', u'http://www.honvedelem.hu/rss_b?c=8'),
(u'Programok, esem\xe9nyek', u'http://www.honvedelem.hu/rss_b?c=12')
]


@ -1,41 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class HunTechNet(BasicNewsRecipe):
title = u'TechNet'
oldest_article = 3
description = u'Az ut\xf3bbi 3 nap TechNet h\xedrei'
language = 'hu'
lang = 'hu'
encoding = 'utf-8'
__author__ = 'Devilinside'
max_articles_per_feed = 30
timefmt = ' [%Y, %b %d, %a]'
remove_tags_before = dict(name='div', attrs={'id':'c-main'})
remove_tags = [dict(name='div', attrs={'class':'wrp clr'}),
{'class' : ['screenrdr','forum','print','startlap','text_small','text_normal','text_big','email']},
]
keep_only_tags = [dict(name='div', attrs={'class':'cikk_head box'}),dict(name='div', attrs={'class':'cikk_txt box'})]
feeds = [(u'C\xedmlap',
u'http://www.technet.hu/rss/cimoldal/'), (u'TechTud',
u'http://www.technet.hu/rss/techtud/'), (u'PDA M\xe1nia',
u'http://www.technet.hu/rss/pdamania/'), (u'Telefon',
u'http://www.technet.hu/rss/telefon/'), (u'Sz\xe1m\xedt\xf3g\xe9p',
u'http://www.technet.hu/rss/notebook/'), (u'GPS',
u'http://www.technet.hu/rss/gps/')]

BIN (new binary file, 323 B; contents not shown)

BIN (new binary file, 600 B; contents not shown)

BIN recipes/icons/formulaas.png (new binary file, 687 B; contents not shown)

BIN recipes/icons/infra_pl.png (new binary file, 1.5 KiB; contents not shown)

BIN (new binary file, 868 B; contents not shown)

BIN (new binary file, 712 B; contents not shown)

BIN recipes/icons/skylife.png (new binary file, 3.3 KiB; contents not shown)


@ -1,8 +1,8 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__author__ = 'Gabriele Marini, based on Darko Miletic'
+__author__ = 'Gambarini, based on Darko Miletic'
 __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
-description = 'Italian daily newspaper - 19-04-2010'
+description = 'Italian daily newspaper - 09-11-2011'
 '''
 http://www.ilgiornale.it/
@ -11,7 +11,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.news import BasicNewsRecipe
 class IlGiornale(BasicNewsRecipe):
-    __author__ = 'Marini Gabriele'
+    __author__ = 'GAMBARINI'
     description = 'Italian daily newspaper'
     cover_url = 'http://www.ilgiornale.it/img_v1/logo.gif'
@ -23,9 +23,8 @@ class IlGiornale(BasicNewsRecipe):
     timefmt = '[%a, %d %b, %Y]'
     oldest_article = 7
-    max_articles_per_feed = 50
+    max_articles_per_feed = 100
     use_embedded_content = False
-    recursion = 100
     no_stylesheets = True
     conversion_options = {'linearize_tables':True}
@ -38,11 +37,11 @@ class IlGiornale(BasicNewsRecipe):
     def print_version(self, url):
         raw = self.browser.open(url).read()
         soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-        all_print_tags = soup.find('div', {'style':'float:left; width:35%;'})
-        print_link = all_print_tags.contents[1]
-        if all_print_tags is None:
+        all_print_tags = soup.find('div', {'id':'print_article'})
+        print_link = all_print_tags.a
+        if print_link is None:
             return url
-        return print_link['href']
+        return 'http://www.ilgiornale.it' + print_link['href']
     feeds = [
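
The rewritten print_version above also corrects an ordering bug: the old code read all_print_tags.contents[1] before testing all_print_tags for None, so the guard could never fire. The general pattern, as a sketch (function name illustrative):

    def safe_print_href(container):
        # test for missing nodes before dereferencing them
        if container is None or container.a is None:
            return None
        return container.a['href']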

recipes/in_gr.recipe (new file)

@ -0,0 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class ingr(BasicNewsRecipe):
title = 'in.gr'
__author__ = 'Stelios'
description = 'News from Greece'
# max_articles_per_feed = 100
oldest_article = 4
publisher = 'in.gr'
category = 'news, GR'
language = 'el'
encoding = 'utf8'
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
encoding = 'utf8'
keep_only_tags = [
dict(name='h1'),
dict(name='div', attrs={'id' : ['in-news-article']})
]
remove_tags = [
dict(name='em', attrs={'class' : ['credits']}),
dict(name='div', attrs={'class' : ['article-tools-hor', 'promo-banners gAds', 'main', 'article-listen-player', 'article-tools-hor-bttm', 'tools-sec', 'article-tools', 'article-listen-player-ver']})
]
feeds = [
(u'\u0395\u03BB\u03BB\u03AC\u03B4\u03B1', 'http://rss.in.gr/feed/news/greece'),
(u'\u0395\u03B9\u03B4\u03AE\u03C3\u03B5\u03B9\u03C2', 'http://rss.in.gr/feed/news'),
(u'\u039A\u03CC\u03C3\u03BC\u03BF\u03C2', 'http://rss.in.gr/feed/news/world'),
(u'\u0395\u03C0\u03B9\u03C3\u03C4\u03AE\u03BC\u03B7', 'http://rss.in.gr/feed/news/science'),
(u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://rss.in.gr/feed/news/culture')
]


@ -1,33 +1,60 @@
-__license__ = 'GPL v3'
-__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
-'''
-www.independent.co.uk
-'''
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+# adapted from old recipe by Darko Miletic <darko.miletic at gmail.com>
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString
+class TheIndependentNew(BasicNewsRecipe):
+    # flag to enable/disable article graphics on business pages/some others
+    # eg http://www.independent.co.uk/news/world/europe/berlusconi-departure-fails-to-calm-the-markets-6259682.html
+    # -max dimensions can be altered using the .pictureContainer img selector in the css
+    _FETCH_ARTICLE_GRAPHICS = True
+    #Flag to enable/disable image fetching (not business)
+    _FETCH_IMAGES = True
+    #used for converting rating to stars
+    _STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
+    _NO_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star_grey.png'
+    title = u'The Independent'
+    __author__ = 'Will'
+    description = 'The latest in UK News and World News from The \
+        Independent. Wide range of international and local news, sports \
+        news, commentary and opinion pieces.Independent News - Breaking news \
+        that matters. Your daily comprehensive news source - The \
+        Independent Newspaper'
+    publisher = 'The Independent'
+    category = 'news, UK'
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_empty_feeds = True
+    language = 'en_GB'
+    publication_type = 'newspaper'
+    masthead_url = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
+    encoding = 'utf-8'
+    remove_tags =[
+        dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
+        dict(attrs={'class' : ['autoplay','openBiogPopup']}),
+        dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
+        dict(attrs={'style' : re.compile('.*')}),
+        ]
+    keep_only_tags =[dict(attrs={'id':'main'})]
+    recursions = 0
+    # fixes non compliant html nesting and 'marks' article graphics links
+    preprocess_regexps = [
+        (re.compile('<span class="storyTop ">(?P<nested>.*?)</span>', re.DOTALL),
+         lambda match: '<div class="storyTop">' + match.group('nested') + '</div>'),
+        (re.compile('(<strong>.*?[Cc]lick.*?<a.*?((HERE)|([Hh]ere)).*?</strong>)', re.DOTALL),
+         lambda match: '<div class="article-graphic">' + match.group(0) + '</div>'),
+    ]
-class TheIndependent(BasicNewsRecipe):
-    title = 'The Independent'
-    __author__ = 'Darko Miletic'
-    description = 'Independent News - Breaking news, comment and features from The Independent newspaper'
-    publisher = 'The Independent'
-    category = 'news, politics, UK'
-    oldest_article = 2
-    max_articles_per_feed = 200
-    no_stylesheets = True
-    encoding = 'cp1252'
-    use_embedded_content = False
-    language = 'en_GB'
-    remove_empty_feeds = True
-    publication_type = 'newspaper'
-    masthead_url = 'http://www.independent.co.uk/independent.co.uk/images/logo-london.png'
-    extra_css = """
-        h1{font-family: Georgia,serif }
-        body{font-family: Verdana,Arial,Helvetica,sans-serif}
-        img{margin-bottom: 0.4em; display:block}
-        .info,.caption,.credits{font-size: x-small}
-    """
     conversion_options = {
         'comment' : description
@ -36,51 +63,442 @@ class TheIndependent(BasicNewsRecipe):
        , 'language' : language
     }
-    remove_tags =[
-        dict(name=['meta','link','object','embed','iframe','base','style'])
-        ,dict(attrs={'class':['related-articles','share','googleCols','article-tools','paging','googleArt']})
-        ,dict(attrs={'id':['newsVideoPlayer','yahoobook','google-intext']})
-    ]
-    keep_only_tags =[dict(attrs={'id':'article'})]
-    remove_attributes=['lang','onclick','width','xmlns:fb']
+    extra_css = """
+        h1{font-family: Georgia,serif }
+        body{font-family: Verdana,Arial,Helvetica,sans-serif}
+        img{margin-bottom: 0.4em; display:block}
+        .starRating img {float: left}
+        .starRating {margin-top:0.4em; display: block}
+        .image {clear:left; font-size: x-small; color:#888888;}
+        .articleByTimeLocation {font-size: x-small; color:#888888;
+            margin-bottom:0.2em ; margin-top:0.2em ; display:block}
+        .subtitle {clear:left}
+        .column-1 h1 { color: #191919}
+        .column-1 h2 { color: #333333}
+        .column-1 h3 { color: #444444}
+        .column-1 p { color: #777777}
+        .column-1 p,a,h1,h2,h3 { margin: 0; }
+        .column-1 div{color:#888888; margin: 0;}
+        .articleContent {display: block; clear:left;}
+        .storyTop{}
+        .pictureContainer img { max-width: 400px; max-height: 400px;}
+        """
+    oldest_article = 1
+    max_articles_per_feed = 100
+    _processed_urls = []
-    feeds = [
-        (u'UK' , u'http://www.independent.co.uk/news/uk/rss' )
-        ,(u'World' , u'http://www.independent.co.uk/news/world/rss' )
-        ,(u'Business' , u'http://www.independent.co.uk/news/business/rss' )
-        ,(u'People' , u'http://www.independent.co.uk/news/people/rss' )
-        ,(u'Science' , u'http://www.independent.co.uk/news/science/rss' )
-        ,(u'Media' , u'http://www.independent.co.uk/news/media/rss' )
-        ,(u'Education' , u'http://www.independent.co.uk/news/education/rss' )
-        ,(u'Leading Articles' , u'http://www.independent.co.uk/opinion/leading-articles/rss')
-        ,(u'Comentators' , u'http://www.independent.co.uk/opinion/commentators/rss' )
-        ,(u'Columnists' , u'http://www.independent.co.uk/opinion/columnists/rss' )
-        ,(u'Letters' , u'http://www.independent.co.uk/opinion/letters/rss' )
-        ,(u'Big Question' , u'http://www.independent.co.uk/extras/big-question/rss' )
-        ,(u'Sport' , u'http://www.independent.co.uk/sport/rss' )
-        ,(u'Life&Style' , u'http://www.independent.co.uk/life-style/rss' )
-        ,(u'Arts&Entertainment' , u'http://www.independent.co.uk/arts-entertainment/rss' )
-        ,(u'Travel' , u'http://www.independent.co.uk/travel/rss' )
-        ,(u'Money' , u'http://www.independent.co.uk/money/rss' )
-    ]
     def get_article_url(self, article):
-        return article.get('guid', None)
+        url = super(self.__class__,self).get_article_url(article)
+        title = article.get('title', None)
+        if title and re.search("^Video:",title):
+            return None
+        #remove duplicates
+        if not (url in self._processed_urls):
+            self._processed_urls.append(url)
+        else:
+            url = None
+        return url
     def preprocess_html(self, soup):
-        for item in soup.body.findAll(style=True):
-            del item['style']
-        for item in soup.body.findAll(['author','preform']):
-            item.name='span'
-        for item in soup.body.findAll('img'):
-            if not item.has_key('alt'):
-                item['alt'] = 'image'
-        for item in soup.body.findAll('div', attrs={'class':['clear-o','body','photoCaption']}):
-            item.name = 'p'
-        for item in soup.body.findAll('div'):
-            if not item.attrs and not item.contents:
-                item.extract()
-        soup2 = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
-        soup2.body.replaceWith(soup.body)
-        return soup2
+        #remove 'advertorial articles'
+        strapline = soup.find('div',attrs={'class' : re.compile('.*strapLine.*')})
+        if strapline:
+            for para in strapline.findAll('p'):
+                if len(para.contents) and isinstance(para.contents[0],NavigableString) \
+                    and para.contents[0] == 'ADVERTORIAL FEATURE':
+                    return None
+        items_to_extract = []
+        slideshow_elements = []
+        for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
+            remove = True
+            pattern = re.compile('((articleContent)|(title))$')
if (pattern.search(item['class'])) is not None:
remove = False
# corrections
# story content always good
pattern = re.compile('storyContent')
if (pattern.search(item['class'])) is not None:
remove = False
#images
pattern = re.compile('slideshow')
if (pattern.search(item['class'])) is not None:
if self._FETCH_IMAGES:
remove = False
slideshow_elements.append(item)
else:
remove = True
#social widgets always bad
pattern = re.compile('socialwidget')
if (pattern.search(item['class'])) is not None:
remove = True
if remove:
items_to_extract.append(item)
for item in items_to_extract:
item.extract()
items_to_extract = []
if self._FETCH_IMAGES:
for element in slideshow_elements:
for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
if item.img is not None:
#use full size image
img = item.findNext('img')
img['src'] = item['href']
#insert caption if available
if img.get('title') and (len(img['title']) > 1):
tag = Tag(soup,'h3')
text = NavigableString(img['title'])
tag.insert(0,text)
#picture before text
img.extract()
item.insert(0,img)
item.insert(1,tag)
# remove link
item.name = "div"
item["class"]='image'
del item["href"]
#remove empty subtitles
"""
currently the subtitle is located in first paragraph after
sibling <h3 class="subtitle"> tag. This may be 'fixed' at
some point.
"""
subtitle = soup.find('h3',attrs={'class' : 'subtitle'})
if subtitle is not None:
subtitleText = subtitle.findNext('p')
if subtitleText is not None:
if len(subtitleText.contents[0]) <= 1 :
subtitleText.extract()
subtitle.extract()
#replace rating numbers with stars
for item in soup.findAll('div',attrs={ 'class' : 'starRating'}):
if item is not None:
soup2 = self._insertRatingStars(soup,item)
if soup2 is not None:
soup = soup2
#remove empty paragraph tags in storyTop which can leave a space
#between first paragraph and rest of story
nested_content = False
storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
for item in storyTop.findAll('p'):
for nested in item:
if isinstance(nested, Tag):
nested_content = True
break
if not nested_content and item.contents is not None and len(item.contents[0]) <= 1 :
items_to_extract.append(item)
for item in items_to_extract:
item.extract()
items_to_extract = []
#remove line breaks immediately next to tags with default margins
#to prevent double line spacing and narrow columns of text
storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
self._remove_undesired_line_breaks_from_tag(storyTop,soup)
#replace article graphics link with the graphics themselves
if self._FETCH_ARTICLE_GRAPHICS:
items_to_insert = []
for item in soup.findAll('div', attrs={'class' : ['article-graphic']}):
strong = item.find('strong')
if not strong:
continue
for child in strong:
if isinstance(child,Tag):
if str(child.name) == 'a':
items_to_insert.extend(self._get_article_graphic(strong,child['href'],soup))
for item in items_to_insert:
item[0].replaceWith(item[1])
for item in items_to_extract:
item.extract()
return soup
def _get_article_graphic(self,old_item,url,soup):
items_to_insert = []
if re.search('\.jpg$',str(url)):
div = Tag(soup,'div')
div['class'] = 'pictureContainer'
img = Tag(soup,'img')
img['src'] = url
img['alt'] = 'article graphic'
div.insert(0,img)
items_to_insert.append((old_item,div,))
return items_to_insert
soup2 = self.index_to_soup(url)
for item in soup2.findAll('div',attrs={'class' : re.compile("widget picture article.*")}):
items_to_insert.append((old_item,item),)
return items_to_insert
def _insertRatingStars(self,soup,item):
if item.contents is None:
return
rating = item.contents[0]
if not rating.isdigit():
return None
rating = int(item.contents[0])
for i in range(1,6):
star = Tag(soup,'img')
if i <= rating:
star['src'] = self._STAR_URL
else:
star['src'] = self._NO_STAR_URL
star['alt'] = 'star number ' + str(i)
item.insert(i,star)
#item.contents[0] = NavigableString('(' + str(rating) + ')')
item.contents[0] = ''
def postprocess_html(self,soup, first_fetch):
#find broken images and remove captions
items_to_extract = []
for item in soup.findAll('div', attrs={'class' : 'image'}):
img = item.findNext('img')
if img and img.get('src'):
# broken images still point to remote url
pattern = re.compile('http://www.independent.co.uk.*')
if pattern.match(img["src"]) is not None:
caption = img.findNextSibling('h3')
if caption is not None:
items_to_extract.append(caption)
items_to_extract.append(img)
for item in items_to_extract:
item.extract()
return soup
def _recurisvely_linearise_tag_tree(
self,
item,
linearised= None,
count=0,
limit = 100
):
linearised = linearised or []
count = count + 1
if count > limit:
return linearised
if not (isinstance(item,Tag)):
return linearised
for nested in item:
linearised.append(nested)
linearised = self._recurisvely_linearise_tag_tree(nested,linearised, count)
return linearised
def _get_previous_tag(self,current_index, tag_tree):
if current_index == 0:
return None
else:
return tag_tree[current_index - 1]
def _get_next_tag(self,current_index, tag_tree):
if current_index < len(tag_tree) - 1:
return tag_tree[current_index + 1]
else:
return None
def _list_match(self,test_str, list_regex):
for regex in list_regex:
match = re.match(regex, test_str)
if match is not None:
return True
return False
def _remove_undesired_line_breaks_from_tag(self,parent,soup):
if parent is None:
return
tag_tree = self._recurisvely_linearise_tag_tree(parent)
items_to_remove = []
for item in tag_tree:
if item == u'\n':
items_to_remove.append(item)
continue;
for item in items_to_remove:
tag_tree.remove(item)
spaced_tags = [r'p', r'h\d', r'blockquote']
tags_to_extract = []
tags_to_replace = []
for (i, tag) in enumerate(tag_tree):
if isinstance(tag, Tag):
if str(tag) == '<br />':
previous_tag = self._get_previous_tag(i, tag_tree)
if isinstance(previous_tag, Tag):
previous_tag_is_spaced = previous_tag is not None\
and self._list_match(str(previous_tag.name),
spaced_tags)
else:
previous_tag_is_spaced = False
next_tag = self._get_next_tag(i, tag_tree)
if isinstance(next_tag, Tag):
next_tag_is_spaced = next_tag is not None\
and self._list_match(str(next_tag.name), spaced_tags)
else:
next_tag_is_spaced = False
if previous_tag_is_spaced or next_tag_is_spaced or i == 0\
or i == len(tag_tree) - 1:
tags_to_extract.append(tag)
else:
tags_to_replace.append((tag,NavigableString(' '),))
for pair in tags_to_replace:
pair[0].replaceWith(pair[1])
for tag in tags_to_extract:
tag.extract()
feeds = [
(u'News - UK',
u'http://www.independent.co.uk/news/uk/?service=rss'),
(u'News - World',
u'http://www.independent.co.uk/news/world/?service=rss'),
(u'News - Business',
u'http://www.independent.co.uk/news/business/?service=rss'),
(u'News - People',
u'http://www.independent.co.uk/news/people/?service=rss'),
(u'News - Science',
u'http://www.independent.co.uk/news/science/?service=rss'),
(u'News - Media',
u'http://www.independent.co.uk/news/media/?service=rss'),
(u'News - Education',
u'http://www.independent.co.uk/news/education/?service=rss'),
(u'News - Obituaries',
u'http://www.independent.co.uk/news/obituaries/?service=rss'),
(u'News - Corrections',
u'http://www.independent.co.uk/news/corrections/?service=rss'
),
(u'Opinion',
u'http://www.independent.co.uk/opinion/?service=rss'),
(u'Environment',
u'http://www.independent.co.uk/environment/?service=rss'),
(u'Sport - Athletics',
u'http://www.independent.co.uk/sport/general/athletics/?service=rss'
),
(u'Sport - Cricket',
u'http://www.independent.co.uk/sport/cricket/?service=rss'),
(u'Sport - Football',
u'http://www.independent.co.uk/sport/football/?service=rss'),
(u'Sport - Golf',
u'http://www.independent.co.uk/sport/golf/?service=rss'),
(u'Sport - Motor racing',
u'http://www.independent.co.uk/sport/motor-racing/?service=rss'
),
(u'Sport - Olympics',
u'http://www.independent.co.uk/sport/olympics/?service=rss'),
(u'Sport - Racing',
u'http://www.independent.co.uk/sport/racing/?service=rss'),
(u'Sport - Rugby League',
u'http://www.independent.co.uk/sport/general/rugby-league/?service=rss'),
(u'Sport - Rugby Union',
u'http://www.independent.co.uk/sport/rugby/rugby-union/?service=rss'
),
(u'Sport - Sailing',
u'http://www.independent.co.uk/sport/general/sailing/?service=rss'
),
(u'Sport - Tennis',
u'http://www.independent.co.uk/sport/tennis/?service=rss'),
(u'Sport - Others',
u'http://www.independent.co.uk/sport/general/others/?service=rss'
),
(u'Life & Style - Fashion',
u'http://www.independent.co.uk/life-style/fashion/?service=rss'
),
(u'Life & Style -Food & Drink',
u'http://www.independent.co.uk/life-style/food-and-drink/?service=rss'
),
(u'Life & Style - Health and Families',
u'http://www.independent.co.uk/life-style/health-and-families/?service=rss'
),
(u'Life & Style - House & Home',
u'http://www.independent.co.uk/life-style/house-and-home/'),
(u'Life & Style - History',
u'http://www.independent.co.uk/life-style/history/?service=rss'
),
(u'Life & Style - Gadgets & Tech',
u'http://www.independent.co.uk/life-style/gadgets-and-tech/?service=rss'
),
(u'Life & Style - Motoring',
u'http://www.independent.co.uk/life-style/motoring/?service=rss'
),
(u'Arts & Ents - Art',
u'http://www.independent.co.uk/arts-entertainment/art/?service=rss'
),
(u'Arts & Ents - Architecture',
u'http://www.independent.co.uk/arts-entertainment/architecture/?service=rss'
),
(u'Arts & Ents - Music',
u'http://www.independent.co.uk/arts-entertainment/music/?service=rss'
),
(u'Arts & Ents - Classical',
u'http://www.independent.co.uk/arts-entertainment/classical/?service=rss'
),
(u'Arts & Ents - Films',
u'http://www.independent.co.uk/arts-entertainment/films/?service=rss'
),
(u'Arts & Ents - TV',
u'http://www.independent.co.uk/arts-entertainment/tv/?service=rss'
),
(u'Arts & Ents - Theatre and Dance',
u'http://www.independent.co.uk/arts-entertainment/theatre-dance/?service=rss'
),
(u'Arts & Ents - Comedy',
u'http://www.independent.co.uk/arts-entertainment/comedy/?service=rss'
),
(u'Arts & Ents - Books',
u'http://www.independent.co.uk/arts-entertainment/books/?service=rss'
),
(u'Travel', u'http://www.independent.co.uk/travel/?service=rss'
),
(u'Money', u'http://www.independent.co.uk/money/?service=rss'),
(u'IndyBest',
u'http://www.independent.co.uk/extras/indybest/?service=rss'),
]
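
The new recipe's get_article_url dedupes across feeds: each accepted URL is remembered in _processed_urls and any repeat returns None, which makes calibre skip that article. A minimal sketch of the idea, independent of calibre:

    seen_urls = set()

    def accept(url):
        # return the url the first time it appears, None for repeats
        if url in seen_urls:
            return None
        seen_urls.add(url)
        return url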

recipes/infra_pl.recipe (new file)

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class INFRA(BasicNewsRecipe):
title = u'INFRA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
category = 'UFO'
language = 'pl'
max_articles_per_feed = 100
    no_stylesheets = True
remove_tags_before=dict(name='h2', attrs={'class':'contentheading'})
remove_tags_after=dict(attrs={'class':'pagenav'})
remove_tags=[dict(attrs={'class':'pagenav'})]
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]

recipes/japan_news.recipe (new file)

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsOnJapan(BasicNewsRecipe):
title = u'News On Japan'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [
('News',
'http://newsonjapan.com/rss/top.xml'),
]


@ -44,7 +44,11 @@ class JapanTimes(BasicNewsRecipe):
         return rurl.partition('?')[0]
     def print_version(self, url):
-        return url.replace('/cgi-bin/','/print/')
+        if '/rss/' in url:
+            return url.replace('.jp/rss/','.jp/print/')
+        if '/text/' in url:
+            return url.replace('.jp/text/','.jp/print/')
+        return url
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
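
The new print_version simply maps both the rss and text URL forms onto the printable form; a standalone illustration (the story path is hypothetical):

    def to_print_url(url):
        # mirrors the mapping in the recipe above
        if '/rss/' in url:
            return url.replace('.jp/rss/', '.jp/print/')
        if '/text/' in url:
            return url.replace('.jp/text/', '.jp/print/')
        return url

    # e.g. http://www.japantimes.co.jp/rss/sample.html
    #   -> http://www.japantimes.co.jp/print/sample.html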

recipes/korben.recipe (new file)

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318619728(BasicNewsRecipe):
title = u'Korben'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]
def get_masthead_url(self):
masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead
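
This get_masthead_url follows a probe-and-fall-back pattern that recurs in several recipes below (lepoint, lexpress, liberation): fetch the known logo URL once and return None if that fails, so calibre falls back to its default masthead. A generic sketch against any mechanize-style browser object:

    def probe_image(br, url):
        # any failure (404, DNS error, timeout) means 'unavailable'
        try:
            br.open(url)
            return url
        except Exception:
            return None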


@ -10,9 +10,9 @@ class KoreaHerald(BasicNewsRecipe):
     language = 'en'
     description = u'Korea Herald News articles'
     __author__ = 'Seongkyoun Yoo'
-    oldest_article = 10
+    oldest_article = 15
     recursions = 3
-    max_articles_per_feed = 10
+    max_articles_per_feed = 15
     no_stylesheets = True
     keep_only_tags = [
         dict(id=['contentLeft', '_article'])
@ -25,7 +25,6 @@ class KoreaHerald(BasicNewsRecipe):
     ]
     feeds = [
-        ('All News','http://www.koreaherald.com/rss/020000000000.xml'),
         ('National','http://www.koreaherald.com/rss/020100000000.xml'),
         ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
         ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),


@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
-    title = 'Kansascity Star'
+    title = 'Kansas City Star'
     language = 'en'
     __author__ = 'TonytheBookworm'
     description = 'www.kansascity.com feed'

recipes/kyungyhang (new file)

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Kyungyhang
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Kyungyhang(BasicNewsRecipe):
title = u'Kyungyhang'
language = 'ko'
description = u'The Kyungyhang Shinmun articles'
__author__ = 'Seongkyoun Yoo'
oldest_article = 20
recursions = 2
max_articles_per_feed = 20
no_stylesheets = True
remove_javascript = True
keep_only_tags = [
dict(name='div', attrs ={'class':['article_title_wrap']}),
dict(name='div', attrs ={'class':['article_txt']})
]
remove_tags_after = dict(id={'sub_bottom'})
remove_tags = [
dict(name='iframe'),
dict(id={'TdHot'}),
dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
dict(name='dl', attrs={'class':['CL']}),
dict(name='ul', attrs={'class':['tab']}),
]
feeds = [
('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
]


@ -11,7 +11,7 @@ __description__ = 'Italian weekly magazine'
 from calibre.web.feeds.news import BasicNewsRecipe
 class Espresso(BasicNewsRecipe):
-    __author__ = 'Lorenzo Vigentini, Gabriele Marini'
+    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Krittika Goyal'
     description = 'Italian weekly magazine'
     cover_url = 'http://espresso.repubblica.it/images/logo_espresso.gif'
@ -26,10 +26,9 @@ class Espresso(BasicNewsRecipe):
     oldest_article = 16
     max_articles_per_feed = 100
     use_embedded_content = False
-    recursion = 10
-    remove_javascript = True
     no_stylesheets = True
+    auto_cleanup = True
     feeds = [
@ -42,36 +41,3 @@ class Espresso(BasicNewsRecipe):
         (u'Chiesa: HomePage', u'http://data.kataweb.it/rss/chiesa/homepage/it'),
         (u'Chiesa: Speciali e Focus', u'http://data.kataweb.it/rss/chiesa/speciali_e_focus/it')
     ]
-    def print_version(self,url):
-        print url[7:25]
-        if url[7:25] == 'temi.repubblica.it':
-            return url + '/?printpage=undefined'
-        elif url[7:25] == 'www.chiesa.espress':
-            return url
-        return url + '/&print=true'
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
-        dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
-        dict(name='div', attrs={'id':['content-second-right','content2']})
-    ]
-    remove_tags = [
-        dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
-        dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
-        dict(name='ul',attrs={'id':'user-utility'}),
-        dict(name=['script','noscript','iframe'])
-    ]
-#    extra_css = '''
-#        h1 {font-family:Times New Roman,"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:24px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
-#        h2 {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
-#        h3 {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
-#        h4 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
-#        h5 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
-#        .firma {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
-#        .testo {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
-#    '''


@ -1,51 +1,77 @@
-#!/usr/bin/env python
 __license__ = 'GPL v3'
 __author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
-description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
+description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
 '''
 http://www.repubblica.it/
 '''
+import re
+from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe
 class LaRepubblica(BasicNewsRecipe):
-    __author__ = 'Lorenzo Vigentini, Gabriele Marini'
-    description = 'Italian daily newspaper'
-    cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif'
-    title = u'La Repubblica'
-    publisher = 'Gruppo editoriale L\'Espresso'
-    category = 'News, politics, culture, economy, general interest'
-    language = 'it'
-    timefmt = '[%a, %d %b, %Y]'
-    oldest_article = 5
-    max_articles_per_feed = 100
-    use_embedded_content = False
-    recursion = 10
-    remove_javascript = True
-    no_stylesheets = True
+    title = 'La Repubblica'
+    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
+    description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
+    masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
+    publisher = 'Gruppo editoriale L\'Espresso'
+    category = 'News, politics, culture, economy, general interest'
+    language = 'it'
+    timefmt = '[%a, %d %b, %Y]'
+    oldest_article = 5
+    encoding = 'utf8'
+    use_embedded_content = False
+    no_stylesheets = True
+    publication_type = 'newspaper'
+    articles_are_obfuscated = True
+    temp_files = []
+    extra_css = """
+        img{display: block}
+        """
+    remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
+    preprocess_regexps = [
+        (re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
+        (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE), lambda match: '<head><title>'),
+        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda match: '</title></head>')
+    ]
     def get_article_url(self, article):
-        link = article.get('id', article.get('guid', None))
-        if link is None:
-            return article
-        return link
+        link = BasicNewsRecipe.get_article_url(self, article)
+        if link and not '.repubblica.it/' in link:
+            link2 = article.get('id', article.get('guid', None))
+            if link2:
+                link = link2
+        return link.rpartition('?')[0]
+    def get_obfuscated_article(self, url):
+        count = 0
+        while (count < 10):
+            try:
+                response = self.browser.open(url)
+                html = response.read()
+                count = 10
+            except:
+                print "Retrying download..."
+                count += 1
+        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        return self.temp_files[-1].name
-    keep_only_tags = [dict(name='div', attrs={'class':'articolo'}),
-        dict(name='div', attrs={'class':'body-text'}),
-#        dict(name='div', attrs={'class':'page-content'}),
+    keep_only_tags = [
+        dict(attrs={'class':'articolo'}),
+        dict(attrs={'class':'body-text'}),
         dict(name='p', attrs={'class':'disclaimer clearfix'}),
-        dict(name='div', attrs={'id':'contA'})
+        dict(attrs={'id':'contA'})
     ]
     remove_tags = [
-        dict(name=['object','link']),
+        dict(name=['object','link','meta','iframe','embed']),
         dict(name='span',attrs={'class':'linkindice'}),
         dict(name='div', attrs={'class':'bottom-mobile'}),
         dict(name='div', attrs={'id':['rssdiv','blocco']}),
@ -76,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
         (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
     ]
def preprocess_html(self, soup):
for item in soup.findAll(['hgroup','deresponsabilizzazione','per']):
item.name = 'div'
item.attrs = []
for item in soup.findAll(style=True):
del item['style']
return soup
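
Because articles_are_obfuscated is True, calibre hands each article URL to get_obfuscated_article and loads the local file it returns instead of fetching the URL itself. A minimal sketch of that hook, with the recipe's retry loop omitted (file suffix illustrative):

    from calibre.ptempfile import PersistentTemporaryFile

    def get_obfuscated_article(self, url):
        html = self.browser.open(url).read()
        pt = PersistentTemporaryFile('_example.html')
        pt.write(html)
        pt.close()
        self.temp_files.append(pt)  # keep a reference for later cleanup
        return pt.name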

recipes/lepoint.recipe (new file)

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
LePoint.fr
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class lepoint(BasicNewsRecipe):
title = 'Le Point'
__author__ = 'calibre'
description = 'Actualités'
encoding = 'utf-8'
publisher = 'LePoint.fr'
category = 'news, France, world'
language = 'fr'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 15
no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
extra_css = '''
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
.chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
.info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':['entete_chroniqueur']}),
dict(name='div', attrs={'class':['col_article']}),
dict(name='div', attrs={'class':['signature_article']}),
dict(name='div', attrs={'class':['util_font util_article']}),
dict(name='div', attrs={'class':['util_article bottom']})
]
keep_only_tags = [dict(name='div', attrs={'class':['page_article']})]
remove_tags_after = dict(name='div', attrs={'class':['util_article bottom']})
feeds = [
(u'À la une', 'http://www.lepoint.fr/rss.xml'),
('International', 'http://www.lepoint.fr/monde/rss.xml'),
('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
(u'Socièté', 'http://www.lepoint.fr/societe/rss.xml'),
('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
(u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
(u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
('Sport', 'http://www.lepoint.fr/sport/rss.xml')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_masthead_url(self):
masthead = 'http://www.lepoint.fr/images/commun/logo.png'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead


@ -0,0 +1,94 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class LetsGetCritical(BasicNewsRecipe):
title = u"Let's Get Critical"
description = 'Curation / aggregation of criticisms of the arts and culture '
language = 'en'
__author__ = 'barty on mobileread.com forum'
max_articles_per_feed = 100
no_stylesheets = False
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 365
auto_cleanup = True
INDEX = 'http://www.letsgetcritical.org'
CATEGORIES = [
# comment out categories you don't want
# (user friendly name, system name, max number of articles to load)
('Architecture','architecture',30),
('Art','art',30),
('Books','books',30),
('Design','design',30),
('Digital','digital',30),
('Food','food',30),
('Movies','movies',30),
('Music','music',30),
('Television','television',30),
('Other articles','',10)
]
def parse_index(self):
self.cover_url = 'http://www.letsgetcritical.org/wp-content/themes/lets_get_critical/images/lgc.jpg'
feeds = []
seen_urls = set([])
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
for category in self.CATEGORIES:
(cat_name, tag, max_articles) = category
tagurl = '' if tag=='' else '/category/'+tag.lower()
self.log('Reading category:', cat_name)
articles = []
pageno = 1
while len(articles) < max_articles and pageno < 100:
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
pageno += 1
self.log('\tReading page:', page)
try:
soup = self.index_to_soup(page)
except:
break
posts = soup.findAll('div',attrs={'class':'post_multi'})
if len(posts) == 0:
break
for post in posts:
dt = post.find('div',attrs={'class':'title'})
atag = dt.find('a')
url = atag['href']
# skip promotionals and duplicate
if url.startswith('http://letsgetcritical') or url.startswith('/') or url in seen_urls:
continue
seen_urls.add(url)
title = self.tag_to_string(atag)
self.log('\tFound article:', title)
self.log('\t', url)
desc = post.find('blockquote')
desc = self.tag_to_string(desc) if desc else ''
m = regex.match( url)
if m:
desc = "[%s] %s" % (m.group(2), desc)
#self.log('\t', desc)
date = ''
p = post.previousSibling
# navigate up sibling to find date
while p:
if hasattr(p,'class') and p['class'] == 'singledate':
date = self.tag_to_string(p)
break
p = p.previousSibling
articles.append({'title':title,'url':url,'description':desc,'date':date})
if len(articles) >= max_articles:
break
if articles:
feeds.append((cat_name, articles))
return feeds
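
The paging loop above walks /category/<tag>, then /category/<tag>/page/2, /page/3, and so on until max_articles is reached or a page yields no posts. The URL sequence in isolation (generator form, names illustrative):

    def page_urls(index, tagurl, max_pages=100):
        # first page has no /page/N suffix
        for pageno in range(1, max_pages + 1):
            if pageno == 1:
                yield index + tagurl
            else:
                yield "%s%s/page/%d" % (index, tagurl, pageno)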

recipes/lexpress.recipe (new file)

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Lexpress.fr
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class lepoint(BasicNewsRecipe):
title = 'L\'express'
__author__ = 'calibre'
description = 'Actualités'
encoding = 'cp1252'
publisher = 'LExpress.fr'
category = 'Actualité, France, Monde'
language = 'fr'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 15
no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
extra_css = '''
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
.current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
#contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
.entete { font-weiht:bold;}
'''
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':['barre-outil-fb']}),
dict(name='div', attrs={'class':['barre-outils']}),
dict(id='bloc-sommaire'),
dict(id='footer-article')
]
keep_only_tags = [dict(name='div', attrs={'class':['bloc-article']})]
remove_tags_after = dict(id='content-article')
feeds = [
(u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
('International', 'http://www.lexpress.fr/rss/monde.xml'),
('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
(u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
    (u'Environnement', 'http://www.lexpress.fr/rss/environnement.xml'),
('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
(u'Socièté', 'http://www.lexpress.fr/rss/societe.xml'),
('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
(u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
('Sport', 'http://www.lexpress.fr/rss/sport.xml')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_masthead_url(self):
masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead


@ -9,39 +9,72 @@ liberation.fr
 from calibre.web.feeds.news import BasicNewsRecipe
 class Liberation(BasicNewsRecipe):
     title = u'Liberation'
-    __author__ = 'Darko Miletic'
-    description = 'News from France'
-    language = 'fr'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = False
-    html2lrf_options = ['--base-font-size', '10']
+    __author__ = 'calibre'
+    description = 'Actualités'
+    category = 'Actualités, France, Monde'
+    language = 'fr'
+    use_embedded_content = False
+    timefmt = ' [%d %b %Y]'
+    max_articles_per_feed = 15
+    no_stylesheets = True
+    remove_empty_feeds = True
+    filterDuplicates = True
+    extra_css = '''
+        h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
+        p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
+        h4, h5, h2.rubrique {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+        .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+        .mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
+        '''
     keep_only_tags = [
-        dict(name='h1')
-        #,dict(name='div', attrs={'class':'object-content text text-item'})
-        ,dict(name='div', attrs={'class':'article'})
-        #,dict(name='div', attrs={'class':'articleContent'})
-        ,dict(name='div', attrs={'class':'entry'})
-    ]
+        dict(name='div', attrs={'class':'article'})
+        ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
+        ,dict(name='div', attrs={'class':'entry'})
+        ,dict(name='div', attrs={'class':'col_contenu'})
+    ]
-    remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
+    remove_tags_after = [
+        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
+        ,dict(name='p',attrs={'class':['chapo']})
+        ,dict(id='_twitter_facebook')
+    ]
     remove_tags = [
-        dict(name='p', attrs={'class':'clear'})
-        ,dict(name='ul', attrs={'class':'floatLeft clear'})
-        ,dict(name='div', attrs={'class':'clear floatRight'})
-        ,dict(name='object')
-        ,dict(name='div', attrs={'class':'toolbox'})
-        ,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
-        #,dict(name='div', attrs={'class':'clear block block-call-items'})
-        ,dict(name='div', attrs={'class':'block-content'})
+        dict(name='iframe')
+        ,dict(name='a', attrs={'class':'lnk-comments'})
+        ,dict(name='div', attrs={'class':'toolbox'})
+        ,dict(name='ul', attrs={'class':'share-box'})
+        ,dict(name='ul', attrs={'class':'tool-box'})
+        ,dict(name='ul', attrs={'class':'rub'})
+        ,dict(name='p',attrs={'class':['chapo']})
+        ,dict(name='p',attrs={'class':['tag']})
+        ,dict(name='div',attrs={'class':['blokLies']})
+        ,dict(name='div',attrs={'class':['alire']})
+        ,dict(id='_twitter_facebook')
     ]
     feeds = [
-        (u'La une', u'http://www.liberation.fr/rss/laune')
-        ,(u'Monde' , u'http://www.liberation.fr/rss/monde')
-        ,(u'Sports', u'http://www.liberation.fr/rss/sports')
+        (u'La une', u'http://rss.liberation.fr/rss/9/')
+        ,(u'Monde' , u'http://www.liberation.fr/rss/10/')
+        ,(u'Économie', u'http://www.liberation.fr/rss/13/')
+        ,(u'Politiques', u'http://www.liberation.fr/rss/11/')
+        ,(u'Société', u'http://www.liberation.fr/rss/12/')
+        ,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
+        ,(u'Écran', u'http://www.liberation.fr/rss/53/')
+        ,(u'Sports', u'http://www.liberation.fr/rss/12/')
     ]
def get_masthead_url(self):
masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead


@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
     publication_type = 'newspaper'
     delay = 1
     remove_empty_feeds = True
-    cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg')
+    cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
     masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
     extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
                     img{margin-bottom: 0.4em} """

recipes/men24_gr.recipe (new file)

@ -0,0 +1,43 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class Men24(BasicNewsRecipe):
title = 'Men24.gr'
__author__ = 'Stelios'
description = 'Greek Mens portal'
oldest_article = 14
max_articles_per_feed = 100
language = 'el'
cover_url = 'http://www.men24.gr/ast/img/men24Logo.jpg'
category = 'magazines, GR'
language = 'el'
encoding = 'windows-1253'
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
extra_css = '''
.artPrintTitle{font-family :Arial,Helvetica,sans-serif; font-weight: bold; font-size:large;}
.artPrintSubtitle{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
'''
remove_tags = [
dict(name='td', attrs={'class':['artPrintCategory']}),
dict(name='table', attrs={'class':['footer']}),
dict(name='img')
]
feeds = [
(u'\u038C\u03BB\u03B5\u03C2 \u03BF\u03B9 \u03B5\u03B9\u03B4\u03AE\u03C3\u03B5\u03B9\u03C2', 'http://www.men24.gr/svc/rss/lastNews/'),
(u'\u03A3\u03C4\u03C5\u03BB', 'http://www.men24.gr/svc/rss/categoryNews/?category=style'),
(u'Fitness', 'http://www.men24.gr/svc/rss/categoryNews/?category=fitness'),
(u'Gadgets', 'http://www.men24.gr/svc/rss/categoryNews/?category=gadgets'),
(u'\u0394\u03B9\u03B1\u03C3\u03BA\u03AD\u03B4\u03B1\u03C3\u03B7', 'http://www.men24.gr/svc/rss/categoryNews/?category=fun'),
(u'\u03A7\u03C1\u03AE\u03BC\u03B1 \u03BA\u03B1\u03B9 \u039A\u03B1\u03C1\u03B9\u03AD\u03C1\u03B1', 'http://www.men24.gr/svc/rss/categoryNews/?category=money'),
(u'Special Edition', 'http://www.men24.gr/svc/rss/categoryNews/?category=special'),
(u'\u0388\u03C1\u03C9\u03C4\u03B1\u03C2 \u03BA\u03B1\u03B9 Sex', 'http://www.men24.gr/svc/rss/categoryNews/?category=love'),
(u'\u0386\u03BD\u03C4\u03C1\u03B5\u03C2 \u03C4\u03BF\u03C5 24', 'http://www.men24.gr/svc/rss/categoryNews/?category=men'),
(u'\u0393\u03C5\u03BD\u03B1\u03AF\u03BA\u03B5\u03C2', 'http://www.men24.gr/svc/rss/categoryNews/?category=women'),
(u'\u039F\u03B4\u03B7\u03B3\u03BF\u03AF', 'http://www.men24.gr/svc/rss/categoryNews/?category=guides'),
(u'\u03A4\u03B6\u03CC\u03B3\u03BF\u03C2', 'http://www.men24.gr/svc/rss/categoryNews/?category=gamble')
]
def print_version(self, url):
return url.replace('.asp', '.print.asp')


@ -0,0 +1,27 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MercoPress(BasicNewsRecipe):
title = u'Merco Press'
description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'
__author__ = 'Russell Phillips'
language = 'en'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
remove_tags = [dict(name='a')]
feeds = [('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
('Argentina', 'http://en.mercopress.com/rss/argentina'),
('Brazil', 'http://en.mercopress.com/rss/brazil'),
('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
('International News', 'http://en.mercopress.com/rss/international'),
('Latin America', 'http://en.mercopress.com/rss/latin-america'),
('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
('United States', 'http://en.mercopress.com/rss/united-states'),
        ('Uruguay', 'http://en.mercopress.com/rss/uruguay')]


@ -1,41 +1,94 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
+from calibre.utils.magick import Image
+''' Version 1.2, updated cover image to match the changed website.
+    added info date on title
+    version 1.4 Updated tags, delay and added autoclean 22-09-2011
+    version 1.5 Changes due to changes in site
+    version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
+        Added some processing on pictures
+        Removed links in html
+        Removed extre white characters
+        changed handling of self closing span
+    Version 1.7 11-11-2011 Changed oldest_article back to 1.5
+        changed è into &egrave;
+        updated remove tags
+        removed keep_only tags
+'''
 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     title = u'Metro Nieuws NL'
-    # Version 1.2, updated cover image to match the changed website.
-    # added info date on title
-    oldest_article = 2
+    oldest_article = 1.5
     max_articles_per_feed = 100
     __author__ = u'DrMerry'
     description = u'Metro Nederland'
     language = u'nl'
     simultaneous_downloads = 5
     timeout = 2
     #delay = 1
-    auto_cleanup = True
-    auto_cleanup_keep = '//div[@class="article-image-caption-2column"]|//div[@id="date"]'
+    center_navbar = True
+    #auto_cleanup = True
+    #auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
     timefmt = ' [%A, %d %b %Y]'
     no_stylesheets = True
     remove_javascript = True
     remove_empty_feeds = True
     cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
     publication_type = 'newspaper'
-    remove_tags_before = dict(name='div', attrs={'id':'date'})
-    remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
+    remove_tags_before = dict(id='date')
+    remove_tags_after = dict(name='div', attrs={'class':'article-body'})
     encoding = 'utf-8'
-    extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}'
+    remove_attributes = ['style', 'font', 'width', 'height']
use_embedded_content = False
conversion_options = {
'authors' : 'Metro Nederland',
'author_sort' : 'Metro Nederland',
'publisher' : 'DrMerry/Metro Nederland'
}
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
.article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
div.column-1-2 {display: inline;padding-right: 7px;}\
p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
-    remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
-        'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
-        'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
-        dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}),
+    remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', 'related-links',
+        'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
+        'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools',
+        'article1','article-page-auto-pushes', 'footer-edit','clear']}),
+        dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
     dict(name='iframe')]
preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
(re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
img.trim(0)
img.save(iurl)
return soup
     feeds = [
         (u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
         (u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
         (u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
         (u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
         (u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
+        (u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
         (u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
         (u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
         (u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),


@@ -4,24 +4,29 @@ __copyright__ = '2010-2011, Eddie Lau'
 # Region - Hong Kong, Vancouver, Toronto
 __Region__ = 'Hong Kong'
 # Users of Kindle 3 with limited system-level CJK support
-# please replace the following "True" with "False".
+# please replace the following "True" with "False". (Default: True)
 __MakePeriodical__ = True
-# Turn below to True if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles (Default: False)
 __UseChineseTitle__ = False
-# Set it to False if you want to skip images
+# Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
-# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
 __UseLife__ = True
-# (HK only) It is to disable the column section which is now a premium content
+# (HK only) It is to disable premium content (Default: False)
-__InclCols__ = False
+__InclPremium__ = False
-# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
-__ParsePFF__ = False
+__ParsePFF__ = True
-# (HK only) Turn below to True if you wish hi-res images
+# (HK only) Turn below to True if you wish hi-res images (Default: False)
 __HiResImg__ = False
+# Override the date returned by the program if specifying a YYYYMMDD below
+__Date__ = ''
 '''
 Change Log:
+2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
+2011/10/19: fix a bug in txt source parsing
+2011/10/17: disable fetching of premium content, also improved txt source parsing
 2011/10/04: option to get hi-res photos for the articles
 2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
@@ -72,7 +77,7 @@ class MPRecipe(BasicNewsRecipe):
        dict(attrs={'class':['content']}), # for content from txt
        dict(attrs={'class':['photo']}),
        dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
-       dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
+       dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
        dict(attrs={'class':['images']}) # for images from txt
    ]
    if __KeepImages__:
@@ -169,13 +174,22 @@ class MPRecipe(BasicNewsRecipe):
            return dt_local

    def get_fetchdate(self):
-       return self.get_dtlocal().strftime("%Y%m%d")
+       if __Date__ <> '':
+           return __Date__
+       else:
+           return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
-       return self.get_dtlocal().strftime("%Y-%m-%d")
+       if __Date__ <> '':
+           return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+       else:
+           return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
-       return self.get_dtlocal().strftime("%d")
+       if __Date__ <> '':
+           return __Date__[6:8]
+       else:
+           return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
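The three getters above share one pattern: an explicit __Date__ wins, otherwise the local time is formatted. A minimal sketch of the override behaviour, assuming the module-level flag from the first hunk:

    __Date__ = '20111125'   # YYYYMMDD; an empty string means "use today"

    def get_fetchformatteddate():
        if __Date__ <> '':
            return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
        return 'would fall back to strftime("%Y-%m-%d")'

    print get_fetchformatteddate()   # -> 2011-11-25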
@@ -208,11 +222,14 @@ class MPRecipe(BasicNewsRecipe):
                          (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                          (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                         ]:
-               articles = self.parse_section2(url, keystr)
+               if __InclPremium__ == True:
+                   articles = self.parse_section2_txt(url, keystr)
+               else:
+                   articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

-           if __InclCols__ == True:
+           if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
@@ -253,7 +270,7 @@ class MPRecipe(BasicNewsRecipe):
            # feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
-               articles = self.parse_section2(url, keystr)
+               articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))
@@ -270,11 +287,11 @@ class MPRecipe(BasicNewsRecipe):
            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                      ]:
-               articles = self.parse_section2(url, keystr)
+               articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

-           if __InclCols__ == True:
+           if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
@@ -333,7 +350,7 @@ class MPRecipe(BasicNewsRecipe):
                    url = 'http://news.mingpao.com/' + dateStr + '/' +url
                    # replace the url to the print-friendly version
                    if __ParsePFF__ == True:
-                       if url.rfind('Redirect') <> -1:
+                       if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                            url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                            url = re.sub('%2F.*%2F', '/', url)
                            title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
@@ -349,6 +366,8 @@ class MPRecipe(BasicNewsRecipe):
    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
+       br = mechanize.Browser()
+       br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
@@ -359,9 +378,13 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
-               url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
-               current_articles.append({'title': title, 'url': url, 'description': ''})
-               included_urls.append(url)
+               try:
+                   br.open_novisit(url)
+                   url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
+                   current_articles.append({'title': title, 'url': url, 'description': ''})
+                   included_urls.append(url)
+               except:
+                   print 'skipping a premium article'
        current_articles.reverse()
        return current_articles
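The try/except around br.open_novisit() is what detects premium content: redirects are disabled on the browser, so a URL that answers with a redirect (as the paywalled articles do) raises, and the article is skipped. A minimal sketch, assuming calibre's bundled mechanize (which adds open_novisit) and a hypothetical URL:

    import mechanize

    br = mechanize.Browser()
    br.set_handle_redirect(False)
    try:
        br.open_novisit('http://life.mingpao.com/cfm/dailynews3.cfm?File=example.txt')   # hypothetical
        print 'plain article, keep it'
    except Exception:
        print 'redirected, treat as premium and skip'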
@@ -467,53 +490,8 @@ class MPRecipe(BasicNewsRecipe):
    # preprocess those .txt and javascript based files
    def preprocess_raw_html(self, raw_html, url):
-       #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
-       if __HiResImg__ == True:
-           # TODO: add a _ in front of an image url
-           if url.rfind('news.mingpao.com') > -1:
-               imglist = re.findall('src="?.*?jpg"', raw_html)
-               br = mechanize.Browser()
-               br.set_handle_redirect(False)
-               for img in imglist:
-                   gifimg = img.replace('jpg"', 'gif"')
-                   try:
-                       br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
-                       raw_html = raw_html.replace(img, gifimg)
-                   except:
-                       # find the location of the first _
-                       pos = img.find('_')
-                       if pos > -1:
-                           # if found, insert _ after the first _
-                           newimg = img[0:pos] + '_' + img[pos:]
-                           raw_html = raw_html.replace(img, newimg)
-                       else:
-                           # if not found, insert _ after "
-                           raw_html = raw_html.replace(img[1:], '"_' + img[1:])
-           elif url.rfind('life.mingpao.com') > -1:
-               imglist = re.findall('src=\'?.*?jpg\'', raw_html)
-               br = mechanize.Browser()
-               br.set_handle_redirect(False)
-               #print 'Img list: ', imglist, '\n'
-               for img in imglist:
-                   gifimg = img.replace('jpg\'', 'gif\'')
-                   try:
-                       #print 'Original: ', url
-                       #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
-                       gifurl = re.sub(r'dailynews.*txt', '', url)
-                       #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
-                       br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
-                       #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
-                       #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
-                       raw_html = raw_html.replace(img, gifimg)
-                   except:
-                       #print 'GIF not found'
-                       pos = img.rfind('/')
-                       newimg = img[0:pos+1] + '_' + img[pos+1:]
-                       #print 'newimg: ', newimg
-                       raw_html = raw_html.replace(img, newimg)
-       if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
-           return raw_html
-       else:
+       new_html = raw_html
+       if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
            if url.rfind('_print.htm') <> -1:
                # javascript based file
                splitter = re.compile(r'\n')
@@ -548,7 +526,7 @@ class MPRecipe(BasicNewsRecipe):
                    photo = photo.replace('</td>', '<br>')
                    photo = photo.replace('class="photo"', '')
                    new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
-                   return new_raw_html + '</body></html>'
+                   new_html = new_raw_html + '</body></html>'
            else:
                # .txt based file
                splitter = re.compile(r'\n') # Match non-digits
@@ -557,27 +535,104 @@ class MPRecipe(BasicNewsRecipe):
                title_started = False
                met_article_start_char = False
                for item in splitter.split(raw_html):
+                   item = item.strip()
                    if item.startswith(u'\u3010'):
                        met_article_start_char = True
                        new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
                        if next_is_img_txt == False:
-                           if item.startswith('='):
+                           if item.startswith("=@"):
+                               print 'skip movie link'
+                           elif item.startswith("=?"):
                                next_is_img_txt = True
-                               new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
+                               new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
+                           elif item.startswith('=='):
+                               next_is_img_txt = True
+                               if False:
+                                   # TODO: check existence of .gif first
+                                   newimg = '_' + item[2:].strip() + '.jpg'
+                                   new_raw_html += '<img src="' + newimg + '" /><p>\n'
+                               else:
+                                   new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
+                           elif item.startswith('='):
+                               next_is_img_txt = True
+                               if False:
+                                   # TODO: check existence of .gif first
+                                   newimg = '_' + item[1:].strip() + '.jpg'
+                                   new_raw_html += '<img src="' + newimg + '" /><p>\n'
+                               else:
+                                   new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
-                               if met_article_start_char == False:
-                                   if title_started == False:
-                                       new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
-                                       title_started = True
-                                   else:
-                                       new_raw_html = new_raw_html + item + '\n'
+                               if next_is_img_txt == False and met_article_start_char == False:
+                                   if item <> '':
+                                       if title_started == False:
+                                           #print 'Title started at ', item
+                                           new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                           title_started = True
+                                       else:
+                                           new_raw_html = new_raw_html + item + '\n'
                                else:
                                    new_raw_html = new_raw_html + item + '<p>\n'
                        else:
                            next_is_img_txt = False
                            new_raw_html = new_raw_html + item + '\n'
-               return new_raw_html + '</div></body></html>'
+               new_html = new_raw_html + '</div></body></html>'
+
+       #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
+       if __HiResImg__ == True:
+           # TODO: add a _ in front of an image url
+           if url.rfind('news.mingpao.com') > -1:
+               imglist = re.findall('src="?.*?jpg"', new_html)
+               br = mechanize.Browser()
+               br.set_handle_redirect(False)
+               for img in imglist:
+                   gifimg = img.replace('jpg"', 'gif"')
+                   try:
+                       br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                       new_html = new_html.replace(img, gifimg)
+                   except:
+                       # find the location of the first _
+                       pos = img.find('_')
+                       if pos > -1:
+                           # if found, insert _ after the first _
+                           newimg = img[0:pos] + '_' + img[pos:]
+                           new_html = new_html.replace(img, newimg)
+                       else:
+                           # if not found, insert _ after "
+                           new_html = new_html.replace(img[1:], '"_' + img[1:])
+           elif url.rfind('life.mingpao.com') > -1:
+               imglist = re.findall('src=\'?.*?jpg\'', new_html)
+               br = mechanize.Browser()
+               br.set_handle_redirect(False)
+               #print 'Img list: ', imglist, '\n'
+               for img in imglist:
+                   #print 'Found img: ', img
+                   gifimg = img.replace('jpg\'', 'gif\'')
+                   try:
+                       gifurl = re.sub(r'dailynews.*txt', '', url)
+                       br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                       new_html = new_html.replace(img, gifimg)
+                   except:
+                       pos = img.rfind('/')
+                       newimg = img[0:pos+1] + '_' + img[pos+1:]
+                       new_html = new_html.replace(img, newimg)
+               # repeat with src quoted by double quotes, for text parsed from src txt
+               imglist = re.findall('src="?.*?jpg"', new_html)
+               for img in imglist:
+                   #print 'Found img: ', img
+                   gifimg = img.replace('jpg"', 'gif"')
+                   try:
+                       #print 'url', url
+                       pos = url.rfind('/')
+                       gifurl = url[:pos+1]
+                       #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
+                       br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                       new_html = new_html.replace(img, gifimg)
+                   except:
+                       pos = img.find('"')
+                       newimg = img[0:pos+1] + '_' + img[pos+1:]
+                       #print 'Use hi-res img', newimg
+                       new_html = new_html.replace(img, newimg)
+       return new_html
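The hi-res logic above repeats the same probe-and-fallback for each quoting style; in isolation the idea is: try the .gif variant of every .jpg reference first, and if the probe fails, fall back to the underscore-prefixed hi-res .jpg. A condensed sketch with hypothetical inputs, again assuming calibre's bundled mechanize:

    import re
    import mechanize

    def use_hires_images(html, base):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        for img in re.findall('src="?.*?jpg"', html):
            gifimg = img.replace('jpg"', 'gif"')
            try:
                br.open_novisit(base + gifimg[5:len(gifimg)-1])   # probe the .gif variant
                html = html.replace(img, gifimg)
            except Exception:
                pos = img.find('"')
                html = html.replace(img, img[0:pos+1] + '_' + img[pos+1:])   # hi-res .jpg
        return html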
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
@@ -678,7 +733,7 @@ class MPRecipe(BasicNewsRecipe):
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
-                   parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
+                   parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                        play_order=po, author=auth, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:


@@ -8,7 +8,7 @@ class AdvancedUserRecipe1294342201(BasicNewsRecipe):
    title = u'New London Day'
    __author__ = 'Being'
    description = 'State, local and business news from New London, CT'
-   language = 'en_GB'
+   language = 'en'
    oldest_article = 1
    max_articles_per_feed = 200

recipes/newsbeast.recipe Normal file

@@ -0,0 +1,48 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class newsbeast(BasicNewsRecipe):
title = 'Newsbeast'
__author__ = 'Stelios'
description = 'News from Greece'
oldest_article = 2
max_articles_per_feed = 100
publisher = 'newsbeast'
category = 'news, GR'
language = 'el'
encoding = 'utf8'
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
keep_only_tags = [
    dict(name='div', attrs={'class' : ['article-title']}),
    # dict(name='img', attrs={'class' : ['article_photo']}),
    # If enabled, feeds exceed 15MB
    dict(name='div', attrs={'class' : ['txt']})
]
remove_tags = [
    dict(name='table', attrs={'id':['artFoot']}),
    dict(name='img'),
    # If removed, feeds exceed 15MB
    dict(name='p', attrs={'class':['article-details']})
]
feeds = [
(u'\u0395\u03BB\u03BB\u03AC\u03B4\u03B1', 'http://www.newsbeast.gr/feeds/greece'),
(u'\u039A\u03CC\u03C3\u03BC\u03BF\u03C2', 'http://www.newsbeast.gr/feeds/world'),
(u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03BA\u03AE', 'http://www.newsbeast.gr/feeds/politiki'),
(u'\u039F\u03B9\u03BA\u03BF\u03BD\u03BF\u03BC\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/financial'),
(u'\u0391\u03B8\u03BB\u03B7\u03C4\u03B9\u03BA\u03AC', 'http://www.newsbeast.gr/feeds/sports'),
(u'\u039A\u03BF\u03B9\u03BD\u03C9\u03BD\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/society'),
(u'\u03A0\u03B5\u03C1\u03B9\u03B2\u03AC\u03BB\u03BB\u03BF\u03BD', 'http://www.newsbeast.gr/feeds/environment'),
(u'Media', 'http://www.newsbeast.gr/feeds/media'),
(u'\u0394\u03B9\u03B1\u03C3\u03BA\u03AD\u03B4\u03B1\u03C3\u03B7', 'http://www.newsbeast.gr/feeds/entertainment'),
(u'Lifestyle', 'http://www.newsbeast.gr/feeds/lifestyle'),
(u'\u03A4\u03B5\u03C7\u03BD\u03BF\u03BB\u03BF\u03B3\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/technology'),
(u'\u0391\u03C5\u03C4\u03BF\u03BA\u03AF\u03BD\u03B7\u03C4\u03BF', 'http://www.newsbeast.gr/feeds/car'),
(u'\u0393\u03C5\u03BD\u03B1\u03AF\u03BA\u03B1', 'http://www.newsbeast.gr/feeds/woman'),
(u'\u03A5\u03B3\u03B5\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/health'),
(u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://www.newsbeast.gr/feeds/culture'),
(u'\u038C,\u03C4\u03B9 \u03BD\u03B1 \u03BD\u03B1\u03B9', 'http://www.newsbeast.gr/feeds/weird')
]


@@ -1,16 +1,12 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.nin.co.rs
 '''

 import re
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
-from contextlib import closing
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre import entity_to_unicode

 class Nin(BasicNewsRecipe):
     title = 'NIN online'
@@ -29,6 +25,7 @@ class Nin(BasicNewsRecipe):
    use_embedded_content = False
    language = 'sr'
    publication_type = 'magazine'
+   masthead_url = 'http://www.nin.co.rs/img/head/logo.jpg'
    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Verdana, Lucida, sans1, sans-serif}
@@ -72,64 +69,18 @@ class Nin(BasicNewsRecipe):
    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
-       link_item = soup.find('img',attrs={'width':'100','border':'0'})
-       if link_item:
-           cover_url = self.PREFIX + link_item['src']
+       for item in soup.findAll('a', href=True):
+           if item['href'].startswith('/pages/issue.php?id='):
+               simg = item.find('img')
+               if simg:
+                   return self.PREFIX + item.img['src']
        return cover_url

-   def parse_index(self):
-       articles = []
-       count = 0
-       soup = self.index_to_soup(self.INDEX)
-       for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
-           count = count +1
-           if self.test and count > 2:
-               return articles
-           section = self.tag_to_string(item)
-           feedlink = self.PREFIX + item['href']
-           feedpage = self.index_to_soup(feedlink)
-           self.report_progress(0, _('Fetching feed')+' %s...'%(section))
-           inarts = []
-           for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
-               alink = art.parent
-               url = self.PREFIX + alink['href']
-               title = self.tag_to_string(art)
-               sparent = alink.parent
-               alink.extract()
-               description = self.tag_to_string(sparent)
-               date = strftime(self.timefmt)
-               inarts.append({
-                   'title'      :title
-                   ,'date'      :date
-                   ,'url'       :url
-                   ,'description':description
-               })
-           articles.append((section,inarts))
-       return articles
+   feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]

-   def index_to_soup(self, url_or_raw, raw=False):
-       if re.match(r'\w+://', url_or_raw):
-           open_func = getattr(self.browser, 'open_novisit', self.browser.open)
-           with closing(open_func(url_or_raw)) as f:
-               _raw = f.read()
-           if not _raw:
-               raise RuntimeError('Could not fetch index from %s'%url_or_raw)
-       else:
-           _raw = url_or_raw
-       if raw:
-           return _raw
-       if not isinstance(_raw, unicode) and self.encoding:
-           if callable(self.encoding):
-               _raw = self.encoding(_raw)
-           else:
-               _raw = _raw.decode(self.encoding, 'replace')
-       massage = list(BeautifulSoup.MARKUP_MASSAGE)
-       enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
-       massage.append((re.compile(r'&(\S+?);'), lambda match:
-           entity_to_unicode(match, encoding=enc)))
-       massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
-           ''))
-       return BeautifulSoup(_raw, markupMassage=massage)
+   def get_article_url(self, article):
+       url = BasicNewsRecipe.get_article_url(self, article)
+       return url.replace('.co.yu', '.co.rs')

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):


@@ -10,9 +10,8 @@ class AdvancedUserRecipe1286819935(BasicNewsRecipe):
    remove_attributes = ['style']
    language = 'ru'

-   feeds = [(u'Articles', u'http://www.novayagazeta.ru/rss_number.xml')]
+   feeds = [(u'Articles', u'http://www.novayagazeta.ru/rss/all.xml')]

    def print_version(self, url):
-       return url + '?print=true'
+       return '%s%s' % (url, '?print=1')

recipes/omgubuntu.recipe Normal file

@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318619832(BasicNewsRecipe):
title = u'OmgUbuntu'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]
def get_masthead_url(self):
masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
br = BasicNewsRecipe.get_browser(self)
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead


@@ -23,7 +23,7 @@ class OSNewsRecipe(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
-   cover_url='http://osnews.pl/wp-content/themes/osnews/img/logo.png'
    extra_css = '''
        .news-heading {font-size:150%}
        .newsinformations li {display:inline;}
@@ -44,7 +44,9 @@ class OSNewsRecipe(BasicNewsRecipe):
        dict(name = 'div', attrs = {'class' : 'sociable'}),
        dict(name = 'div', attrs = {'class' : 'post_prev'}),
        dict(name = 'div', attrs = {'class' : 'post_next'}),
-       dict(name = 'div', attrs = {'class' : 'clr'})
+       dict(name = 'div', attrs = {'class' : 'clr'}),
+       dict(name = 'div', attrs = {'class' : 'tw_button'}),
+       dict(name = 'div', attrs = {'style' : 'width:56px;height:60px;float:left;margin-right:10px'})
    ]
    preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')]


@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MercoPress(BasicNewsRecipe):
title = u'Penguin News'
description = u"Penguin News: the Falkland Islands' only newspaper."
cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'
language = 'en'
__author__ = 'Russell Phillips'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
feeds = [(u'Penguin News - Falkland Islands', u'http://www.penguin-news.com/index.php?format=feed&type=rss')]

recipes/phoronix.recipe Normal file

@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Fetch phoronix.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class cdnet(BasicNewsRecipe):
title = 'Phoronix'
__author__ = 'calibre'
description = 'Phoronix news'
encoding = 'utf-8'
publisher = 'Phoronix.com'
category = 'news, IT, linux'
language = 'en'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 25
no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
extra_css = '''
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
remove_tags = []
remove_tags_before = dict(id='phxcms_content_phx')
remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})
feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

recipes/protagon.recipe Normal file

@@ -0,0 +1,26 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class protagon(BasicNewsRecipe):
title = 'Protagon'
__author__ = 'Stelios'
description = 'Opinion articles in Greek'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'Various'
category = 'GR'
language = 'el'
encoding = 'utf8'
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
keep_only_tags = [
dict(name='h1', attrs={'id' : ['title']}),
dict(name='div', attrs={'class' : ['freetext']})
]
feeds = [
(u'\u0398\u03AD\u03BC\u03B1\u03C4\u03B1', 'http://www.protagon.gr/rss?i=protagon.el.8emata')
]


@@ -42,6 +42,9 @@ class Radikal_tr(BasicNewsRecipe):
              ,(u'Politika'    , u'http://www.radikal.com.tr/d/rss/Rss_98.xml' )
              ,(u'Dis Haberler', u'http://www.radikal.com.tr/d/rss/Rss_100.xml' )
              ,(u'Ekonomi'     , u'http://www.radikal.com.tr/d/rss/Rss_101.xml' )
+             ,(u'Radikal Iki'  , u'http://www.radikal.com.tr/d/rss/Rss_42.xml')
+             ,(u'Radikal Hayat', u'http://www.radikal.com.tr/d/rss/Rss_41.xml' )
+             ,(u'Radikal Kitap', u'http://www.radikal.com.tr/d/rss/Rss_40.xml' )
             ]

    def print_version(self, url):

@@ -0,0 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Real_world_economics_review(BasicNewsRecipe):
title = u'Real-world economics review blog'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
__author__ = 'Julio Map'
language = 'en'
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
remove_tags = [dict(name='div', attrs={'id':'postpath'}),
dict(name='div', attrs={'class':'robots-nocontent sd-block sd-social sd-social-icon-text sd-sharing'}),
dict(name='div', attrs={'class':'sharedaddy sd-sharing-enabled'})
]
feeds = [(u'Real-World Economics Review Blog', u'http://rwer.wordpress.com/feed/')]


@@ -29,22 +29,7 @@ class RollingStones(BasicNewsRecipe):
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
+   auto_cleanup = True
-   remove_javascript = True
-
-   #####################################################################################
-   # cleanup section                                                                   #
-   #####################################################################################
-   keep_only_tags = [
-       dict(name='div', attrs={'class':['c65l']}),
-       dict(name='div', attrs={'id':['col1']}),
-   ]
-   remove_tags = [
-       dict(name='div', attrs={'class': ['storyActions upper','storyActions lowerArticleNav']}),
-       dict(name='div', attrs={'id': ['comments','related']}),
-   ]

    feeds = [
        (u'News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
@@ -58,25 +43,7 @@ class RollingStones(BasicNewsRecipe):
-   def get_article_url(self, article):
-       return article.get('guid', None)
-
-   def append_page(self, soup, appendtag, position):
-       '''
-       Some are the articles are multipage so the below function
-       will get the articles that have <next>
-       '''
-       pager = soup.find('li',attrs={'class':'next'})
-       if pager:
-           nexturl = pager.a['href']
-           soup2 = self.index_to_soup(nexturl)
-           texttag = soup2.find('div', attrs={'id':'storyTextContainer'})
-           for it in texttag.findAll(style=True):
-               del it['style']
-           newpos = len(texttag.contents)
-           self.append_page(soup2,texttag,newpos)
-           texttag.extract()
-           appendtag.insert(position,texttag)
+   def print_version(self, url):
+       return url +'?print=true'


@@ -11,17 +11,16 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class Salon_com(BasicNewsRecipe):
    title = 'Salon.com'
-   __author__ = 'cix3'
+   __author__ = 'Kovid Goyal'
    description = 'Salon.com - Breaking news, opinion, politics, entertainment, sports and culture.'
    timefmt = ' [%b %d, %Y]'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
+   auto_cleanup = True
-   remove_tags = [dict(name='div', attrs={'class':['ad_content', 'clearfix']}), dict(name='hr'), dict(name='img')]
+   auto_cleanup_keep = '//div[@class="art"]'
+   remove_empty_feeds = True
-   remove_tags_before = dict(name='h2')

    feeds = [
        ('News & Politics', 'http://feeds.salon.com/salon/news'),
@@ -40,5 +39,5 @@ class Salon_com(BasicNewsRecipe):
    ]

    def print_version(self, url):
-       return url.replace('/index.html', '/print.html')
+       return url + '/print/'
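The new print_version() is a plain path append; for a hypothetical article it behaves like this:

    def print_version(url):
        return url + '/print/'

    print print_version('http://www.salon.com/2011/11/25/some_story')
    # -> http://www.salon.com/2011/11/25/some_story/print/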


@@ -27,12 +27,12 @@ class ScienceAAS(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
-           br.select_form(name='registered_users_form')
+           br.select_form(nr=1)
            br['username'] = self.username
            br['code'] = self.password
            br.submit()
        return br

-   keep_only_tags = [ dict(name='div', attrs={'id':'LegacyContent'}) ]
+   keep_only_tags = [ dict(name='div', attrs={'id':'content-block'}) ]

    feeds = [(u"Science: Current Issue", u'http://www.sciencemag.org/rss/current.xml')]


@@ -40,7 +40,7 @@ class Sciencenews(BasicNewsRecipe):
        ,dict(name='div', attrs={'class': 'embiggen'})
    ]

-   feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
+   feeds = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')]

    def get_cover_url(self):
        cover_url = None

recipes/sigma_live.recipe Normal file

@@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe
class sigmalive(BasicNewsRecipe):
title = u'SigmaLive'
__author__ = 'Stelios'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
category = 'news, CY'
description = 'Cypriot News'
language = 'el'
encoding = 'utf8'
feeds = [(u'sigmalive', u'http://sigmalive.com/rss/latest')]

Some files were not shown because too many files have changed in this diff.