Sync to trunk

2025-08-11 09:13:57 -04:00 · 2012-02-18 12:22:32 -05:00 · 2012-02-18 12:22:32 -05:00 · d4052dfbe5
commit d4052dfbe5
parent 02b8659639 a5627ae5cd
473 changed files with 264141 additions and 193124 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -5,7 +5,7 @@
 # Also, each release can have new and improved recipes.
 # - version: ?.?.?
-#   date: 2011-??-??
+#   date: 2012-??-??
 #
 #   new features:
 #     - title: 
@ -19,6 +19,432 @@
 #   new recipes:
 #     - title: 
 - version: 0.8.40
  date: 2012-02-17
  new features:
    - title: "Amazon metadata download: Support the new 'Book Description' section that Amazon publishes for some books. Also workaround the amazon US servers occasionally returning broken markup leading to calibre not finding any matches for books on Amazon."
    - title: "Kindle driver: Add an option to allow using page counts stored in a custom column. Go to Preferences->Plugins and customize the Kindle driver, to tell it to use a custom column to get page count data. See http://www.mobileread.com/forums/showpost.php?p=1963075&postcount=215 for details."
    - title: "Template language: Add a current_library_name() function that can be used to return the name of the currently opened library in calibre"
    - title: "Driver for Xperia Neo and PocketBook A10"
      tickets: [930788] 
  bug fixes:
    - title: "Fix regression in 0.8.36 that caused the calibredb command to not properly refresh format information in standalone calibre-server processes"
    - title: "Fix regression in 0.8.39 that broke getting covers from some epub files on OS X."
      tickets: [932507]
    - title: "Reading metadata from HTML files: Do not take a very long time for very large HTML files. Also fix reading metadata from meta tags with multiple spaces before the content attribute."
      tickets: [932262] 
    - title: "EPUB Output: Fix splitting breaking internal links in the epub, if the links pointed to files with URL unsafe characters in their file names."
      tickets: [929966]
    - title: "Fix auto adding not leaving languages field blank when book has no defined language"
      tickets: [930648] 
  improved recipes:
    - Samanyolu Haber
    - Kurier
    - Le devoir
    - Daily Mirror
    - Common Dreams
    - Pescanik
  new recipes:
    - title: Asian Review of Books 
      author: Darko Miletic
    - title: Albert Mohler, Desiring God, Living Stones and Resurgence 
      author: Peter Grungi
    - title: Novinite BG 
      author: M3 Web
    - title: Catholic Daily Readings 
      author: adoucette
    - title: Consortium News and Microwave and RF magazine
      author: kiavash
 - version: 0.8.39
  date: 2012-02-10
  new features:
    - title: "Auto-adding: Add an option to check for duplicates when auto adding."
      tickets: [926962]
    - title: "Content server: Export a second record via mDNS that points to the full OPDS feed in addition to the one pointing to the Stanza feed. The new record is of type _calibre._tcp."
      tickets: [929304]
    - title: "Allow specifying a set of categories that are not partitioned even if they contain a large number of items in the Tag Browser. Preference is available under Look & Feel->Tag Browser"
    - title: "Allow setting a URL prefix for the content server that runs embedded in the calibre GUI as well."
      tickets: [928905] 
    - title: "Allow output of identifiers data in CSV/XML/BiBTeX catalogs"
      tickets: [927737] 
    - title: "Driver for Motorola Droid XT910, Nokia E71 and HTC EVO 3D."
      tickets: [928202, 927818, 929400]
    - title: "Cut down the time taken to launch worker processes by 40%"
    - title: "You can now configure the calibre settings for the currently connected device by right clicking on the device icon in the toolbar, instead of having to go through Preferences->Plugins"
  bug fixes:
    - title: "Auto-adding: Do not add incomplete files when files are downloaded directly into the auto add folder."
      tickets: [926578]
    - title: "When running multiple delete from device jobs, fix the device view sometimes marking the wrong books as being deleted, after the first delete job completes."
      tickets: [927972]
    - title: "MOBI Input: Handle files that have spurious closing </body> and/or </html> tags in their markup."
      tickets: [925833]
    - title: "RTF Input: Strip out false color specifications, as they cause artifacts when converted to MOBI"
  improved recipes:
    - Updated Postmedia publications
    - Foreign Affairs
    - Read It Later
    - Microwave Journal
    - tagesschau.de
  new recipes:
    - title: Vancouver Province and Windsor Star 
      author: Nick Redding
    - title: Onda Rock 
      author: faber1971
    - title: Il Manifesto 
      author: Giacomo Lacava 
 - version: 0.8.38
  date: 2012-02-03
  new features:
    - title: "Implement the ability to automatically add books to calibre from a specified folder."
      type: major
      description: "calibre can now watch a folder on your computer and instantly add any files you put there to the calibre library as new books. You can tell calibre which folder to watch via Preferences->Adding Books->Automatic Adding."
      tickets: [920249] 
    - title: "Conversion: When automatically inserting page breaks, do not put a page break before a <h1> or <h2> tag if it is immediately preceded by another <h1> or <h2> tag."
    - title: "Driver for EZReader T730 and Point-of-View PlayTab Pro"
      tickets: [923283, 922969]
  bug fixes:
    - title: "Fix device entry not visible in menubar even when it has been added via Preferences->Toolbars."
      tickets: [923175]
    - title: "Fix metadata plugboards not applied when auto sending news by email"
    - title: "Fix regression in 0.8.34 that broke recipes that used skip_ad_pages() but not get_browser(). "
      tickets: [923724] 
    - title: "Restore device support on FreeBSD, by using HAL"
      tickets: [924503]
    - title: "Get books: Show no more than 10 results from the Gandalf store"
    - title: "Content server: Fix metadata not being updated when sending for some MOBI files."
      tickets: [923130]
    - title: "Heuristic processing: Fix the italicize common patterns algorithm breaking on some HTML markup."
      tickets: [922317]
    - title: "When trying to find an ebook inside a zip file, do not fail if the zip file itself contains other zip files."
      tickets: [925670]
    - title: "EPUB Input: Handle EPUBs with duplicate entries in the manifest."
      tickets: [925831]
    - title: "MOBI Input: Handle files that have extra </html> tags sprinkled throughout their markup."
      tickets: [925833]
  improved recipes:
    - Metro Nieuws NL
    - FHM UK
  new recipes:
    - title: Strange Horizons 
      author: Jim DeVona
    - title: Telegraph India and Live Mint 
      author: Krittika Goyal
    - title: High Country News 
      author: Armin Geller
    - title: Countryfile
      author: Dave Asbury
    - title: Liberation (subscription version) 
      author: Remi Vanicat
    - title: Various Italian news sources 
      author: faber1971
 - version: 0.8.37
  date: 2012-01-27
  new features:
    - title: "Allow calibre to be run simultaneously in two different user accounts on windows."
      tickets: [919856]
    - title: "Driver for Motorola Photon and Point of View PlayTab"
      tickets: [920582, 919080] 
    - title: "Add a checkbox to preferences->plugins to show only user installed plugins"
    - title: "Add a restart calibre button to the warning dialog that pops up after changing some preference that requires a restart"
  bug fixes:
    - title: "Fix regression in 0.8.36 that caused the remove format from book function to only delete the entry from the database and not delete the actual file from the disk"
      tickets: [921721]
    - title: "Fix regression in 0.8.36 that caused the calibredb command to not properly refresh the format information in the GUI"
      tickets: [919494] 
    - title: "E-book viewer: Preserve the current position more accurately when changing font size/other preferences."
      tickets: [912406]
    - title: "Conversion pipeline: Fix items in the <guide> that refer to files with URL unsafe filenames being ignored."
      tickets: [920804]
    - title: "Fix calibre not running on linux systems that set LANG to an empty string"
    - title: "On first run of calibre, ensure the columns are sized appropriately"
    - title: "MOBI Output: Do not collapse whitespace when setting the comments metadata in newly created MOBI files"
    - title: "HTML Input: Fix handling of files with ä characters in their filenames."
      tickets: [919931]
    - title: "Fix the sort on startup tweak ignoring more than three levels"
      tickets: [919584]
    - title: "Edit metadata dialog: Fix a bug that broke adding of a file to the book that calibre did not previously know about in the books directory while simultaneously changing the author or title of the book."
      tickets: [922003]
  improved recipes:
    - People's Daily
    - Plus Info
    - grantland.com
    - Eret es irodalom 
    - Sueddeutsche.de
  new recipes:
    - title: Mumbai Mirror 
      author: Krittika Goyal
    - title: Real Clear 
      author: TMcN
    - title: Gazeta Wyborcza 
      author: ravcio
    - title: The Daily News Egypt and al masry al youm 
      author: Omm Mishmishah
    - title: Klip.me 
      author: Ken Sun
 - version: 0.8.36
  date: 2012-01-20
  new features:
    - title: "Decrease startup time for large libraries with at least one composite custom column by reading format info on demand"
    - title: "When automatically deleting news older than x days, from the calibre library, only delete the book if it both has the tag News and the author calibre. This prevents accidental deletion of books tagged with News by the user."
    - title: "Driver for Infibeam Pi 2"
    - title: "Add a Tag Editor for tags like custom columns to the edit metadata dialog"
  bug fixes:
    - title: "E-book viewer: Fix regression in 0.8.35 that caused viewer to raise an error on books that did not define a language"
    - title: "Content server: Fix grouping for categories based on custom columns."
      tickets: [919011]
    - title: "Edit metadata dialog: When setting the series from a format or via metadata download, ensure that the series index is not automatically changed, when closing the dialog."
      tickets: [918751]
    - title: "When reading metadata from Topaz (azw1) files, handle non ascii metadata correctly."
      tickets: [917419]
    - title: "CHM Input: Do not choke on CHM files with non ascii internal filenames on windows."
      tickets: [917696]
    - title: "Fix reading metadata from CHM files with non-ascii titles"
    - title: "Fix HTML 5 parser choking on comments"
    - title: "If calibre is started from a directory that does not exist, automatically use the home directory as the working directory, instead of crashing"
    - title: "Fix iriver story HD Wi-Fi device and external SD card swapped"
      tickets: [916364] 
    - title: "Content server: Fix ugly URLs for specific format download in the book details and permalink panels"
    - title: "When adding FB2 files do not set the date field from the metadata in the file"
  improved recipes:
    - OReilly Premium
    - Variety
    - Blic
    - New Journal of Physics
    - Der Tagesspiegel
  new recipes:
    - title: Tweakers.net 
      author: Roedi06
    - title: Village Voice 
      author: Barty
    - title: Edge.org Conversations 
      author: levien
    - title: Novi list - printed edition
      author: Darko Miletic
 - version: 0.8.35
  date: 2012-01-13
  new features:
    - title: "Metadata plugboards: Allow creation of plugboards for email delivery."
      tickets: [914012]
    - title: "Tweak EPUB: Also allow tweaking of HTMLZ files (when both EPUB and HTMLZ are present, EPUB is preferred, this can be changed via Preferences->Tweaks)."
    - title: "TXT Input: Support direct conversion of files with extensions .md, .markdown and .textile." 
      tickets: [912931]
    - title: "E-book viewer: Speed up the optional hyphenation algorithm by upgrading the hyphenator library calibre uses"
    - title: "Drivers for PocketBook 611, Motorola Razr Droid and Onyx Boox i62"
  bug fixes:
    - title: "MOBI Output: When converting a paragraph that contains only a non-breaking space into a line break, ignore paragraphs with height less than 2pt."
      tickets: [915150]
    - title: "MOBI Input: Handle MOBI files that specify anchor point exactly at pagebreaks. These are apparently produced by John Wiley and Sons."
      tickets: [914036]
    - title: "Fetch news dialog: The Download now button is no longer scrolled out of view on OS X for news sources that require credentials"
    - title: "Fix commas being removed from author names when generating filenames in the calibre library"
    - title: "ODT Input: Don't crash on empty links"
    - title: "ebook-convert: Allow use of leading ./ when specifying output file names."
      tickets: [913954] 
    - title: "Fix deleting of hierarchical searches broken in Tag Browser"
      tickets: [912345] 
    - title: "Metadata search and replace: Fix rendering error when choosing {template}"
      tickets: [913154]
    - title: "Fix calibre not starting when stray .po files are present in the working directory"
      tickets: [913054]
    - title: "Do not error out when getting metadata for authors if the author name has either ::: or :#: in it."
      tickets: [912713]
  improved recipes:
    - Pagina 12
    - USA Today
    - LWN Weekly
    - Seattle Times
    - San Jose Mercury
    - Grantland.com
  new recipes:
    - title: Lega Nerd and Pambianco
      author: faber1971
    - title: Various Turkish news sources 
      author: asalet_r
    - title: Microwave Journal 
      author: Kiavash
    - title: OReilly Premium 
      author: TechnoCat
    - title: Hamilton Spectator and Tillsonburg/Norfolk County 
      author: Eric Coolman
    - title: Opinion Bolivia 
      author: Piet van Oostrum
    - title: ideal.es 
      author: Josemi Liebana
    - title: Novilist Portal
      author: Darko Miletic
 - version: 0.8.34
  date: 2012-01-06
  new features:
    - title: "Apple driver: Set the year field in iTunes based on the published date in calibre." 
      tickets: [909050]
    - title: "EPUB Input: When converting a file that has entries in the manifest that do not exist, remove them, instead of aborting the conversion."
      tickets: [910933]
    - title: "Kindle driver: Ensure page counts are correctly sent to the device when connecting to Kindle 4/Touch."
      tickets: [910279]
    - title: "Allow user to set the number of recently viewed books shown in the dropdown menu of the view button, via a tweak in Preferences->Tweaks."
      tickets: [910292]
  bug fixes:
    - title: "Fix regression in 0.8.33 that caused calibre to crash when starting the Content Server, if the port the content server is trying to listen on is blocked/busy."
      tickets: [910512]
    - title: "MOBI Input: Fix regression that caused a mixup of images when the MOBI file header contains an incorrect first image index pointer."
      tickets: [911243]
    - title: "Do not remove leading and trailing spaces from the replace fields in the Search and Replace conversion options"
      tickets: [910523]
    - title: "Conversion pipeline: Fix regression in 0.8.31 that broke parsing of documents containing a self closing <title/> tag."
      tickets: [910325]
  improved recipes:
    - Kopalnia Wiedzy
    - Alternet
    - Tagesspiegel
    - Philadelphia Inquirer
    - Seattle Times
    - La Razon
  new recipes:
    - title: Various Italian news sources 
      author: faber1971
    - title: money.pl 
      author: intromatyk
    - title: Diario Rio Negro
      author: Darko Miletic.
    - title: FHM UK 
      author: Dave Asbury
 - version: 0.8.33
  date: 2011-12-30
--- a/recipes/al_masry_al_youm.recipe
+++ b/recipes/al_masry_al_youm.recipe
@ -0,0 +1,50 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
 '''
 www.almasryalyoum.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class TheDailyNewsEG(BasicNewsRecipe):
    # Recipe for the English edition of al-masry al-youm; everything is
    # configured through class-level settings, there are no overridden methods.
    # --- identity / catalogue metadata ---
    title = u'al-masry al-youm'
    __author__ = 'Omm Mishmishah'
    description = 'Independent News from Egypt'
    publisher = 'Independent News Egypt'
    category = 'News, Egypt, World'
    language = 'en_EG'
    publication_type = 'newsportal'
    # The same logo image is used for both the masthead and the cover.
    cover_url = masthead_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
    # --- fetch behaviour ---
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf8'
    # --- clean-up behaviour ---
    auto_cleanup = True
    no_stylesheets = False
    remove_attributes = ['width', 'height']
    # Strip the Google Maps links. The "inline-caption" class is shared with
    # some image captions, so the pattern is anchored on the maps.google URL.
    preprocess_regexps = [
        (
            re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL),
            lambda m: '',
        ),
    ]
    # Metadata handed to the conversion step, mirroring the settings above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }
    keep_only_tags = [{'attrs': {'class': ['article section']}}]
    remove_tags = [{
        'attrs': {
            'class': [
                'related',
                'tags',
                'tools',
                'attached-content ready',
                'inline-content story left',
                'inline-content map left contracted',
                'published',
                'story-map',
                'statepromo',
                'topics',
            ],
        },
    }]
    # (section title, RSS url) pairs.
    feeds = [
        (u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
        (u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
        (u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
        (u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml'),
    ]
--- a/recipes/albert_mohler.recipe
+++ b/recipes/albert_mohler.recipe
@ -0,0 +1,18 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AlbertMohlersBlog(BasicNewsRecipe):
    # Feed-only recipe for Albert Mohler's blog; all behaviour comes from the
    # class-level settings below.
    title          = u'Albert Mohler\'s Blog'
    __author__ = 'Peter Grungi'
    # 'language' was previously assigned twice with the same value; it is now
    # set once.
    language = 'en'
    oldest_article = 90
    max_articles_per_feed = 10
    auto_cleanup = True
    cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'
    publisher = 'Albert Mohler'
    # NOTE(review): 'author' is kept as-is; confirm whether the recipe base
    # class actually reads an attribute of this name.
    author = 'Albert Mohler'
    # (section title, feed url) pairs.
    feeds          = [(u'Albert Mohler\'s Blog', u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
--- a/recipes/alternet.recipe
+++ b/recipes/alternet.recipe
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name
    conversion_options = {'linearize_tables': True}
--- a/recipes/ap.recipe
+++ b/recipes/ap.recipe
@ -11,7 +11,6 @@ class AssociatedPress(BasicNewsRecipe):
    language = 'en'
    no_stylesheets = True
    max_articles_per_feed = 15
    html2lrf_options = ['--force-page-break-before-tag="chapter"']
    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
--- a/recipes/asianreviewofbooks.recipe
+++ b/recipes/asianreviewofbooks.recipe
@ -0,0 +1,51 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.asianreviewofbooks.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class AsianReviewOfBooks(BasicNewsRecipe):
    # Recipe for The Asian Review of Books: class-level settings plus two
    # hooks that rewrite article URLs and wrap the fetched markup.
    title                 = 'The Asian Review of Books'
    __author__            = 'Darko Miletic'
    description           = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.'
    publisher             = 'The Asian Review of Books'
    category              = 'literature, books, reviews, Asia'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    language              = 'en_HK'
    publication_type      = 'magazine'
    masthead_url          = 'http://www.asianreviewofbooks.com/new/images/mob_arb.png'
    extra_css             = """
                               body{font-family: serif}
                               .big {font-size: xx-large}
                               .bold {font-weight: bold}
                               .italic {font-style: italic}
                               .small {font-size: small}
                               img {display: block}
                            """
    # NOTE(review): the key here is 'comment', while the al_masry_al_youm
    # recipe in this same commit uses 'comments' -- confirm which spelling
    # the converter honours before changing it.
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_tags       = [dict(name=['object','script','iframe','embed'])]
    remove_attributes = ['style', 'onclick']
    feeds             = [(u'Articles' , u'http://www.asianreviewofbooks.com/new/rss.php')]
    def print_version(self, url):
        # Map an article URL of the form '<root>?ID=<n>' to
        # '<root>getarticle.php?articleID=<n>&stats=web'.
        root, sep, artid = url.rpartition('?ID=')
        if not sep:
            # No '?ID=' marker: rpartition returns ('', '', url), which would
            # yield a bogus relative URL. Fall back to the original URL.
            return url
        return root + 'getarticle.php?articleID=' + artid + '&stats=web'
    def preprocess_raw_html(self, raw, url):
        # Wrap the fetched markup in a minimal html/head/body skeleton;
        # 'url' is not used.
        return '<html><head><title>title</title></head><body>' + raw + '</body></html>'
--- a/recipes/beppe_grillo.recipe
+++ b/recipes/beppe_grillo.recipe
@ -0,0 +1,16 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1327747616(BasicNewsRecipe):
    # Feed-only recipe for the Beppe Grillo blog; configured purely through
    # class-level settings.
    __author__ = 'faber1971'
    title = u'Beppe Grillo'
    description = 'Blog of the famous comedian and politician Beppe Grillo - v1.00 (28, January 2012)'
    language = 'it'
    # Fetch limits and clean-up.
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    # (section title, feed url) pairs.
    feeds = [
        (u'Beppe Grillo', u'http://feeds.feedburner.com/beppegrillo/atom'),
    ]
--- a/recipes/blic.recipe
+++ b/recipes/blic.recipe
@ -1,6 +1,6 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blic.rs
 '''
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
    def print_version(self, url):
        return url + '/print'
-    def preprocess_html(self, soup):
+    def get_cover_url(self):
-        for item in soup.findAll(style=True):
+        soup = self.index_to_soup('http://www.blic.rs/')
-            del item['style']    
+        alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
-        return soup
+        if alink:
           return 'http://www.blic.rs' + alink['href']
        return None
--- a/recipes/borba.recipe
+++ b/recipes/borba.recipe
@ -1,95 +0,0 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 borba.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Borba(BasicNewsRecipe):
    # Recipe for Borba Online (borba.rs). Articles are not read from RSS:
    # parse_index() scrapes each section page listed in 'feeds'.
    title                 = 'Borba Online'
    __author__            = 'Darko Miletic'
    description           = 'Dnevne novine Borba Online'
    publisher             = 'IP Novine Borba'
    category              = 'news, politics, Serbia'    
    language = 'sr'
    # NOTE(review): '_' is not defined in this file; presumably a translation
    # builtin installed by the host application -- confirm.
    lang                  = _('sr-Latn-RS')
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    cover_url             = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
    INDEX                 = u'http://www.borba.rs/'
    extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
    # Metadata passed to the conversion step; note that 'language' is taken
    # from 'lang', not from the 'language' attribute above.
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : lang
                        , 'pretty_print'     : True
                        }
    # Replace every U+0110 character with U+00D0 in the fetched markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags = [dict(name='div', attrs={'class':'main'})]
    remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})
    remove_tags = [
                     dict(name=['object','link','iframe','base','img'])
                    ,dict(name='div',attrs={'id':'written_comments_title'})
                  ]
    # (section title, section page url) pairs; consumed by parse_index().
    feeds = [
               (u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/')
              ,(u'Prvi plan'      , u'http://www.borba.rs/content/blogsection/4/92/'  )
              ,(u'Dogadjaji'      , u'http://www.borba.rs/content/blogsection/21/83/' )
              ,(u'Ekonomija'      , u'http://www.borba.rs/content/blogsection/5/35/'  )
              ,(u'Komentari'      , u'http://www.borba.rs/content/blogsection/23/94/' )
              ,(u'Svet'           , u'http://www.borba.rs/content/blogsection/7/36/'  )
              ,(u'Sport'          , u'http://www.borba.rs/content/blogsection/6/37/'  )
              ,(u'Fama'           , u'http://www.borba.rs/content/blogsection/25/89/' )
              ,(u'B2 Dodatak'     , u'http://www.borba.rs/content/blogsection/30/116/')
            ]
    def preprocess_html(self, soup):
        # Flatten table markup: every table-related tag under <body> is
        # renamed to <div> and stripped of the presentational attributes
        # listed below. Returns the same soup object, modified in place.
        attribs = [  'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                   del item[attrib]            
        return soup
    def parse_index(self):
        # Build the article index by scraping each section page.
        # Returns a list of (section title, articles) tuples, where each
        # article is a dict with 'title', 'date', 'url' and 'description'
        # keys ('date' and 'description' are always empty strings).
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            # Progress label falls back to the URL when the title is empty.
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            # Each article is an <a class="contentpagetitle"> link; its href
            # is used as the article URL unchanged.
            for item in soup.findAll('a', attrs={'class':'contentpagetitle'}):
                url         = item['href']
                title       = self.tag_to_string(item)
                articles.append({
                                      'title'      :title
                                     ,'date'       :''
                                     ,'url'        :url
                                     ,'description':''
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
--- a/recipes/calgary_herald.recipe
+++ b/recipes/calgary_herald.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
@ -6,45 +7,76 @@ __license__   = 'GPL v3'
 www.canada.com
 '''
-from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
-    # un-comment the following three lines for the Calgary Herald
+    # un-comment the following four lines for the Victoria Times Colonist
 ##    title = u'Victoria Times Colonist'
 ##    url_prefix = 'http://www.timescolonist.com'
 ##    description = u'News from Victoria, BC'
 ##    fp_tag = 'CAN_TC'
    # un-comment the following four lines for the Vancouver Province
 ##    title = u'Vancouver Province'
 ##    url_prefix = 'http://www.theprovince.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VP'
    # un-comment the following four lines for the Vancouver Sun
 ##    title = u'Vancouver Sun'
 ##    url_prefix = 'http://www.vancouversun.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VS'
    # un-comment the following four lines for the Edmonton Journal
 ##    title = u'Edmonton Journal'
 ##    url_prefix = 'http://www.edmontonjournal.com'
 ##    description = u'News from Edmonton, AB'
 ##    fp_tag = 'CAN_EJ'
    # un-comment the following four lines for the Calgary Herald
    title = u'Calgary Herald'
    url_prefix = 'http://www.calgaryherald.com'
    description = u'News from Calgary, AB'
    fp_tag = 'CAN_CH'
-    # un-comment the following three lines for the Regina Leader-Post
+    # un-comment the following four lines for the Regina Leader-Post
-    #title = u'Regina Leader-Post'
+##    title = u'Regina Leader-Post'
-    #url_prefix = 'http://www.leaderpost.com'
+##    url_prefix = 'http://www.leaderpost.com'
-    #description = u'News from Regina, SK'
+##    description = u'News from Regina, SK'
 ##    fp_tag = ''
-    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    # un-comment the following four lines for the Saskatoon Star-Phoenix
-    #title = u'Saskatoon Star-Phoenix'
+##    title = u'Saskatoon Star-Phoenix'
-    #url_prefix = 'http://www.thestarphoenix.com'
+##    url_prefix = 'http://www.thestarphoenix.com'
-    #description = u'News from Saskatoon, SK'
+##    description = u'News from Saskatoon, SK'
 ##    fp_tag = ''
-    # un-comment the following three lines for the Windsor Star
+    # un-comment the following four lines for the Windsor Star
-    #title = u'Windsor Star'
+##    title = u'Windsor Star'
-    #url_prefix = 'http://www.windsorstar.com'
+##    url_prefix = 'http://www.windsorstar.com'
-    #description = u'News from Windsor, ON'
+##    description = u'News from Windsor, ON'
 ##    fp_tag = 'CAN_'
-    # un-comment the following three lines for the Ottawa Citizen
+    # un-comment the following four lines for the Ottawa Citizen
-    #title = u'Ottawa Citizen'
+##    title = u'Ottawa Citizen'
-    #url_prefix = 'http://www.ottawacitizen.com'
+##    url_prefix = 'http://www.ottawacitizen.com'
-    #description = u'News from Ottawa, ON'
+##    description = u'News from Ottawa, ON'
 ##    fp_tag = 'CAN_OC'
-    # un-comment the following three lines for the Montreal Gazette
+    # un-comment the following four lines for the Montreal Gazette
-    #title = u'Montreal Gazette'
+##    title = u'Montreal Gazette'
-    #url_prefix = 'http://www.montrealgazette.com'
+##    url_prefix = 'http://www.montrealgazette.com'
-    #description = u'News from Montreal, QC'
+##    description = u'News from Montreal, QC'
 ##    fp_tag = 'CAN_MG'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    encoding = 'latin1'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
@ -64,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-    def preprocess_html(self,soup):
+    def get_cover_url(self):
-        #delete iempty id attributes--they screw up the TOC for unknow reasons
+        from datetime import timedelta, date
-        divtags = soup.findAll('div',attrs={'id':''})
+        if self.fp_tag=='':
-        if divtags:
+            return None
-            for div in divtags:
+        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-                del(div['id'])
+        br = BasicNewsRecipe.get_browser()
        # Overview: returns the URL of a front-page image built from the day of
        # the month and self.fp_tag, or None when fp_tag is empty or no
        # candidate URL could be opened.
        # daysback is the day offset used for the next fallback candidate; it
        # only reaches 7 when every probe (today plus the six previous days) failed.
        daysback=1
        try:
            br.open(cover)
        except:
            # Today's image could not be opened (the bare except treats any
            # error as "not available"): walk back one day at a time.
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    # This day failed too; try the day before it.
                    daysback = daysback+1
                    continue
                # The candidate opened, so keep it and stop searching.
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def fixChars(self,string):
        """Return string with a fixed set of character substitutions applied.

        The substitutions are run one after another with re.sub, in the
        order listed below; the input object itself is not modified.
        """
        # (pattern, replacement) pairs; the trailing comment names the
        # character each \x9N pattern stands for.
        substitutions = (
            ("\x91","‘"),    # lsquo
            ("\x92","’"),    # rsquo
            ("\x93","“"),    # ldquo
            ("\x94","”"),    # rdquo
            ("\x96","–"),    # ndash
            ("\x97","—"),    # mdash
            ("&#x2019;","’"),   # literal numeric entity for rsquo
        )
        result = string
        for pattern, replacement in substitutions:
            result = re.sub(pattern, replacement, result)
        return result
    def massageNCXText(self, description):
        """Prepare a description string for use in the table of contents.

        A falsy description (None, empty string) is returned unchanged.
        Otherwise the text is run through BeautifulStoneSoup with HTML entity
        conversion, converted to unicode, and passed through self.fixChars.
        """
        # Kindle TOC descriptions won't render certain characters
        if not description:
            return description
        decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # NOTE(review): pattern and replacement are both a bare ampersand, so
        # this substitution leaves the text unchanged. It presumably involved
        # an entity form of '&' at some point -- confirm against the upstream
        # recipe before relying on it.
        decoded = re.sub("&","&", decoded)
        return self.fixChars(decoded)
    def populate_article_metadata(self, article, soup, first):
        """Fill in thumbnail and summary information for an article.

        When first is true, the first <img> found under <body> is registered
        through self.add_toc_thumbnail, with any 'links\\linkN\\' fragment
        removed from its src. Independently of that, when the article's
        text_summary is empty or whitespace, the content of the page's
        og:description <meta> tag (if present) becomes both summary and
        text_summary.
        """
        if first:
            lead_img = soup.find('body').find('img')
            if lead_img is not None:
                thumb_src = re.sub(r'links\\link\d+\\','',lead_img['src'])
                self.add_toc_thumbnail(article,thumb_src)
        if article.text_summary.strip():
            # A non-blank summary already exists; leave it alone.
            return
        og_desc = soup.find('meta',attrs={'property':'og:description'})
        if og_desc is None:
            return
        article.summary = article.text_summary = og_desc['content']
    def strip_anchors(self,soup):
        """Replace every <a> that has no <img> child with its rendered contents.

        Anchors wrapping an image are left in place. The soup is edited in
        place and the same object is returned.
        """
        for element in soup.findAll(True):
            for anchor in element.findAll('a'):
                if anchor.img is not None:
                    # Keep linked images untouched.
                    continue
                inner = anchor.renderContents().decode('cp1252','replace')
                anchor.replaceWith(inner)
        return soup
    def preprocess_html(self, soup):
        """Return soup after running it through self.strip_anchors."""
        stripped = self.strip_anchors(soup)
        return stripped
    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
@ -98,8 +196,6 @@ class CanWestPaper(BasicNewsRecipe):
                atag = h1tag.find('a',href=True)
                if not atag:
                    continue
                url = atag['href']
                if not url.startswith('http:'):
                url = self.url_prefix+'/news/todays-paper/'+atag['href']
                #self.log("Section %s" % key)
                #self.log("url %s" % url)
--- a/recipes/catholic_daily_readings.recipe
+++ b/recipes/catholic_daily_readings.recipe
@ -0,0 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1328971305(BasicNewsRecipe):
    # Identification
    title          = u'Catholic Daily Readings'
    __author__ = 'adoucette'
    language = 'en'
    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    # (feed title, feed URL) pairs
    feeds          = [
        (u'Daily Readings - USCCB', u'http://www.usccb.org/bible/readings/rss/'),
        (u'Daily Reflection - One Bread One Body', u'http://www.presentationministries.com/general/rss.asp'),
        (u'Mass Readings - Universalis', u'http://www.universalis.com/atommass3.xml'),
        (u'Saint Of The Day - CNA', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday'),
    ]
--- a/recipes/chicago_tribune.recipe
+++ b/recipes/chicago_tribune.recipe
@ -77,8 +77,18 @@ class ChicagoTribune(BasicNewsRecipe):
    def get_article_url(self, article):
-        print article.get('feedburner_origlink', article.get('guid', article.get('link')))
+        url = article.get('feedburner_origlink', article.get('guid', article.get('link')))
-        return article.get('feedburner_origlink', article.get('guid', article.get('link')))
+        if url.endswith('?track=rss'):
            url = url.partition('?')[0]
        return url
    def skip_ad_pages(self, soup):
        """Follow the 'click here to continue to article' link when present.

        Returns the result of self.index_to_soup(url, raw=True) for the href
        of the element that contains that text, or None when the text is not
        found or its parent has no href.
        """
        marker = soup.find(text='click here to continue to article')
        if not marker:
            return None
        target = marker.parent.get('href')
        if not target:
            return None
        return self.index_to_soup(target, raw=True)
    def postprocess_html(self, soup, first_fetch):
        # Remove the navigation bar. It was kept until now to be able to follow
--- a/recipes/common_dreams.recipe
+++ b/recipes/common_dreams.recipe
@ -1,38 +1,89 @@
 #!/usr/bin/env  python
 ##
 ## Title:        Common Dreams
 ##
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 # Feb 2012: Cleaned up the output to have only the main article
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 '''
 commondreams.org
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class CommonDreams(BasicNewsRecipe):
    # Identify the recipe
    title          = u'Common Dreams'
-    description    = u'Progressive news and views'
+    description    = u'Breaking News & Views for the Progressive Community.'
    cover_url      = 'https://s3.amazonaws.com/s3.commondreams.org/images/common-dreams.png'
    __author__     = u'XanthanGum'
    language = 'en'
    # Format the text
    extra_css = '''
                 body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
                 h1{font-size: xx-large;}
                 h2{font-size: large;}
                '''
    # Pick no article older than seven days and limit the number of articles per feed to 100
    oldest_article = 7
    max_articles_per_feed = 100
-    # Remove everything before the article
+    no_stylesheets = True
    remove_javascript = True
-    remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
+    # Flattens all the tables to make it compatible with Nook
    conversion_options = {'linearize_tables' : True}
-    # Remove everything after the article
+    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                    font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
    # Remove the line breaks and float left/right and picture width/height.
    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'float:.*?'), lambda m: ''),
                              (re.compile(r'width:.*?px'), lambda m: ''),
                              (re.compile(r'height:.*?px'), lambda m: ''),
                              (re.compile(r'<a.*?>'), lambda m: ''),
                              (re.compile(r'</a>'), lambda m: ''),
                              ]
    # Main article is inside this tag
    keep_only_tags = [
                        dict(name='div', attrs={'id':lambda x: x and 'node-' in x}),
                     ]
    remove_tags    = [
                        dict(name='div', attrs={'class':'node-links clear-block'}),    # remove Share options
                     ]
    remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
    # Identify the news feeds
-    feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
+    feeds = [(u'Headlines', u'https://www.commondreams.org/feed/headlines_rss'),
-             (u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'), 
+             (u'Further News Articles', u'https://www.commondreams.org/feed/further_rss'), 
-             (u'Views', u'http://www.commondreams.org/feed/views_rss'), 
+             (u'Views', u'https://www.commondreams.org/feed/views_rss'), 
-             (u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]
+             (u'Progressive Newswire', u'https://www.commondreams.org/feed/newswire_rss')]
    def print_version(self, url):
        url = url + '?print'
        return url
--- a/recipes/consortium_news.recipe
+++ b/recipes/consortium_news.recipe
@ -0,0 +1,71 @@
 #!/usr/bin/env  python
 ##
 ## Title:        Consortium News
 ##
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 # Feb 2012: Initial release
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 '''
 consortiumnews.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class ConsortiumNews(BasicNewsRecipe):
    # Identification
    title            = u'Consortium News'
    __author__ = 'kiavash'
    publisher        = 'Copyright © 2012 Consortiumnews. All Rights Reserved.'
    language         = 'en'
    # Source of the articles: (feed title, feed URL) pairs
    feeds          = [(u'Consortium News', u'http://feeds.feedburner.com/Consortiumnewscom')]
    oldest_article = 7
    max_articles_per_feed = 100
    # Clean-up switches
    no_stylesheets = True
    remove_javascript = True
    # Flattens all the tables to make it compatible with Nook
    conversion_options = {'linearize_tables' : True}
    remove_attributes = [
        'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
        'valign', 'vspace', 'hspace', 'alt', 'width', 'height',
    ]
    # Main article is inside this tag
    keep_only_tags = [dict(name='div', attrs={'id':lambda x: x and 'post-' in x})]
    remove_tags    = [
        # remove 'Share this Article'
        dict(name='div', attrs={'class':'sociable'}),
        # remove 'Tags: ... '
        dict(name='p', attrs={'class':'tags'}),
    ]
    # Remove the line breaks and float left/right and picture width/height.
    # Each entry pairs a compiled pattern with a callable returning the
    # replacement text (always the empty string here).
    # NOTE(review): in 'float:.*?' the lazy '.*?' is the last element of the
    # pattern, so it matches nothing and only the literal 'float:' is removed.
    preprocess_regexps     = [
        (re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda _m: ''),
        (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda _m: ''),
        (re.compile(r'float:.*?'), lambda _m: ''),
        (re.compile(r'width:.*?px'), lambda _m: ''),
        (re.compile(r'height:.*?px'), lambda _m: ''),
        (re.compile(r'<a.*?>'), lambda _m: ''),
        (re.compile(r'</a>'), lambda _m: ''),
    ]
    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                    font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
--- a/recipes/countryfile.recipe
+++ b/recipes/countryfile.recipe
@ -0,0 +1,25 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    # last updated 29/1/12
    # Identification
    title          = u'Countryfile.com'
    __author__ = 'Dave Asbury'
    description           = 'The official website of Countryfile Magazine'
    language = 'en_GB'
    cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
    # Fetch limits and clean-up
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True
    # Currently disabled: articles_are_obfuscated = True
    # No tags are removed; the 'player' class filter is disabled:
    # dict(attrs={'class' : ['player']}),
    remove_tags    = []
    # (feed title, feed URL) pairs
    feeds          = [
        (u'Homepage', u'http://www.countryfile.com/rss/home'),
        (u'Country News', u'http://www.countryfile.com/rss/news'),
        (u'Countryside', u'http://www.countryfile.com/rss/countryside'),
    ]
--- a/recipes/daily_mirror.recipe
+++ b/recipes/daily_mirror.recipe
@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    description = 'News as provide by The Daily Mirror -UK'
    __author__ = 'Dave Asbury'
-    # last updated 26/12/11
+    # last updated 11/2/12
    language = 'en_GB'
    cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@ -14,35 +14,58 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    oldest_article = 1
-    max_articles_per_feed = 20
+    max_articles_per_feed = 5
    remove_empty_feeds = True
    remove_javascript     = True
    no_stylesheets = True
    auto_cleanup = True
    #conversion_options = { 'linearize_tables' : True }
    #keep_only_tags = [
      #     dict(name='h1'),
      # dict(name='div',attrs={'id' : 'body-content'}),
       #dict(name='div',atts={'class' : 'article-body'}),
       #dict(attrs={'class' : ['article-attr','byline append-1','published']}),
       #dict(name='p'),
       # ]
    #remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
    remove_tags = [
           dict(name='title'),
           dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
          # dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
           #dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
           #dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
          ]
   # preprocess_regexps = [
    #(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
    # Both clean-up patterns live in ONE list: assigning preprocess_regexps a
    # second time replaces the first list, which silently dropped the
    # '- mirror.co.uk' pattern. Each entry is (compiled pattern, callable
    # returning the replacement text). The dots are escaped so they match
    # literal dots only.
    preprocess_regexps = [
    (re.compile(r'- mirror\.co\.uk', re.IGNORECASE | re.DOTALL), lambda match: ''),
    (re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
    #preprocess_regexps = [
    #(re.compile(r'Sponsored Links', re.IGNORECASE | re.DOTALL), lambda match: '')]
    feeds          = [
-        (u'News', u'http://www.mirror.co.uk/news/rss.xml')
+        (u'UK News', u'http://feed43.com/0287771688643868.xml')
-        ,(u'Tech News', u'http://www.mirror.co.uk/news/technology/rss.xml')
+        ,(u'Tech News', u'http://feed43.com/2455520588350501.xml')
-        ,(u'Weird World','http://www.mirror.co.uk/news/weird-world/rss.xml')
+        ,(u'Weird World','http://feed43.com/0863800333634654.xml')
-        ,(u'Film Gossip','http://www.mirror.co.uk/celebs/film/rss.xml')
+        ,(u'Sport','http://feed43.com/7713243036546130.xml')
-        ,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
+        ,(u'Sport : Boxing ','http://feed43.com/0414732220804255.xml')
-        ,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
+        ,(u'Sport : Rugby Union','http://feed43.com/4710138762362383.xml')
-        ,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
+        ,(u'Sport : Other','http://feed43.com/4501416886323415.xml')
-        ,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
+        ,(u'TV and Film','http://feed43.com/5238302853765104.xml')
-        ,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
+        ,(u'Celebs','http://feed43.com/8770061048844683.xml')
-        ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
+        ,(u'Life Style : Family','http://feed43.com/4356170742410338.xml')
         ,(u'Travel','http://feed43.com/1436576006476607.xml')
           # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
  ]
--- a/recipes/derin_dusunce.recipe
+++ b/recipes/derin_dusunce.recipe
@ -0,0 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324913694(BasicNewsRecipe):
    # Identification
    title          = u'Derin Dusunce'
    __author__ = 'asalet_r'
    language = 'tr'
    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 20
    auto_cleanup = True
    # (feed title, feed URL) pairs
    feeds          = [
        (u'Derin D\xfc\u015f\xfcnce', u'http://www.derindusunce.org/feed/'),
    ]
--- a/recipes/desiring_god.recipe
+++ b/recipes/desiring_god.recipe
@ -0,0 +1,21 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
 from calibre.web.feeds.news import BasicNewsRecipe
 class DesiringGodEnglish(BasicNewsRecipe):
    # Recipe for the Desiring God blog feed.
    # Identification (language is set once; a second identical assignment
    # further down was redundant and has been removed)
    title          = u'Desiring God'
    __author__ = 'Peter Grungi'
    language = 'en'
    publisher = 'Desiring God Ministries'
    author = 'Desiring God Ministries'
    # The same image is used for both the cover and the masthead
    cover_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png'
    masthead_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png'
    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    # (feed title, feed URL) pairs
    feeds          = [(u'Desiring God Blog', u'http://feeds.feedburner.com/DGBlog?format=xml')]
--- a/recipes/dunya_bizim.recipe
+++ b/recipes/dunya_bizim.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324736687(BasicNewsRecipe):
    # Identification
    title          = u'D\xfcnya Bizim'
    __author__ = 'asalet_r'
    language = 'tr'
    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 10
    auto_cleanup = True
    # (feed title, feed URL) pairs, one per kategoriID
    feeds          = [
        (u'Aktif \u0130mamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=31'),
        (u'Ayr\u0131nt\u0131 Defteri', u'http://dunyabizim.com/servisler/rss.php?kategoriID=58'),
        (u'Baba Kitaplar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=4'),
        (u'Bu da Oldu', u'http://dunyabizim.com/servisler/rss.php?kategoriID=32'),
        (u'\xc7-al\u0131nt\u0131 Yaz\u0131lar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=33'),
        (u'Dar\xfclmedya', u'http://dunyabizim.com/servisler/rss.php?kategoriID=49'),
        (u'Gidenler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=59'),
        (u'G\xfczel Mekanlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=43'),
        (u'\u0130yi Haberler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=18'),
        (u'\u0130yi M\xfczikler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=2'),
        (u'Kalite Dergiler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=3'),
        (u'Konu\u015fa Konu\u015fa', u'http://dunyabizim.com/servisler/rss.php?kategoriID=24'),
        (u'M\xfcstesta G\xfczeller', u'http://dunyabizim.com/servisler/rss.php?kategoriID=65'),
        (u'O \u015eimdi Nerede?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=52'),
        (u'Olsa Ke\u015fke', u'http://dunyabizim.com/servisler/rss.php?kategoriID=34'),
        (u'Orada Ne Oldu?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=38'),
        (u'\xd6nemli Adamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=1'),
        (u'Polemik', u'http://dunyabizim.com/servisler/rss.php?kategoriID=39'),
        (u'Sinema', u'http://dunyabizim.com/servisler/rss.php?kategoriID=23'),
        (u'Yalan Haber', u'http://dunyabizim.com/servisler/rss.php?kategoriID=40'),
        (u'Yeni \u015eeyler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=57'),
        (u'Zekeriya Sofras\u0131', u'http://dunyabizim.com/servisler/rss.php?kategoriID=60'),
    ]
--- a/recipes/dunya_bulteni.recipe
+++ b/recipes/dunya_bulteni.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1321194347(BasicNewsRecipe):
    # Identification
    title          = u'D\xfcnya B\xfclteni'
    __author__ = 'asalet_r'
    language = 'tr'
    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    # (feed title, feed URL) pairs, one per kategoriID
    feeds          = [
        (u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=157'),
        (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=153'),
        (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=174'),
        (u'K\xfclt\xfcr-Sanat', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=66'),
        (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=200'),
        (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=123'),
        (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=90'),
        (u'Aile Sa\u011fl\u0131k E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=75'),
    ]
--- a/recipes/edge_conversations.recipe
+++ b/recipes/edge_conversations.recipe
@ -0,0 +1,50 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
 '''
 Fetch Edge.org conversations
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class EdgeConversationRSS(BasicNewsRecipe):
    # Identification
    title          = u'Edge.org Conversations'
    __author__ = 'levien'
    language = 'en'
    description = '''Edge.org offers "open-minded, free ranging, intellectually
    playful ... an unadorned pleasure in curiosity, a collective expression of
    wonder at the living and inanimate world ... an ongoing and thrilling
    colloquium.'''
    # Feed selection: (feed title, feed URL) pairs and fetch limits
    feeds          = [(u'Edge RSS', u'http://edge.org/feeds/')]
    oldest_article = 60
    max_articles_per_feed = 100
    # Page clean-up
    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'class':'HomeLeftPannel IMGCTRL'}),
    ]
    remove_tags    = [
        dict(name='div',attrs={'class':'Logo'}),
    ]
    def print_version(self, url):
        """Return url with 'conversation/' rewritten to 'conversation.php?cid='."""
        printable = url.replace('conversation/', 'conversation.php?cid=')
        return printable
    def parse_feeds(self):
        """Return the parent's parsed feeds, keeping only conversation articles.

        An article is dropped when its title lacks 'CONVERSATION' or when its
        url contains 'pdf'.
        """
        parsed = BasicNewsRecipe.parse_feeds(self)
        for feed in parsed:
            # Walk a copy so entries can be removed from the live list.
            for entry in feed.articles[:]:
                wanted = 'CONVERSATION' in entry.title and 'pdf' not in entry.url
                if not wanted:
                    feed.articles.remove(entry)
        return parsed
--- a/recipes/edmonton_journal.recipe
+++ b/recipes/edmonton_journal.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
@ -6,45 +7,72 @@ __license__   = 'GPL v3'
 www.canada.com
 '''
-from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
-    # un-comment the following three lines for the Edmonton Journal
+    # un-comment the following four lines for the Victoria Times Colonist
 ##    title = u'Victoria Times Colonist'
 ##    url_prefix = 'http://www.timescolonist.com'
 ##    description = u'News from Victoria, BC'
 ##    fp_tag = 'CAN_TC'
    # un-comment the following four lines for the Vancouver Province
 ##    title = u'Vancouver Province'
 ##    url_prefix = 'http://www.theprovince.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VP'
    # un-comment the following four lines for the Vancouver Sun
 ##    title = u'Vancouver Sun'
 ##    url_prefix = 'http://www.vancouversun.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VS'
    # un-comment the following four lines for the Edmonton Journal
    title = u'Edmonton Journal'
    url_prefix = 'http://www.edmontonjournal.com'
    description = u'News from Edmonton, AB'
    fp_tag = 'CAN_EJ'
-    # un-comment the following three lines for the Calgary Herald
+    # un-comment the following four lines for the Calgary Herald
-    #title = u'Calgary Herald'
+##    title = u'Calgary Herald'
-    #url_prefix = 'http://www.calgaryherald.com'
+##    url_prefix = 'http://www.calgaryherald.com'
-    #description = u'News from Calgary, AB'
+##    description = u'News from Calgary, AB'
 ##    fp_tag = 'CAN_CH'
-    # un-comment the following three lines for the Regina Leader-Post
+    # un-comment the following four lines for the Regina Leader-Post
-    #title = u'Regina Leader-Post'
+##    title = u'Regina Leader-Post'
-    #url_prefix = 'http://www.leaderpost.com'
+##    url_prefix = 'http://www.leaderpost.com'
-    #description = u'News from Regina, SK'
+##    description = u'News from Regina, SK'
 ##    fp_tag = ''
-    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    # un-comment the following four lines for the Saskatoon Star-Phoenix
-    #title = u'Saskatoon Star-Phoenix'
+##    title = u'Saskatoon Star-Phoenix'
-    #url_prefix = 'http://www.thestarphoenix.com'
+##    url_prefix = 'http://www.thestarphoenix.com'
-    #description = u'News from Saskatoon, SK'
+##    description = u'News from Saskatoon, SK'
 ##    fp_tag = ''
-    # un-comment the following three lines for the Windsor Star
+    # un-comment the following four lines for the Windsor Star
-    #title = u'Windsor Star'
+##    title = u'Windsor Star'
-    #url_prefix = 'http://www.windsorstar.com'
+##    url_prefix = 'http://www.windsorstar.com'
-    #description = u'News from Windsor, ON'
+##    description = u'News from Windsor, ON'
 ##    fp_tag = 'CAN_'
-    # un-comment the following three lines for the Ottawa Citizen
+    # un-comment the following four lines for the Ottawa Citizen
-    #title = u'Ottawa Citizen'
+##    title = u'Ottawa Citizen'
-    #url_prefix = 'http://www.ottawacitizen.com'
+##    url_prefix = 'http://www.ottawacitizen.com'
-    #description = u'News from Ottawa, ON'
+##    description = u'News from Ottawa, ON'
 ##    fp_tag = 'CAN_OC'
-    # un-comment the following three lines for the Montreal Gazette
+    # un-comment the following four lines for the Montreal Gazette
-    #title = u'Montreal Gazette'
+##    title = u'Montreal Gazette'
-    #url_prefix = 'http://www.montrealgazette.com'
+##    url_prefix = 'http://www.montrealgazette.com'
-    #description = u'News from Montreal, QC'
+##    description = u'News from Montreal, QC'
 ##    fp_tag = 'CAN_MG'
    language = 'en_CA'
@ -68,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-    def preprocess_html(self,soup):
+    def get_cover_url(self):
-        #delete iempty id attributes--they screw up the TOC for unknow reasons
+        from datetime import timedelta, date
-        divtags = soup.findAll('div',attrs={'id':''})
+        if self.fp_tag=='':
-        if divtags:
+            return None
-            for div in divtags:
+        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-                del(div['id'])
+        br = BasicNewsRecipe.get_browser()
        # Overview: returns the URL of a front-page image built from the day of
        # the month and self.fp_tag, or None when fp_tag is empty or no
        # candidate URL could be opened.
        # daysback is the day offset used for the next fallback candidate; it
        # only reaches 7 when every probe (today plus the six previous days) failed.
        daysback=1
        try:
            br.open(cover)
        except:
            # Today's image could not be opened (the bare except treats any
            # error as "not available"): walk back one day at a time.
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    # This day failed too; try the day before it.
                    daysback = daysback+1
                    continue
                # The candidate opened, so keep it and stop searching.
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def fixChars(self,string):
        """Return string with a fixed set of character substitutions applied.

        The substitutions are run one after another with re.sub, in the
        order listed below; the input object itself is not modified.
        """
        # (pattern, replacement) pairs; the trailing comment names the
        # character each \x9N pattern stands for.
        substitutions = (
            ("\x91","‘"),    # lsquo
            ("\x92","’"),    # rsquo
            ("\x93","“"),    # ldquo
            ("\x94","”"),    # rdquo
            ("\x96","–"),    # ndash
            ("\x97","—"),    # mdash
            ("&#x2019;","’"),   # literal numeric entity for rsquo
        )
        result = string
        for pattern, replacement in substitutions:
            result = re.sub(pattern, replacement, result)
        return result
    def massageNCXText(self, description):
        """Prepare a description string for use in the table of contents.

        A falsy description (None, empty string) is returned unchanged.
        Otherwise the text is run through BeautifulStoneSoup with HTML entity
        conversion, converted to unicode, and passed through self.fixChars.
        """
        # Kindle TOC descriptions won't render certain characters
        if not description:
            return description
        decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # NOTE(review): pattern and replacement are both a bare ampersand, so
        # this substitution leaves the text unchanged. It presumably involved
        # an entity form of '&' at some point -- confirm against the upstream
        # recipe before relying on it.
        decoded = re.sub("&","&", decoded)
        return self.fixChars(decoded)
    def populate_article_metadata(self, article, soup, first):
        """Fill in thumbnail and summary information for an article.

        When first is true, the first <img> found under <body> is registered
        through self.add_toc_thumbnail, with any 'links\\linkN\\' fragment
        removed from its src. Independently of that, when the article's
        text_summary is empty or whitespace, the content of the page's
        og:description <meta> tag (if present) becomes both summary and
        text_summary.
        """
        if first:
            lead_img = soup.find('body').find('img')
            if lead_img is not None:
                thumb_src = re.sub(r'links\\link\d+\\','',lead_img['src'])
                self.add_toc_thumbnail(article,thumb_src)
        if article.text_summary.strip():
            # A non-blank summary already exists; leave it alone.
            return
        og_desc = soup.find('meta',attrs={'property':'og:description'})
        if og_desc is None:
            return
        article.summary = article.text_summary = og_desc['content']
    def strip_anchors(self,soup):
        """Replace every <a> that has no <img> child with its rendered contents.

        Anchors wrapping an image are left in place. The soup is edited in
        place and the same object is returned.
        """
        for element in soup.findAll(True):
            for anchor in element.findAll('a'):
                if anchor.img is not None:
                    # Keep linked images untouched.
                    continue
                inner = anchor.renderContents().decode('cp1252','replace')
                anchor.replaceWith(inner)
        return soup
    def preprocess_html(self, soup):
        """Return soup after running it through self.strip_anchors."""
        stripped = self.strip_anchors(soup)
        return stripped
    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
--- a/recipes/elet_es_irodalom.recipe
+++ b/recipes/elet_es_irodalom.recipe
@ -1,16 +1,16 @@
 ################################################################################
 #Description:     http://es.hu/ RSS channel
 #Author:      Bigpapa (bigpapabig@hotmail.com)
-#Date:	  2010.12.01. - V1.0
+#Date:    2012.01.20. - V1.2
 ################################################################################
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class elet_es_irodalom(BasicNewsRecipe):
-    title                  = u'Elet es Irodalom'
+    title                  = u'\u00c9let \u00e9s Irodalom'
    __author__             = 'Bigpapa'
    oldest_article         = 7
-    max_articles_per_feed  = 20	# Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
+    max_articles_per_feed  = 30 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
    no_stylesheets         = True
    #delay                  = 1
    use_embedded_content   = False
@ -19,21 +19,32 @@ class elet_es_irodalom(BasicNewsRecipe):
    language               = 'hu'
    publication_type       = 'newsportal'
    extra_css              = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
    needs_subscription = 'optional'
    masthead_url = 'http://www.es.hu/images/logo.jpg'
    timefmt = ' [%Y %b %d, %a]'
 #Nem ide a kódba kell beleírni a hozzáférés adatait, hanem azt akkor adod meg, ha le akarod tölteni!
    def get_browser(self):
        """Return a browser, logged in to es.hu when credentials are set.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are not None; otherwise the plain browser from
        ``BasicNewsRecipe.get_browser()`` is returned untouched.
        """
        br = BasicNewsRecipe.get_browser()
        if self.username is None or self.password is None:
            return br
        br.open('http://www.es.hu/')
        br.select_form(name='userfrmlogin')
        br['cusername'] = self.username
        br['cpassword'] = self.password
        br.submit()
        return br
    keep_only_tags    = [
                       dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
    ]
    remove_tags = [
     dict(name='a', attrs={'target':['_TOP']}),
    dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
    ]
    feeds          = [
    (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
    (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
@ -44,5 +55,4 @@ class elet_es_irodalom(BasicNewsRecipe):
    (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
    (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
    (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
    ]
--- a/recipes/espn.recipe
+++ b/recipes/espn.recipe
@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
    use_embedded_content = False
    remove_javascript     = True
-    needs_subscription = True
+    needs_subscription = 'optional'
    encoding= 'ISO-8859-1'
    remove_tags_before = dict(name='font', attrs={'class':'date'})
@ -75,10 +75,9 @@ class ESPN(BasicNewsRecipe):
        return soup
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username and self.password:
            br.set_handle_refresh(False)
            url = ('https://r.espn.go.com/members/v3_1/login')
            raw = br.open(url).read()
@ -100,7 +99,6 @@ class ESPN(BasicNewsRecipe):
        return article.get('guid',  None)
    def print_version(self, url):
        if 'eticket' in url:
            return url.partition('&')[0].replace('story?', 'print?')
        match = re.search(r'story\?(id=\d+)', url)
--- a/recipes/fhm_uk.recipe
+++ b/recipes/fhm_uk.recipe
@ -0,0 +1,34 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    # Feed-based recipe for FHM UK; the class body is declarative settings only.
    # last updated 27/1/12
    title = u'FHM UK'
    description = 'Good News for Men'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
    language = 'en_GB'
    oldest_article = 28
    max_articles_per_feed = 12
    remove_empty_feeds = True
    no_stylesheets = True
    # Settings the author left disabled:
    #auto_cleanup = True
    #articles_are_obfuscated = True

    # Tag specifications selecting the parts of each page to keep.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='img', attrs={'id': 'ctl00_Body_imgMainImage'}),
        dict(name='div', attrs={'id': ['articleLeft']}),
        dict(name='div', attrs={'class': ['imagesCenterArticle', 'containerCenterArticle', 'articleBody']}),
    ]

    # remove_tags is not set; the author's disabled entry was:
    #remove_tags    = [
    #dict(attrs={'class' : ['player']}),
    #]

    # (section title, feed URL) pairs.
    feeds = [
        (u'From the Homepage', u'http://feed43.com/8053226782885416.xml'),
        (u'Funny - The Very Best Of The Internet', u'http://feed43.com/4538510106331565.xml'),
        (u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
        (u'Gaming', u'http://feed43.com/0755006465351035.xml'),
    ]
--- a/recipes/foreignaffairs.recipe
+++ b/recipes/foreignaffairs.recipe
@ -3,10 +3,17 @@ import re
 from calibre.ptempfile import PersistentTemporaryFile
 class ForeignAffairsRecipe(BasicNewsRecipe):
    ''' there are three modifications:
    1) fetch issue cover
    2) toggle ignore premium articles
    3) extract proper section names, ie. "Comments", "Essay"
    by Chen Wei weichen302@gmx.com, 2012-02-05'''
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
-    version = 1
+    version = 1.01
    title = u'Foreign Affairs (Subcription or (free) Registration)'
    publisher = u'Council on Foreign Relations'
@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
    remove_javascript = True
    INDEX = 'http://www.foreignaffairs.com'
    FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
    INCLUDE_PREMIUM = False
    remove_tags = []
    remove_tags.append(dict(name = 'base'))
@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
    temp_files = []
    articles_are_obfuscated = True
    def get_cover_url(self):
        """Return ``INDEX`` joined with the cover image ``src`` from ``FRONTPAGE``.

        The image is the first ``img`` inside the ``div`` whose class is
        ``inthemag-issuebuy-cover``.
        """
        frontpage = self.index_to_soup(self.FRONTPAGE)
        cover_box = frontpage.find('div', attrs={'class':'inthemag-issuebuy-cover'})
        return self.INDEX + cover_box.find('img')['src']
    def get_obfuscated_article(self, url):
        br = self.get_browser()
        br.open(url)
@ -50,57 +66,46 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
        return self.temp_files[-1].name
    def parse_index(self):
        soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
        articles = []
        answer = []
-        content = soup.find('div', attrs = {'class': 'center-wrapper'})
+        soup = self.index_to_soup(self.FRONTPAGE)
        sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
        for sec in sec_start:
            content = sec.nextSibling
            if content:
-            for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
+                section = self.tag_to_string(content.find('h2'))
                tag = div.find('div', attrs = {'class': 'views-field-title'})
                if tag:
                    a = tag.find('a')
                    if a:
                        title = self.tag_to_string(a)
                        url = self.INDEX + a['href']
                        author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
                        tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
                        # If they ever fix their markup, this will break :-(
                        summary = self.tag_to_string(tag.findNextSibling('p'))
                        description = author  + '<br/>' + summary
                        articles.append({'title': title, 'date': None, 'url': url, 'description': description})
                    else:
                        continue
                else:
                    continue
            answer.append(('Magazine', articles))
            ul = content.find('ul')
            if ul:
                articles = []
-                for li in ul.findAll('li'):
+
-                    tag = li.find('div', attrs = {'class': 'views-field-title'})
+                tags = []
                for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
                    tags.append(div)
                for li in content.findAll('li'):
                    tags.append(li)
                for div in tags:
                    title = url = description = author = None
                    if self.INCLUDE_PREMIUM:
                        found_premium = False
                    else:
                        found_premium = div.findAll('span', attrs={'class':
                                                               'premium-icon'})
                    if not found_premium:
                        tag = div.find('div', attrs={'class': 'views-field-title'})
                        if tag:
                            a = tag.find('a')
                            if a:
                                title = self.tag_to_string(a)
                                url = self.INDEX + a['href']
-                            description = ''
+                            author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
-                            tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
+                            tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
-                            if tag:
+                            description = self.tag_to_string(tag_summary)
-                                description = self.tag_to_string(tag)
+                            articles.append({'title':title, 'date':None, 'url':url,
-
+                                     'description':description, 'author':author})
-                            articles.append({'title': title, 'date': None, 'url': url, 'description': description})
+                if articles:
-                        else:
+                    answer.append((section, articles))
                            continue
                    else:
                        continue
                answer.append(('Letters to the Editor', articles))
        return answer
    def preprocess_html(self, soup):
--- a/recipes/goal.recipe
+++ b/recipes/goal.recipe
@ -0,0 +1,13 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325677767(BasicNewsRecipe):
    # Single-feed recipe for Goal (Italian edition); settings only.
    title = u'Goal'
    oldest_article = 1
    language = 'it'
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags_after = [
        dict(id='article_content'),
    ]
    # (section title, feed URL) pairs.
    feeds = [
        (u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss'),
    ]
    __author__ = 'faber1971'
    description = 'Sports news from Italy'
--- a/recipes/grantland.recipe
+++ b/recipes/grantland.recipe
@ -5,42 +5,37 @@ class GrantLand(BasicNewsRecipe):
    title          = u"Grantland"
    description    = 'Writings on Sports & Pop Culture'
    language       = 'en'
-	__author__     = 'Barty'
+    __author__     = 'barty on mobileread.com forum'
    max_articles_per_feed = 100
-	no_stylesheets = False
+    no_stylesheets = True
    # auto_cleanup is too aggressive sometimes and we end up with blank articles
    auto_cleanup   = False
    timefmt        = ' [%a, %d %b %Y]'
-	oldest_article = 365
+    oldest_article = 90
    cover_url      = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
    masthead_url   = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'
    INDEX          = 'http://www.grantland.com'
    CATEGORIES     = [
-		# comment out categories you don't want
+        # comment out second line if you don't want older articles
        # (user friendly name, url suffix, max number of articles to load)
        ('Today in Grantland','',20),
        ('In Case You Missed It','incaseyoumissedit',35),
        ]
    remove_tags    = [
-		{'name':['head','style','script']},
+        {'name':['style','aside','nav','footer','script']},
-		{'id':['header']},
+        {'name':'h1','text':'Grantland'},
-		{'class':re.compile(r'\bside|\bad\b|floatright|tags')}
+        {'id':['header','col-right']},
        {'class':['connect_widget']},
        {'name':'section','class':re.compile(r'\b(ad|module)\b')},
        ]
 	remove_tags_before = {'class':'wrapper'}
 	remove_tags_after  = [{'id':'content'}]
    preprocess_regexps = [
-		# <header> tags with an img inside are just blog banners, don't need them
+        # remove blog banners
-		# note: there are other useful <header> tags so we don't want to just strip all of them
+        (re.compile(r'<a href="/blog/(?:(?!</a>).)+</a>', re.DOTALL|re.IGNORECASE), lambda m: ''),
 		(re.compile(r'<header class.+?<img .+?>.+?</header>', re.DOTALL|re.IGNORECASE),lambda m: ''),
 		# delete everything between the *last* <hr class="small" /> and </article>
 		(re.compile(r'<hr class="small"(?:(?!<hr class="small").)+</article>', re.DOTALL|re.IGNORECASE),lambda m: '<hr class="small" /></article>'),
        ]
 	extra_css = """cite, time { font-size: 0.8em !important; margin-right: 1em !important; }
 		img + cite { display:block; text-align:right}"""
    def parse_index(self):
        feeds = []
@ -54,36 +49,24 @@ class GrantLand(BasicNewsRecipe):
            page = "%s/%s" % (self.INDEX, tag)
            soup = self.index_to_soup(page)
 			headers = soup.findAll('h2' if tag=='' else 'h3')
-			for header in headers:
+            main = soup.find('div',id='col-main')
-				tag = header.find('a')
+            if main is None:
-				if tag is None or not hasattr(tag,'href'):
+                main = soup
-					continue
+
            for tag in main.findAll('a', href=re.compile(r'(story|post)/_/id/\d+')):
                url = tag['href']
 				if url.startswith('/'):
 					url = self.INDEX + url
                if url in seen_urls:
                    continue
-				seen_urls.add(url)
+                title = tag.string
-				title = self.tag_to_string(tag)
+                # blank title probably means <a href=".."><img /></a>.  skip
-				if 'Podcast:' in title or 'In Case You Missed It' in title:
+                if not title:
                    continue
 				desc = dt = ''
 				par = header.parent
 				#tag = par.find('cite')
 				#if tag is not None:
 				#	desc = '['+self.tag_to_string(tag) + '] '
 				tag = par.find('div')
 				if tag is not None:
 					desc = desc + self.tag_to_string(tag)
 					tag = tag.find('time')
 					if tag is not None:
 						dt = self.tag_to_string( tag)
                self.log('\tFound article:', title)
                self.log('\t', url)
-				articles.append({'title':title,'url':url,'description':desc,'date':dt})
+                articles.append({'title':title,'url':url})
                seen_urls.add(url)
                if len(articles) >= max_articles:
                    break
@ -91,6 +74,3 @@ class GrantLand(BasicNewsRecipe):
                feeds.append((cat_name, articles))
        return feeds
 	def print_version(self, url):
 		# Same URL with the literal '?view=print' suffix appended
 		# (presumably the site's print layout -- not verifiable from here).
 		print_url = url+'?view=print'
 		return print_url
--- a/recipes/haksoz.recipe
+++ b/recipes/haksoz.recipe
@ -0,0 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324739199(BasicNewsRecipe):
    # Single-feed recipe for Haksöz; settings only.
    title = u'Haks\xf6z'
    oldest_article = 7
    max_articles_per_feed = 20
    auto_cleanup = True
    language = 'tr'
    __author__ = 'asalet_r'
    # (section title, feed URL) pairs.
    feeds = [
        (u'Haks\xf6z', u'http://www.haksozhaber.net/rss/'),
    ]
--- a/recipes/hamilton_spectator.recipe
+++ b/recipes/hamilton_spectator.recipe
@ -0,0 +1,58 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 '''
 Hamilton Spectator Calibre Recipe
 '''
 class HamiltonSpectator(BasicNewsRecipe):
    # Feed-based recipe for the Hamilton Spectator (thespec.com).
    # The class body is declarative settings only.
    title = u'Hamilton Spectator'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    __author__ = u'Eric Coolman'
    publisher = u'thespec.com'
    description = u'Ontario Canada Newspaper'
    category = u'News, Ontario, Canada'
    remove_javascript = True
    use_embedded_content   = False
    no_stylesheets = True
    language = 'en_CA'
    encoding = 'utf-8'
    # (section title, feed URL) pairs.
    feeds          = [
        (u'Top Stories',u'http://www.thespec.com/rss?query=/&assetType=Article'),
        (u'All News',u'http://www.thespec.com/rss?query=/news&assetType=Article'),
        (u'Local',u'http://www.thespec.com/rss?query=/local&assetType=Article'),
        (u'Ontario',u'http://www.thespec.com/rss?query=/ontario&assetType=Article'),
        (u'Canada',u'http://www.thespec.com/rss?query=/canada&assetType=Article'),
        (u'World News',u'http://www.thespec.com/rss?query=/world&assetType=Article'),
        (u'Business',u'http://www.thespec.com/rss?query=/business&assetType=Article'),
        (u'Crime',u'http://www.thespec.com/rss?query=/crime&assetType=Article'),
        (u'All Sports',u'http://www.thespec.com/rss?query=/sports&assetType=Article'),
        (u'Ticats',u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'),
        (u'Bulldogs',u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'),
        (u'High School Sports',u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'),
        (u'Local Sports',u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'),
        # Double-quoted so the apostrophe survives: the previous spelling
        # u'What''s On' was two adjacent literals and evaluated to u'Whats On'.
        (u"What's On",u'http://www.thespec.com/rss?query=/whatson&assetType=Article'),
        (u'Arts and Entertainment',u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'),
        (u'Books',u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'),
        (u'Movies',u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'),
        (u'Music',u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'),
        (u'Restaurant Reviews',u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'),
        (u'Opinion',u'http://www.thespec.com/rss?query=/opinion&assetType=Article'),
        (u'Opinion Columns',u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'),
        (u'Cartoons',u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'),
        (u'Letters',u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'),
        (u'Editorial',u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'),
        (u'Community',u'http://www.thespec.com/rss?query=/community&assetType=Article'),
        (u'Education',u'http://www.thespec.com/rss?query=/community/education&assetType=Article'),
        (u'Faith',u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'),
        (u'Contests',u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'),
        (u'Living',u'http://www.thespec.com/rss?query=/living&assetType=Article'),
        (u'Food',u'http://www.thespec.com/rss?query=/living/food&assetType=Article'),
        (u'Health and Fitness',u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'),
        (u'Your Home',u'http://www.thespec.com/rss?query=/living/home&assetType=Article'),
        (u'Travel',u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'),
        (u'Family and Parenting',u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'),
        (u'Style',u'http://www.thespec.com/rss?query=/living/style&assetType=Article')
    ]
--- a/recipes/high_country_news.recipe
+++ b/recipes/high_country_news.recipe
@ -0,0 +1,43 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
 '''
 Fetch High Country News
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class HighCountryNews(BasicNewsRecipe):
    # Recipe for High Country News: feed list, a cover image scraped from the
    # home page, and article URLs rewritten with a '/print_view' suffix.
    title                 = u'High Country News'
    description           = u'News from the American West'
    __author__            = 'Armin Geller' # 2012-01-31
    publisher             = 'High Country News'
    timefmt               = ' [%a, %d %b %Y]'
    # Was 'en-Us', which is not a valid language code; the other US recipes
    # in this change set use plain 'en'.
    language              = 'en'
    encoding              = 'UTF-8'
    publication_type      = 'newspaper'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    auto_cleanup          = True
    remove_javascript     = True
    use_embedded_content  = False
    masthead_url          = 'http://www.hcn.org/logo.jpg' # 2012-01-31 AGe add
    cover_source          = 'http://www.hcn.org'          # 2012-01-31 AGe add

    def get_cover_url(self):                              # 2012-01-31 AGe add
        """Return the ``src`` of the cover image on ``cover_source``.

        Looks up the element carrying the home-page class string and reads
        ``div.img['src']`` beneath it.  Returns None when that element is
        not present instead of raising AttributeError.
        """
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class':' portaltype-Plone Site content--hcn template-homepage_view'})
        if preview_image_div is None:
            # Page layout differs from what this lookup expects: no cover.
            return None
        return preview_image_div.div.img['src']

    # (section title, feed URL) pairs.
    feeds = [
              (u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent'),
              (u'Current Issue', u'http://feeds.feedburner.com/hcn/current-issue'),
              (u'Writers on the Range', u'http://feeds.feedburner.com/hcn/wotr'),
              (u'High Country Views', u'http://feeds.feedburner.com/hcn/HighCountryViews'),
             ]

    def print_version(self, url):
        """Return ``url`` with ``/print_view`` appended."""
        return url + '/print_view'
--- a/recipes/icons/asianreviewofbooks.png
+++ b/recipes/icons/asianreviewofbooks.png
--- a/recipes/icons/metro_news_nl.png
+++ b/recipes/icons/metro_news_nl.png
--- a/recipes/icons/novilist_novine_hr.png
+++ b/recipes/icons/novilist_novine_hr.png
--- a/recipes/icons/novilist_portal_hr.png
+++ b/recipes/icons/novilist_portal_hr.png
--- a/recipes/icons/pescanik.png
+++ b/recipes/icons/pescanik.png
--- a/recipes/icons/rionegro.png
+++ b/recipes/icons/rionegro.png
--- a/recipes/icons/samanyolu_haber.png
+++ b/recipes/icons/samanyolu_haber.png
--- a/recipes/ideal_almeria.recipe
+++ b/recipes/ideal_almeria.recipe
@ -0,0 +1,68 @@
 # encoding: utf-8 -*-
 __license__     = 'GPL v3'
 __author__      = 'Josemi Liébana <office at josemi-liebana.com>'
 __copyright__   = 'Josemi Liébana'
 __version__     = 'v0.1'
 __date__        = '5 January 2012'
 '''
 www.ideal.es
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Ideal(BasicNewsRecipe):
    # Feed-based recipe for the Almería edition of Ideal (www.ideal.es).
    # The class body is declarative settings only.
    title = u'Ideal (Edición Almería)'
    __author__ = u'Josemi Liébana'
    description = u'Noticias de Almería y el resto del mundo'
    publisher = 'Ideal'
    category = u'News, Politics, Spain, Almería'
    publication_type = 'Newspaper'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = u'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
    # NOTE(review): this cover path is under /granada/ -- confirm that is
    # intended for the Almería edition.
    cover_url = u'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
    extra_css = u' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    # Built from the attributes defined above, so it must come after them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'id': 'title'}),
        dict(attrs={'class': ['overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}),
    ]

    remove_tags = [dict(name='ul')]

    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        (u'Última Hora', u'http://www.ideal.es/almeria/rss/feeds/ultima.xml'),
        (u'Portada', u'http://www.ideal.es/almeria/portada.xml'),
        # NOTE(review): 'Local' points at granada.xml -- verify the feed name.
        (u'Local', u'http://www.ideal.es/almeria/rss/feeds/granada.xml'),
        (u'Deportes', u'http://www.ideal.es/almeria/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.ideal.es/almeria/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.ideal.es/almeria/rss/feeds/cultura.xml'),
        (u'Economía', u'http://www.ideal.es/almeria/rss/feeds/economia.xml'),
        (u'Costa', u'http://www.ideal.es/almeria/rss/feeds/costa.xml'),
        (u'Puerta Purchena', u'http://www.ideal.es/almeria/rss/feeds/puerta_purchena.xml'),
        (u'Andalucía', u'http://www.ideal.es/almeria/rss/feeds/andalucia.xml'),
        (u'España', u'http://www.ideal.es/almeria/rss/feeds/espana.xml'),
        (u'Mundo', u'http://www.ideal.es/almeria/rss/feeds/internacional.xml'),
        (u'Vivir', u'http://www.ideal.es/almeria/rss/feeds/vivir.xml'),
        (u'Opinión', u'http://www.ideal.es/almeria/rss/feeds/opinion.xml'),
        (u'Televisión', u'http://www.ideal.es/almeria/rss/feeds/television.xml'),
        (u'Contraportada', u'http://www.ideal.es/almeria/rss/feeds/contraportada.xml'),
    ]
--- a/recipes/ideal_granada.recipe
+++ b/recipes/ideal_granada.recipe
@ -0,0 +1,69 @@
 # encoding: utf-8 -*-
 __license__     = 'GPL v3'
 __author__      = 'Josemi Liébana <office at josemi-liebana.com>'
 __copyright__   = 'Josemi Liébana'
 __version__     = 'v0.1'
 __date__        = '5 January 2012'
 '''
 www.ideal.es
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Ideal(BasicNewsRecipe):
    # Feed-based recipe for the Granada edition of Ideal (www.ideal.es).
    # The class body is declarative settings only.
    title = u'Ideal (Edición Granada)'
    __author__ = u'Josemi Liébana'
    description = u'Noticias de Granada y el resto del mundo'
    publisher = 'Ideal'
    category = 'News, Politics, Spain, Granada'
    publication_type = 'Newspaper'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
    cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    # Built from the attributes defined above, so it must come after them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'id': 'title'}),
        dict(attrs={'class': ['overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}),
    ]

    remove_tags = [dict(name='ul')]

    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        (u'Última Hora', u'http://www.ideal.es/granada/rss/feeds/ultima.xml'),
        (u'Portada', u'http://www.ideal.es/granada/portada.xml'),
        (u'Local', u'http://www.ideal.es/granada/rss/feeds/granada.xml'),
        (u'Deportes', u'http://www.ideal.es/granada/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.ideal.es/granada/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.ideal.es/granada/rss/feeds/cultura.xml'),
        (u'Economía', u'http://www.ideal.es/granada/rss/feeds/economia.xml'),
        (u'Costa', u'http://www.ideal.es/granada/rss/feeds/costa.xml'),
        (u'La Carrera', u'http://www.ideal.es/granada/rss/feeds/la_carrera.xml'),
        (u'Puerta Real', u'http://www.ideal.es/granada/rss/feeds/puerta_real.xml'),
        (u'Andalucía', u'http://www.ideal.es/granada/rss/feeds/andalucia.xml'),
        (u'España', u'http://www.ideal.es/granada/rss/feeds/espana.xml'),
        (u'Mundo', u'http://www.ideal.es/granada/rss/feeds/internacional.xml'),
        (u'Vivir', u'http://www.ideal.es/granada/rss/feeds/vivir.xml'),
        (u'Opinión', u'http://www.ideal.es/granada/rss/feeds/opinion.xml'),
        (u'Televisión', u'http://www.ideal.es/granada/rss/feeds/television.xml'),
        (u'Contraportada', u'http://www.ideal.es/granada/rss/feeds/contraportada.xml'),
    ]
--- a/recipes/ideal_jaen.recipe
+++ b/recipes/ideal_jaen.recipe
@ -0,0 +1,67 @@
 # encoding: utf-8 -*-
 __license__     = 'GPL v3'
 __author__      = 'Josemi Liébana <office at josemi-liebana.com>'
 __copyright__   = 'Josemi Liébana'
 __version__     = 'v0.1'
 __date__        = '5 January 2012'
 '''
 www.ideal.es
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Ideal(BasicNewsRecipe):
    # Feed-based recipe for the Jaén edition of Ideal (www.ideal.es).
    # The class body is declarative settings only.
    title = u'Ideal (Edición Jaén)'
    __author__ = u'Josemi Liébana'
    description = u'Noticias de Jaén y el resto del mundo'
    publisher = 'Ideal'
    category = u'News, Politics, Spain, Jaén'
    publication_type = 'Newspaper'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
    # NOTE(review): this cover path is under /granada/ -- confirm that is
    # intended for the Jaén edition.
    cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    # Built from the attributes defined above, so it must come after them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'id': 'title'}),
        dict(attrs={'class': ['overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}),
    ]

    remove_tags = [dict(name='ul')]

    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        (u'Última Hora', u'http://www.ideal.es/jaen/rss/feeds/ultima.xml'),
        (u'Portada', u'http://www.ideal.es/jaen/portada.xml'),
        # NOTE(review): 'Local' points at granada.xml -- verify the feed name.
        (u'Local', u'http://www.ideal.es/jaen/rss/feeds/granada.xml'),
        (u'Deportes', u'http://www.ideal.es/jaen/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.ideal.es/jaen/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.ideal.es/jaen/rss/feeds/cultura.xml'),
        (u'Economía', u'http://www.ideal.es/jaen/rss/feeds/economia.xml'),
        (u'Costa', u'http://www.ideal.es/jaen/rss/feeds/costa.xml'),
        (u'Andalucía', u'http://www.ideal.es/jaen/rss/feeds/andalucia.xml'),
        (u'España', u'http://www.ideal.es/jaen/rss/feeds/espana.xml'),
        (u'Mundo', u'http://www.ideal.es/jaen/rss/feeds/internacional.xml'),
        (u'Vivir', u'http://www.ideal.es/jaen/rss/feeds/vivir.xml'),
        (u'Opinión', u'http://www.ideal.es/jaen/rss/feeds/opinion.xml'),
        (u'Televisión', u'http://www.ideal.es/jaen/rss/feeds/television.xml'),
        (u'Contraportada', u'http://www.ideal.es/jaen/rss/feeds/contraportada.xml'),
    ]
--- a/recipes/iktibas.recipe
+++ b/recipes/iktibas.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324739406(BasicNewsRecipe):
    """Feed-based recipe for the Iktibas RSS feed (language code 'tr')."""
    __author__ = 'asalet_r'
    title = u'\u0130ktibas'
    language = 'tr'
    # Download window: articles up to 7 days old, at most 20 per feed.
    oldest_article = 7
    max_articles_per_feed = 20
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    feeds = [
        (u'\u0130ktibas', u'http://www.iktibasdergisi.com/rss/rss.xml'),
    ]
--- a/recipes/ilmanifesto.recipe
+++ b/recipes/ilmanifesto.recipe
@ -0,0 +1,110 @@
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
 class IlManifesto(BasicNewsRecipe):
  """Recipe for the most recent HTML edition published on ilmanifesto.it.

  The edition index URL and its date string are looked up once by
  _set_manifesto_index() and stored on the instance; get_cover_url() and
  parse_index() both call it before doing their own work.
  """
  title          = 'Il Manifesto'
  __author__ = 'Giacomo Lacava'
  description = 'quotidiano comunista - ultima edizione html disponibile'
  publication_type = 'newspaper'
  publisher = 'il manifesto coop. editrice a r.l.'
  language = 'it'
  oldest_article = 2
  max_articles_per_feed = 100
  delay = 1
  no_stylesheets = True
  simultaneous_downloads = 5
  timeout = 30
  auto_cleanup = True
  remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
  remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
  remove_tags_after = dict(id='myPrintArea')
  # Both values are filled in lazily by _set_manifesto_index().
  manifesto_index = None
  manifesto_datestr = None
  def _set_manifesto_index(self):
    """Locate the latest edition and cache its URL and date string.

    Does nothing when manifesto_index has already been set.
    """
    if self.manifesto_index is not None:
      return
    startUrl = MANIFESTO_BASEURL  + 'area-abbonati/in-edicola/'
    startSoup = self.index_to_soup(startUrl)
    # The edition link is read from the second 'accordion_inedicola' div.
    lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
    del startSoup
    self.manifesto_index = MANIFESTO_BASEURL + lastEdition
    urlsplit = lastEdition.split('/')
    # A trailing slash leaves an empty last component; fall back to the
    # component before it in that case.
    self.manifesto_datestr = urlsplit[-1] or urlsplit[-2]
  def get_cover_url(self):
    """Return the front-page image URL built from the edition date string."""
    self._set_manifesto_index()
    url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
    return url
  def parse_index(self):
    """Return a list of (section name, articles) tuples for the edition.

    Every link inside the 'accordion_inedicola' div of the edition index is
    treated as one section; each section page is fetched and its
    'strumenti1_inedicola' divs become the article entries.
    """
    self._set_manifesto_index()
    soup = self.index_to_soup(self.manifesto_index)
    feedLinks =  soup.find('div',id='accordion_inedicola').findAll('a')
    result = []
    for feed in feedLinks:
      articles = []
      feedName = feed.find('h2').string
      feedUrl = MANIFESTO_BASEURL  + feed['href']
      feedSoup = self.index_to_soup(feedUrl)
      indexRoot = feedSoup.find('div',attrs={'class':'column1'})
      for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
        artLink =  div.find('a')
        if artLink is None: continue # empty div
        title = artLink.string
        url = MANIFESTO_BASEURL  + artLink['href']
        description = ''
        descNode = div.find('div',attrs={'class':'text_12'})
        if descNode is not None:
          # .string is None when the node holds more than a single text
          # child; keep the empty-string default in that case.
          description = descNode.string or ''
        author = ''
        authNode = div.find('div',attrs={'class':'firma'})
        if authNode is not None:
          author = authNode.string or ''
        articleText = ''
        article = {
          'title':title,
          'url':url,
          'date': strftime('%d %B %Y'),
          'description': description,
          'content': articleText,
          'author': author
          }
        articles.append(article)
      result.append((feedName,articles))
    return result
  def extract_readable_article(self, html, url):
    """Rebuild the article page as a small standalone HTML document.

    The title, optional red kicker ('occhiello_rosso'), byline, optional
    summary ('sommario') and body text are picked out of html and placed
    into a fixed template. The url argument is not used.
    """
    bs = BeautifulSoup(html)
    col1 = bs.find('div',attrs={'class':'column1'})
    content = col1.find('div',attrs={'class':'bodytext'})
    title = bs.find(id='titolo_articolo').string
    author = col1.find('span',attrs={'class':'firma'})
    if author is None:
      # Unsigned article: leave the byline empty rather than letting the
      # template render the literal text "None".
      author = ''
    subtitle = ''
    subNode = col1.findPrevious('div',attrs={'class':'occhiello_rosso'})
    if subNode is not None:
      subtitle = subNode
    summary = ''
    sommNode = bs.find('div',attrs={'class':'sommario'})
    if sommNode is not None:
      summary = sommNode
    template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
    del bs
    return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)
--- a/recipes/indy_star.recipe
+++ b/recipes/indy_star.recipe
@ -1,16 +1,20 @@
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe
-class AdvancedUserRecipe1234144423(BasicNewsRecipe):
+class IndianapolisStar(BasicNewsRecipe):
 	title                 = u'Indianapolis Star'
-    oldest_article = 5
+	oldest_article        = 10
 	auto_cleanup          = True
 	language              = 'en'
 	__author__            = 'Owen Kelly'
 	max_articles_per_feed = 100
 	cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
-    
+	feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss'),
-    feeds          = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
+		(u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss'),
 		(u'Business Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss'),
 		(u'Politics and Government', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS05&template=rss'),
 		(u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'),
 		(u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')
 		]
 	def print_version(self, url):
 		return url + '&template=printart'
--- a/recipes/izdiham.com.recipe
+++ b/recipes/izdiham.com.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324158549(BasicNewsRecipe):
    """Feed-based recipe for the izdiham.com feed (language code 'tr')."""
    __author__ = 'asalet_r'
    title = u'izdiham.com'
    language = 'tr'
    # Download window: articles up to 7 days old, at most 20 per feed.
    oldest_article = 7
    max_articles_per_feed = 20
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    feeds = [
        (u'\u0130zdiham', u'http://www.izdiham.com/index.php/feed'),
    ]
--- a/recipes/klip_me.recipe
+++ b/recipes/klip_me.recipe
@ -0,0 +1,72 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    """Recipe that downloads the pages saved in a Klip.me account.

    The account pages listed in feeds are scraped by parse_index(); article
    titles are taken from each downloaded page because the listing only
    yields URLs.
    """
    title                             = u'Klipme'
    __author__                  = 'Ken Sun'
    publisher                     = 'Klip.me'
    category                      = 'info, custom, Klip.me'
    oldest_article               = 365
    max_articles_per_feed = 100
    no_stylesheets        = True
    remove_javascript     = True
    # Reader controls and scripts that must not end up in the output.
    remove_tags              = [
    dict(name='div', attrs={'id':'text_controls_toggle'})
    ,dict(name='script')
    ,dict(name='div', attrs={'id':'text_controls'})
    ,dict(name='div', attrs={'id':'editing_controls'})
    ,dict(name='div', attrs={'class':'bar bottom'})
     ]
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://www.klip.me'
    LOGIN                 = INDEX + u'/fav/signin?callback=/fav'
    feeds          = [
            (u'Klip.me unread', u'http://www.klip.me/fav'),
            (u'Klip.me started', u'http://www.klip.me/fav?s=starred')
            ]
    def get_browser(self):
        """Return a browser, signed in through the LOGIN form when a username is set."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['Email'] = self.username
            if self.password is not None:
               br['Passwd'] = self.password
            br.submit()
        return br
    def parse_index(self):
        """Return (feed title, articles) tuples scraped from the account pages.

        Each article dict carries only a 'url'; the title is filled in later
        by populate_article_metadata().
        """
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('table',attrs={'class':['item','item new']}):
                atag = item.a
                if atag and atag.has_key('href'):
                    url         = atag['href']
                    articles.append({
                                     'url'        :url
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
    def print_version(self, url):
        """Turn the site-relative link from the listing into an absolute URL."""
        # Reuse INDEX so the site address is defined in one place only.
        return self.INDEX + url
    def populate_article_metadata(self, article, soup, first):
        """Set the article title from the <title> element of the page."""
        article.title  = soup.find('title').contents[0].strip()
    def postprocess_html(self, soup, first_fetch):
        """Insert the page title as an <h1> at the top of each 'story' element."""
        for link_tag in soup.findAll(attrs={"id" : "story"}):
            link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')
        return soup
--- a/recipes/kopalniawiedzy.recipe
+++ b/recipes/kopalniawiedzy.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2011, Attis <attis@attis.one.pl>'
+__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
 __version__ = 'v. 0.1'
 import re
@ -10,7 +10,7 @@ class KopalniaWiedzy(BasicNewsRecipe):
        publisher      = u'Kopalnia Wiedzy'
        description    = u'Ciekawostki ze świata nauki i techniki'
        encoding       = 'utf-8'
-		__author__     = 'Attis'
+        __author__     = 'Attis & Tomasz Długosz'
        language       = 'pl'
        oldest_article = 7
        max_articles_per_feed = 100
@ -18,9 +18,9 @@ class KopalniaWiedzy(BasicNewsRecipe):
        remove_javascript     = True
        no_stylesheets        = True
-		remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
+        remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
        remove_tags_after = dict(attrs={'class':'ad-square'})
-		keep_only_tags    = [dict(name="div", attrs={'id':'articleContent'})]
+        keep_only_tags    = [dict(name="div", attrs={'class':'article-text text-small'})]
        extra_css      = '.topimage {margin-top: 30px}'
        preprocess_regexps = [
--- a/recipes/kurier.recipe
+++ b/recipes/kurier.recipe
@ -13,9 +13,10 @@ class Kurier(BasicNewsRecipe):
    publisher             = 'KURIER'
    category              = 'news, politics, Austria'
    oldest_article        = 2
-    max_articles_per_feed = 200
+    max_articles_per_feed = 100
    timeout               = 30
    encoding              = None
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'de_AT'
    remove_empty_feeds    = True
@ -29,9 +30,11 @@ class Kurier(BasicNewsRecipe):
                        , 'language'  : language
                        }
-    remove_tags = [dict(attrs={'class':['functionsleiste','functions','social_positionierung','contenttabs','drucken','versenden','leserbrief','kommentieren','addthis_button']})]
+    remove_tags = [ dict(attrs={'id':['artikel_expand_symbol2','imgzoom_close2']}), 
                    dict(attrs={'class':['linkextern','functionsleiste','functions','social_positionierung','contenttabs','drucken','versenden','leserbrief','kommentieren','addthis_button']})
                   ]
    keep_only_tags    = [dict(attrs={'id':'content'})]
-    remove_tags_after = dict(attrs={'id':'author'})
+    remove_tags_after = [dict(attrs={'id':'author'})]
    remove_attributes = ['width','height']
    feeds = [
@ -41,7 +44,7 @@ class Kurier(BasicNewsRecipe):
              ,(u'Kultur'     , u'http://kurier.at/rss/kultur_kultur_rss.xml'   )
              ,(u'Freizeit'   , u'http://kurier.at/rss/freizeit_freizeit_rss.xml'   )
              ,(u'Wetter'     , u'http://kurier.at/rss/oewetter_rss.xml'   )
-              ,(u'Verkehr'    , u'http://kurier.at/rss/verkehr_rss.xml'   )
+              ,(u'Sport'      , u'http://kurier.at/newsfeed/detail/sport_rss.xml'   )
            ]
    def preprocess_html(self, soup):
--- a/recipes/la_razon_bo.recipe
+++ b/recipes/la_razon_bo.recipe
@ -25,6 +25,7 @@ class LaRazon_Bol(BasicNewsRecipe):
                                img{margin-bottom: 0.4em; display: block}
                                .meta{font-size: small; font-family: Arial,Helvetica,sans-serif}
                            """
    INDEX = 'http://www.la-razon.com/'
    conversion_options = {
                          'comment'   : description
@ -60,3 +61,10 @@ class LaRazon_Bol(BasicNewsRecipe):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    def get_cover_url(self):
        """Return the URL of the front-page image found on the site index.

        The image is read from the 'lightbox lightbox-frontpage' div of the
        INDEX page. Returns None when that div, or an image inside it, is
        missing, instead of raising AttributeError.
        """
        soup = self.index_to_soup(self.INDEX)
        lightbox = soup.find('div', attrs = {'class' : 'lightbox lightbox-frontpage'})
        if lightbox is None or lightbox.img is None:
            return None
        return lightbox.img['src']
--- a/recipes/la_voce.recipe
+++ b/recipes/la_voce.recipe
@ -0,0 +1,15 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1324114228(BasicNewsRecipe):
    """Feed-based recipe for the La Voce RSS feed (language code 'it')."""
    __author__ = 'faber1971'
    title = u'La Voce'
    description = 'Italian website on Economy - v1.01 (17, December 2011)'
    language = 'it'
    masthead_url = 'http://www.lavoce.info/binary/la_voce/testata/lavoce.1184661635.gif'
    # Download window: articles up to 7 days old, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    feeds = [
        (u'La Voce', u'http://www.lavoce.info/feed_rss.php?id_feed=1'),
    ]
--- a/recipes/ledevoir.recipe
+++ b/recipes/ledevoir.recipe
@ -1,8 +1,8 @@
 __license__   = 'GPL v3'
-__author__    = 'Lorenzo Vigentini'
+__author__    = 'Lorenzo Vigentini and Olivier Daigle'
-__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__copyright__ = '2012, Lorenzo Vigentini <l.vigentini at gmail.com>, Olivier Daigle <odaigle _at nuvucameras __dot__ com>'
 __version__     = 'v1.01'
-__date__        = '14, January 2010'
+__date__        = '12, February 2012'
 __description__   = 'Canadian Paper '
 '''
@ -26,11 +26,15 @@ class ledevoir(BasicNewsRecipe):
    encoding       = 'utf-8'
    timefmt        = '[%a, %d %b, %Y]'
-    max_articles_per_feed = 50
+    oldest_article = 1
    max_articles_per_feed = 200
    use_embedded_content  = False
    recursion             = 10
    needs_subscription    = 'optional'
    filterDuplicates = False
    url_list = []
    remove_javascript     = True
    no_stylesheets        = True
@ -38,7 +42,7 @@ class ledevoir(BasicNewsRecipe):
    keep_only_tags  = [
                        dict(name='div', attrs={'id':'article'}),
-                        dict(name='ul', attrs={'id':'ariane'})
+                        dict(name='div', attrs={'id':'colonne_principale'})
                    ]
    remove_tags     = [
@ -51,7 +55,7 @@ class ledevoir(BasicNewsRecipe):
    feeds          = [
                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
-                       (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
+                       (u'Édition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
@ -61,7 +65,7 @@ class ledevoir(BasicNewsRecipe):
                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
-                       (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
+                       (u'Art de vivre', 'http://www.ledevoir.com/rss/section/art-de-vivre.xml?id=50')
                     ]
    extra_css = '''
@ -85,8 +89,16 @@ class ledevoir(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.ledevoir.com')
-            br.select_form(nr=1)
+            br.select_form(nr=0)
-            br['login[courriel]'] = self.username
+            br['login_popup[courriel]'] = self.username
-            br['login[password]'] = self.password
+            br['login_popup[password]'] = self.password
            br.submit()
        return br
    def print_version(self, url):
        """Return url unchanged and remember it in url_list.

        When filterDuplicates is true and url was already recorded, nothing
        is recorded and None is returned instead.
        """
        already_seen = self.filterDuplicates and url in self.url_list
        if already_seen:
            return None
        self.url_list.append(url)
        return url
--- a/recipes/lega_nerd.recipe
+++ b/recipes/lega_nerd.recipe
@ -0,0 +1,14 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1326135232(BasicNewsRecipe):
    """Feed-based recipe for the Lega Nerd feed (language code 'it')."""
    __author__ = 'faber1971'
    title = u'Lega Nerd'
    description = 'nerd / geek culture, pc, comics, music, culture'
    language = 'it'
    # Download window: articles up to 7 days old, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    feeds = [
        (u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd'),
    ]
 __version__     = 'v1.0'
 __date__        = '9, January 2011'
--- a/recipes/liberation_sub.recipe
+++ b/recipes/liberation_sub.recipe
@ -0,0 +1,103 @@
 #!/usr/bin/env  python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2012, Rémi Vanicat <vanicat at debian.org>'
 '''
 liberation.fr
 '''
 # The cleanning is from the Liberation recipe, by Darko Miletic
 from calibre.web.feeds.news import BasicNewsRecipe
 class Liberation(BasicNewsRecipe):
    """Recipe for the subscriber edition index of liberation.fr.

    Logs in with the configured credentials and builds the article list by
    scraping the page at `index`; the clean-up rules come from the regular
    Liberation recipe (see the comment above the imports).
    """
    title                 = u'Libération: Édition abonnés'
    __author__            = 'Rémi Vanicat'
    description           = u'Actualités'
    category              = 'Actualités, France, Monde'
    language              = 'fr'
    needs_subscription    = True
    use_embedded_content   = False
    no_stylesheets         = True
    remove_empty_feeds     = True
    extra_css = '''
                    h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                    p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
                    h4, h5, h2.rubrique,  {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                    .mna-body, entry-body  {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''
    keep_only_tags    = [
                  dict(name='div', attrs={'class':'article'})
                  ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
                  ,dict(name='div', attrs={'class':'entry'})
                  ,dict(name='div', attrs={'class':'col_contenu'})
    ]
    remove_tags_after = [
        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
        ,dict(name='p',attrs={'class':['chapo']})
        ,dict(id='_twitter_facebook')
    ]
    remove_tags    = [
                        dict(name='iframe')
                        ,dict(name='a', attrs={'class':'lnk-comments'})
                        ,dict(name='div', attrs={'class':'toolbox'})
                        ,dict(name='ul', attrs={'class':'share-box'})
                        ,dict(name='ul', attrs={'class':'tool-box'})
                        ,dict(name='ul', attrs={'class':'rub'})
                        ,dict(name='p',attrs={'class':['chapo']})
                        ,dict(name='p',attrs={'class':['tag']})
                        ,dict(name='div',attrs={'class':['blokLies']})
                        ,dict(name='div',attrs={'class':['alire']})
                        ,dict(id='_twitter_facebook')
                     ]
    # Page that lists the articles of the subscriber edition.
    index           = 'http://www.liberation.fr/abonnes/'
    def get_browser(self):
        """Return a browser, logged in when both username and password are set."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.liberation.fr/jogger/login/')
            br.select_form(nr=0)
            br['email']    = self.username
            br['password'] = self.password
            br.submit()
        return br
    def parse_index(self):
        """Return a list of (section title, articles) tuples from the index page.

        The direct children of the 'block-content' div are walked in order: a
        child with class 'headrest headrest-basic-rounded' opens a new
        section, every other child is read as one article of the current
        section. Children lacking a headline or a link are skipped.
        """
        soup=self.index_to_soup(self.index)
        content = soup.find('div', { 'class':'block-content' })
        articles = []
        # Entries met before the first section header land in this list,
        # which is never added to `articles` (same as before).
        cat_articles = []
        for tag in content.findAll(recursive=False):
            # .get() rather than ['class']: children without a class
            # attribute must not raise KeyError.
            if tag.get('class') == 'headrest headrest-basic-rounded':
                cat_articles = []
                articles.append((tag.find('h5').contents[0],cat_articles))
                continue
            headline = tag.find('h3')
            link = tag.find('a')
            if headline is None or link is None:
                # Not an article entry; nothing to download.
                continue
            subtitle = tag.find('p',{ 'class':'subtitle' })
            if subtitle is not None and subtitle.contents:
                description = subtitle.contents[0]
            else:
                description = ''
            article = {
                'title': headline.contents[0],
                'url': link['href'],
                # Key was misspelled 'descripion', so the subtitle was lost.
                'description': description,
                'content': ''
                }
            cat_articles.append(article)
        return articles
 # Local Variables:
 # mode: python
 # End:
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@ -1,41 +1,26 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.livemint.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class LiveMint(BasicNewsRecipe):
-    title                 = u'Livemint'
+    title          = u'Live Mint'
-    __author__            = 'Darko Miletic'
+    language       = 'en_IN'
-    description           = 'The Wall Street Journal'
+    __author__     = 'Krittika Goyal'
-    publisher             = 'The Wall Street Journal'
+    #encoding = 'cp1252'
-    category              = 'news, games, adventure, technology'
+    oldest_article = 1 #days
-    language = 'en'
+    max_articles_per_feed = 25
    use_embedded_content = True
    oldest_article        = 15
    max_articles_per_feed = 100
    no_stylesheets = True
-    encoding              = 'utf-8'
+    auto_cleanup = True
    use_embedded_content  = False
    extra_css             = ' #dvArtheadline{font-size: x-large} #dvArtAbstract{font-size: large} '
    keep_only_tags = [dict(name='div', attrs={'class':'innercontent'})]
-    remove_tags = [dict(name=['object','link','embed','form','iframe'])]
+    feeds          = [
 ('Latest News',
 'http://www.livemint.com/StoryRss.aspx?LN=Latestnews'),
 ('Gallery',
 'http://www.livemint.com/GalleryRssfeed.aspx'),
 ('Top Stories',
 'http://www.livemint.com/StoryRss.aspx?ts=Topstories'),
 ('Banking',
 'http://www.livemint.com/StoryRss.aspx?Id=104'),
 ]
    feeds = [(u'Articles', u'http://www.livemint.com/SectionRssfeed.aspx?Mid=1')]
    def print_version(self, url):
        link = url
        msoup = self.index_to_soup(link)
        mlink = msoup.find(attrs={'id':'ctl00_bodyplaceholdercontent_cntlArtTool_printUrl'})
        if mlink:
           link = 'http://www.livemint.com/Articles/' + mlink['href'].rpartition('/Articles/')[2]
        return link
    def preprocess_html(self, soup):
        return self.adeify_images(soup)
--- a/recipes/living_stones.recipe
+++ b/recipes/living_stones.recipe
@ -0,0 +1,25 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
 from calibre.web.feeds.news import BasicNewsRecipe
 class LivingStonesPastorsBlog(BasicNewsRecipe):
    """Feed-based recipe for the Living Stones pastors' blog."""
    __author__ = 'Peter Grungi'
    title = u'Living Stones Pastors Blog'
    publisher = 'Living Stones Church of Reno, NV'
    author = 'Living Stones Church of Reno, NV'
    language = 'en'
    cover_url = 'http://blogs.livingstonesreno.com/wp-content/uploads/2011/08/blogBGRD_norepeat.jpg'
    masthead_url = 'http://www.livingstonesreno.com/podcast/LSpodcastnew.jpg'
    # Download window: articles up to 90 days old, at most 10 per feed.
    oldest_article = 90
    max_articles_per_feed = 10
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    feeds = [
        (u'LS Blog', u'http://blogs.livingstonesreno.com/feed?utm_source=calibre&utm_medium=rss'),
    ]
    def full_version(self, url):
        """Return url with everything from the first '?' onwards removed."""
        # NOTE(review): nothing in this recipe calls full_version(); confirm
        # whether it was meant to be a calibre hook such as print_version().
        import re
        return re.sub(r'\?.*','',url)
--- a/recipes/lwn_weekly.recipe
+++ b/recipes/lwn_weekly.recipe
@ -14,8 +14,11 @@ class WeeklyLWN(BasicNewsRecipe):
    description = 'Weekly summary of what has happened in the free software world.'
    __author__ = 'Davide Cavalca'
    language = 'en'
    site_url = 'http://lwn.net'
-    cover_url = 'http://lwn.net/images/lcorner.png'
+    extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'
    cover_url = site_url + '/images/lcorner.png'
    #masthead_url = 'http://lwn.net/images/lcorner.png'
    publication_type = 'magazine'
@ -43,11 +46,29 @@ class WeeklyLWN(BasicNewsRecipe):
            br.submit()
        return br
    def print_version(self, url):
        """Return the absolute, printable form of an LWN link.

        The fragment (everything from the first '#') is dropped, site_url is
        prefixed when the link does not already start with it, and
        '?format=printable' is appended unless the link already ends with it.
        """
        printable_suffix = '?format=printable'
        # Keep only the part before the anchor.
        target = url.partition('#')[0]
        if not target.startswith(self.site_url):
            target = self.site_url + target
        if not target.endswith(printable_suffix):
            target = target + printable_suffix
        return target
    def parse_index(self):
        if self.username is not None and self.password is not None:
-            index_url = 'http://lwn.net/current/bigpage?format=printable'
+            index_url = self.print_version('/current/bigpage')
        else:
-            index_url = 'http://lwn.net/free/bigpage?format=printable'
+            index_url = self.print_version('/free/bigpage')
        soup = self.index_to_soup(index_url)
        body = soup.body
@ -56,19 +77,19 @@ class WeeklyLWN(BasicNewsRecipe):
        url_re = re.compile('^/Articles/')
        while True:
-            tag_title = body.findNext(name='p', attrs={'class':'SummaryHL'})
+            tag_title = body.findNext(attrs={'class':'SummaryHL'})
            if tag_title == None:
                break
-            tag_section = tag_title.findPrevious(name='p', attrs={'class':'Cat1HL'})
+            tag_section = tag_title.findPrevious(attrs={'class':'Cat1HL'})
            if tag_section == None:
                section = 'Front Page'
            else:
                section = tag_section.string
-            tag_section2 = tag_title.findPrevious(name='p', attrs={'class':'Cat2HL'})
+            tag_section2 = tag_title.findPrevious(attrs={'class':'Cat2HL'})
            if tag_section2 != None:
-                if tag_section2.findPrevious(name='p', attrs={'class':'Cat1HL'}) == tag_section:
+                if tag_section2.findPrevious(attrs={'class':'Cat1HL'}) == tag_section:
                    section = "%s: %s" %(section, tag_section2.string)
            if section not in articles.keys():
@ -94,9 +115,10 @@ class WeeklyLWN(BasicNewsRecipe):
            if tag_url == None:
                break
            article = dict(
                title=self.tag_to_string(tag_title),
-                url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
+                url=tag_url['href'],
                description='', content='', date='')
            articles[section].append(article)
--- a/recipes/macity.recipe
+++ b/recipes/macity.recipe
@ -0,0 +1,23 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325766771(BasicNewsRecipe):
    """Feed-based recipe for the Macity feed (language code 'it')."""
    __author__ = 'faber1971'
    title = u'Macity'
    description = 'Apple and hi-tech news'
    language = 'it'
    # Download window: articles up to 7 days old, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    feeds = [
        (u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss'),
    ]
    def get_article_url(self, article):
        """Return the article link, decoding links that end in 'story01.htm'.

        For such links the path component before 'story01.htm' is taken and
        its '0X' tokens are replaced by the characters they stand for
        (presumably the escaping used by the feedsportal redirector; confirm
        against the feed). Other links are returned untouched.
        """
        link = BasicNewsRecipe.get_article_url(self, article)
        segments = link.split('/')
        if segments[-1] != "story01.htm":
            return link
        link = segments[-2]
        # The order of the substitutions is significant and is kept exactly
        # as before: '0A' first, then the remaining tokens from '0S' down to
        # '0B'.
        substitutions = (
            ('0A', '0'),
            ('0S', 'www.'),
            ('0N', '.com'),
            ('0L', 'http://'),
            ('0I', '_'),
            ('0G', '&'),
            ('0F', '='),
            ('0E', '-'),
            ('0D', '?'),
            ('0C', '/'),
            ('0B', '.'),
        )
        for token, plain in substitutions:
            link = link.replace(token, plain)
        return link
--- a/recipes/marketing_magazine.recipe
+++ b/recipes/marketing_magazine.recipe
@ -0,0 +1,16 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1327062445(BasicNewsRecipe):
    """Feed-based recipe that gathers several marketing feeds (language code 'it')."""
    __author__ = 'faber1971'
    title = u'Marketing Magazine'
    description = 'Collection of Italian marketing websites - v1.00 (28, January 2012)'
    language = 'it'
    masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
    # Download window: articles up to 7 days old, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100
    # Article bodies are extracted by calibre's automatic clean-up.
    auto_cleanup = True
    remove_javascript = True
    feeds = [
        (u'My Marketing', u'http://feed43.com/0537744466058428.xml'),
        (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'),
        (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'),
        (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'),
        (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'),
        (u'Brandforum news', u'http://www.brandforum.it/rss/news'),
        (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'),
        (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/'),
    ]
--- a/recipes/metro_news_nl.recipe
+++ b/recipes/metro_news_nl.recipe
@ -38,18 +38,23 @@ except:
    removed keep_only tags
 Version 1.8 26-11-2022
   added remove tag: article-slideshow
 Version 1.9 31-1-2012
   removed some left debug settings
      extended timeout from 2 to 10
      changed oldest article from 10 to 1.2
      changed max articles from 15 to 25
 '''
 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro Nieuws NL'
-    oldest_article = 10
+    oldest_article = 1.2
-    max_articles_per_feed = 15
+    max_articles_per_feed = 25
    __author__     = u'DrMerry'
    description    = u'Metro Nederland'
    language       = u'nl'
-    simultaneous_downloads = 5
+    simultaneous_downloads = 3
    masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
-    timeout = 2
+    timeout = 10
    center_navbar  = True
    timefmt        = ' [%A, %d %b %Y]'
    no_stylesheets = True
--- a/recipes/microwave_and_rf.recipe
+++ b/recipes/microwave_and_rf.recipe
@ -0,0 +1,217 @@
 #!/usr/bin/env  python
 ##
 ## Title:        Microwave and RF
 ##
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 # Feb 2012: Initial release
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 '''
 mwrf.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.magick import Image
 class Microwave_and_RF(BasicNewsRecipe):
    # Recipe for the Microwave and RF magazine (mwrf.com).
    # parse_index() locates the latest issue from the site's front page and
    # collects its articles; print_version() rewrites each article URL to
    # the site's Print.cfm page; preprocess_html() turns linked figures into
    # inline images and strips the remaining hyperlinks.
    Convert_Grayscale = False # Convert images to gray scale or not
    # Add sections that want to be excluded from the magazine
    exclude_sections = []
    # Add sections that want to be included from the magazine
    # (an empty list means "include every section")
    include_sections = []
    title            = u'Microwave and RF'
    __author__       = 'kiavash'
    description      = u'Microwave and RF Monthly Magazine'
    publisher        = 'Penton Media, Inc.'
    publication_type = 'magazine'
    site             = 'http://mwrf.com'
    language = 'en'
    asciiize = True
    timeout = 120
    simultaneous_downloads = 1 # very peaky site!
    # Main article is inside this tag
    keep_only_tags = [dict(name='table', attrs={'id':'prtContent'})]
    no_stylesheets = True
    remove_javascript = True
    # Flattens all the tables to make it compatible with Nook
    conversion_options = {'linearize_tables' : True}
    remove_tags    = [
                        dict(name='span', attrs={'class':'body12'}),
                     ]
    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                    font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
    # Remove the line breaks and float left/right and picture width/height.
    # NOTE(review): the lazy '.*?' at the end of the float pattern matches the
    # empty string, so only the literal 'float:' is removed (the value stays);
    # the greedy '.*' in the 'clear' pattern runs to the last '/>' on the line.
    # Both are left unchanged here -- confirm the intended behaviour before tightening.
    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'float:.*?'), lambda m: ''),
                              (re.compile(r'width:.*?px'), lambda m: ''),
                              (re.compile(r'height:.*?px'), lambda m: '')
                              ]
    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Strips the '.html' suffix and rewrites the '/ArticleID/<id>/' path
        segment into the 'Print.cfm?ArticleID=' query form.
        """
        # The dot is escaped so that only a literal '.html' is removed; the
        # unescaped form would also eat e.g. 'xhtml' or '-html' inside the path.
        url = re.sub(r'\.html', '', url)
        url = re.sub('/ArticleID/.*?/', '/Print.cfm?ArticleID=', url)
        return url
    # Need to change the user agent to avoid potential download errors
    def get_browser(self, *args, **kwargs):
        """Return a calibre browser that identifies itself as desktop Firefox."""
        from calibre import browser
        kwargs['user_agent'] = 'Mozilla/5.0 (Windows NT 5.1; rv:10.0) Gecko/20100101 Firefox/10.0'
        return browser(*args, **kwargs)
    def parse_index(self):
        """Build the feed list for the latest issue.

        Returns a list holding at most one (title, articles) tuple, where each
        article is a dict with 'title', 'url', 'description' and 'date' keys.
        As side effects, sets self.timefmt to the issue date and self.cover_url
        when a cover image is found.

        Raises ValueError when the front page has no issue link or the issue
        page has no 'columnContainer' div.
        """
        # Fetches the main page of Microwave and RF
        soup = self.index_to_soup(self.site)
        # Searches the site for Issue ID link then returns the href address
        # pointing to the latest issue
        issue_link = soup.find('a', attrs={'href':lambda x: x and 'IssueID' in x})
        if issue_link is None:
            raise ValueError('No IssueID link found on ' + self.site)
        latest_issue = issue_link.get('href')
        # Site-relative links are made absolute, as is done for article links below
        if latest_issue.startswith('/'):
            latest_issue = self.site + latest_issue
        # Fetches the index page of the latest issue
        soup = self.index_to_soup(latest_issue)
        # Finds the main section of the page containing cover, issue date and
        # TOC
        ts = soup.find('div', attrs={'id':'columnContainer'})
        if ts is None:
            raise ValueError('No columnContainer section found on ' + latest_issue)
        # Finds the issue date (the last two words of the section heading)
        ds = ' '.join(self.tag_to_string(ts.find('span', attrs={'class':'CurrentIssueSectionHead'})).strip().split()[-2:]).capitalize()
        self.log('Found Current Issue:', ds)
        self.timefmt = ' [%s]'%ds
        # Finds the cover image
        cover = ts.find('img', src = lambda x: x and 'Cover' in x)
        if cover is not None:
            self.cover_url = self.site + cover['src']
            self.log('Found Cover image:', self.cover_url)
        article_info = []
        # Finds all the articles (titles and links)
        articles = ts.findAll('a', attrs={'class':'commonArticleTitle'})
        # Finds all the descriptions
        descriptions = ts.findAll('span', attrs={'class':'commonCopy'})
        # Find all the sections
        sections = ts.findAll('span', attrs={'class':'kicker'})
        # Goes thru all the articles one by one and sort them out.
        # NOTE(review): the index is 1-based, so articles[0]/descriptions[0]
        # are never used -- presumably the first entry on the page is not a
        # regular article; confirm against the live page layout.
        for title_number, section in enumerate(sections, 1):
            section_name = self.tag_to_string(section)
            # Removes the unwanted sections
            if section_name in self.exclude_sections:
                continue
            # Only includes the wanted sections
            if self.include_sections and section_name not in self.include_sections:
                continue
            # Stop cleanly instead of raising IndexError when the page lists
            # fewer article links than section kickers
            if title_number >= len(articles):
                self.log('\tNo article link for section', section_name, '- stopping')
                break
            title = self.tag_to_string(articles[title_number])
            url = articles[title_number].get('href')
            if url.startswith('/'):
                url = self.site + url
            self.log('\tFound article:', title, 'at', url)
            # A missing description is not fatal; fall back to an empty one
            if title_number < len(descriptions):
                desc = self.tag_to_string(descriptions[title_number])
            else:
                desc = ''
            self.log('\t\t', desc)
            article_info.append({'title':title, 'url':url, 'description':desc,
                                'date':self.timefmt})
        feeds = []
        if article_info:
            feeds.append((self.title, article_info))
        return feeds
    def postprocess_html(self, soup, first):
        """Optionally convert every downloaded image to grayscale; return soup."""
        if self.Convert_Grayscale:
            #process all the images
            for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
                iurl = tag['src']
                img = Image()
                img.open(iurl)
                if img < 0:
                    raise RuntimeError('Out of memory')
                img.type = "GrayscaleType"
                # Overwrites the image at the same location
                img.save(iurl)
        return soup
    def preprocess_html(self, soup):
        """Inline linked figures, promote titles/section names, drop other links."""
        # Includes all the figures inside the final ebook
        # Finds all the jpg links
        for figure in soup.findAll('a', attrs = {'href' : lambda x: x and 'jpg' in x}):
            # makes sure that the link points to the absolute web address
            if figure['href'].startswith('/'):
                figure['href'] = self.site + figure['href']
            figure.name = 'img' # converts the links to img
            figure['src'] = figure['href'] # with the same address as href
            figure['style'] = 'display:block' # puts the image on its own line
            del figure['href']
            del figure['target']
        # Makes the title standing out
        for title in soup.findAll('a', attrs = {'class': 'commonSectionTitle'}):
            title.name = 'h1'
            del title['href']
            del title['target']
        # Makes the section name more visible
        for section_name in soup.findAll('a', attrs = {'class': 'kicker2'}):
            section_name.name = 'h5'
            del section_name['href']
            del section_name['target']
        # Removes all unrelated links (anything that still carries an href)
        for link in soup.findAll('a', attrs = {'href': True}):
            link.name = 'font'
            del link['href']
            del link['target']
        return soup
--- a/recipes/money_pl.recipe
+++ b/recipes/money_pl.recipe
@ -0,0 +1,76 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class FocusRecipe(BasicNewsRecipe):
    # Recipe for Money.pl: downloads the feedsportal-hosted RSS feeds listed
    # below and rewrites every article link to a mobile/print page.
    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Money.pl'
    category = u'News'
    description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.'
    remove_empty_feeds= True
    oldest_article = 1
    max_articles_per_feed = 100000
    recursions = 0
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 2
    # Written as a raw string so the backslash escapes reach the regex engine
    # verbatim (the pattern text itself is unchanged).
    # NOTE(review): nothing inside this class references `r` -- confirm whether it is still needed.
    r = re.compile(r'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    keep_only_tags =[]
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'artykul'}))
    remove_tags    = [dict(name='ul', attrs={'class':'socialStuff'})]
    extra_css = '''
                    body {font-family: Arial,Helvetica,sans-serif ;}
                    h1{text-align: left;}
                    h2{font-size: medium; font-weight: bold;}
                    p.lead {font-weight: bold; text-align: left;}
                    .authordate {font-size: small; color: #696969;}
                    .fot{font-size: x-small; color: #666666;}
                    '''
    feeds          = [
                            ('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'),
                            ('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'),
                            ('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'),
                            ('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'),
                            ('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'),
                            ('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'),
                            ('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'),
                            ('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'),
                            ('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'),
                            ('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'),
                            ('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'),
                            ('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'),
                            ('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'),
                            ('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'),
                            ('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'),
                            ('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'),
                            ('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'),
                            ('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'),
                           ]
    def print_version(self, url):
        """Return the URL of the page to download for the article at *url*.

        feedsportal links are decoded into a www.m.money.pl article URL; any
        other link has its '/nc/1' segment replaced by '/do-druku/1'. A
        feedsportal link without the '0Cartykul0C' marker is returned as is.
        """
        if 'money.pl.feedsportal.com' not in url:
            return url.replace('/nc/1','/do-druku/1')
        marker = url.find('0Cartykul0C')
        if marker < 0:
            # Without the marker there is nothing to decode; slicing from
            # find()'s -1 would only produce a garbage address.
            return url
        # The 21-character skip is kept from the original implementation.
        # NOTE(review): presumably it drops the marker plus a fixed-width prefix -- confirm.
        u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[marker + 21:]
        # Substitutions are applied in this exact order; the final one relies
        # on '0C' already having been turned into '/'.
        substitutions = (
            ('0C', '/'),
            ('A', ''),
            ('0E', '-'),
            ('0P', ';'),
            ('0H', ','),
            ('0B', '.'),
            (',0,', ',-1,'),
            ('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', ''),
        )
        for encoded, plain in substitutions:
            u = u.replace(encoded, plain)
        return u
--- a/recipes/montreal_gazette.recipe
+++ b/recipes/montreal_gazette.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
@ -6,15 +7,72 @@ __license__   = 'GPL v3'
 www.canada.com
 '''
-from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
-    # un-comment the following three lines for the Montreal Gazette
+    # un-comment the following four lines for the Victoria Times Colonist
 ##    title = u'Victoria Times Colonist'
 ##    url_prefix = 'http://www.timescolonist.com'
 ##    description = u'News from Victoria, BC'
 ##    fp_tag = 'CAN_TC'
    # un-comment the following four lines for the Vancouver Province
 ##    title = u'Vancouver Province'
 ##    url_prefix = 'http://www.theprovince.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VP'
    # un-comment the following four lines for the Vancouver Sun
 ##    title = u'Vancouver Sun'
 ##    url_prefix = 'http://www.vancouversun.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VS'
    # un-comment the following four lines for the Edmonton Journal
 ##    title = u'Edmonton Journal'
 ##    url_prefix = 'http://www.edmontonjournal.com'
 ##    description = u'News from Edmonton, AB'
 ##    fp_tag = 'CAN_EJ'
    # un-comment the following four lines for the Calgary Herald
 ##    title = u'Calgary Herald'
 ##    url_prefix = 'http://www.calgaryherald.com'
 ##    description = u'News from Calgary, AB'
 ##    fp_tag = 'CAN_CH'
    # un-comment the following four lines for the Regina Leader-Post
 ##    title = u'Regina Leader-Post'
 ##    url_prefix = 'http://www.leaderpost.com'
 ##    description = u'News from Regina, SK'
 ##    fp_tag = ''
    # un-comment the following four lines for the Saskatoon Star-Phoenix
 ##    title = u'Saskatoon Star-Phoenix'
 ##    url_prefix = 'http://www.thestarphoenix.com'
 ##    description = u'News from Saskatoon, SK'
 ##    fp_tag = ''
    # un-comment the following four lines for the Windsor Star
 ##    title = u'Windsor Star'
 ##    url_prefix = 'http://www.windsorstar.com'
 ##    description = u'News from Windsor, ON'
 ##    fp_tag = 'CAN_'
    # un-comment the following four lines for the Ottawa Citizen
 ##    title = u'Ottawa Citizen'
 ##    url_prefix = 'http://www.ottawacitizen.com'
 ##    description = u'News from Ottawa, ON'
 ##    fp_tag = 'CAN_OC'
    # un-comment the following four lines for the Montreal Gazette
    title = u'Montreal Gazette'
    url_prefix = 'http://www.montrealgazette.com'
    description = u'News from Montreal, QC'
    fp_tag = 'CAN_MG'
    language = 'en_CA'
@ -38,14 +96,81 @@ class CanWestPaper(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-    def preprocess_html(self,soup):
+
-        #delete iempty id attributes--they screw up the TOC for unknow reasons
+    def get_cover_url(self):
-        divtags = soup.findAll('div',attrs={'id':''})
+        from datetime import timedelta, date
-        if divtags:
+        if self.fp_tag=='':
-            for div in divtags:
+            return None
-                del(div['id'])
+        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
        try:
            br.open(cover)
        except:
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    daysback = daysback+1
                    continue
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def fixChars(self,string):
        # Returns `string` with stray single-byte punctuation codes and the
        # '&#x2019;' entity replaced by the typographic characters they stand for.
        # Applied in order: (pattern, replacement)
        substitutions = (
            ("\x91", "‘"),        # lsquo
            ("\x92", "’"),        # rsquo
            ("\x93", "“"),        # ldquo
            ("\x94", "”"),        # rdquo
            ("\x96", "–"),        # ndash
            ("\x97", "—"),        # mdash
            ("&#x2019;", "’"),
        )
        fixed = string
        for pattern, replacement in substitutions:
            fixed = re.sub(pattern, replacement, fixed)
        return fixed
    def massageNCXText(self, description):
        # Prepares an article description for the table of contents.
        # A falsy description (None or empty) is returned unchanged.
        # Kindle TOC descriptions won't render certain characters
        if description:
            # Round-trips the text through BeautifulStoneSoup with HTML entity conversion enabled
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # NOTE(review): as written this replaces '&' with '&', i.e. it is a
            # no-op; presumably one side was meant to be the '&amp;' entity --
            # confirm the intended direction against the upstream recipe
            massaged = re.sub("&","&", massaged)
            return self.fixChars(massaged)
        else:
            return description
    def populate_article_metadata(self, article, soup, first):
        # On the first page, registers the first image in <body> as the TOC thumbnail.
        # On every page, fills an empty summary from the og:description meta tag.
        if first:
            first_image = soup.find('body').find('img')
            if first_image is not None:
                thumbnail = re.sub(r'links\\link\d+\\','',first_image['src'])
                self.add_toc_thumbnail(article, thumbnail)
        # Nothing more to do when the article already has a non-blank summary
        if article.text_summary.strip():
            return
        og_description = soup.find('meta',attrs={'property':'og:description'})
        if og_description is None:
            return
        article.summary = article.text_summary = og_description['content']
    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup
    def preprocess_html(self, soup):
        # The only clean-up applied before conversion is flattening of hyperlinks
        cleaned = self.strip_anchors(soup)
        return cleaned
    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
--- a/recipes/mumbai_mirror.recipe
+++ b/recipes/mumbai_mirror.recipe
@ -0,0 +1,59 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class MumbaiMirror(BasicNewsRecipe):
    # RSS-driven recipe for the Mumbai Mirror newspaper.
    # -- Identification --
    title = u'Mumbai Mirror'
    __author__ = 'Krittika Goyal'
    description = 'People Daily Newspaper'
    language = 'en_IN'
    category = 'News, Mumbai, India'
    # -- Download limits --
    oldest_article = 2
    max_articles_per_feed = 100
    # -- Content handling --
    use_embedded_content = False
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True
    #encoding               = 'GB2312'
    conversion_options = {'linearize_tables':True}
    # -- Sources: (section title, feed URL) --
    feeds = [
        ('Cover Story', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=latest'),
        ('City Diary', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=citydiary'),
        ('Columnists', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=mmcolumnists'),
        ('Mumbai, The City', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=city'),
        ('Nation', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=nation'),
        ('Top Stories', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=topstories'),
        ('Business', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=business'),
        ('World', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=world'),
        (' Chai Time', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=chaitime'),
        ('Technology', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=technology'),
        ('Entertainment', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=entertainment'),
        ('Style', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=style'),
        ('Ask the Sexpert', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=askthesexpert'),
        ('Television', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=television'),
        ('Lifestyle', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=lifestyle'),
        ('Sports', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=sports'),
        ('Travel: Travelers Diary', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=travellersdiaries'),
        ('Travel: Domestic', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=traveldomestic'),
        ('Travel: International', 'http://www.mumbaimirror.com/rssfeeds.aspx?feed=travelinternational'),
    ]
--- a/recipes/mwjournal.recipe
+++ b/recipes/mwjournal.recipe
@ -0,0 +1,141 @@
 #!/usr/bin/env  python
 ##
 ## Title:        Microwave Journal
 ## Contact:      Kiavash (use Mobile Read)
 ##
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 ## Copyright:    Kiavash
 ##
 ## Written:      Jan 2012
 ## Last Edited:  Feb 2012
 ##
 # Feb 2012: New Recipe compatible with the MWJournal 2.0 website
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 __copyright__   = 'Kiavash'
 # Spelled the same way as the copyright holder named on the line above
 __author__ = 'Kiavash'
 '''
 microwavejournal.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.magick import Image
 class MWJournal(BasicNewsRecipe):
    # Recipe for the Microwave Journal monthly magazine.
    # parse_index() reads the publications page (INDEX), records the issue
    # name and cover, and returns one feed per section with duplicate titles
    # removed; print_version() switches every article to its print page.
    title            = u'Microwave Journal'
    description      = u'Microwave Journal Monthly Magazine'
    publisher        = 'Horizon House'
    publication_type = 'magazine'
    INDEX            = 'http://www.microwavejournal.com/publications/'
    language = 'en'
    timeout = 30
    Convert_Grayscale = False # Convert images to gray scale or not
    keep_only_tags = [dict(name='div', attrs={'class':'record'})]
    no_stylesheets = True
    remove_javascript = True
    remove_tags    = [
                        dict(name='font', attrs={'class':'footer'}),    # remove fonts
                     ]
    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                    font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
    # Remove the line breaks, href links and float left/right and picture width/height.
    # The anchor pattern uses '\b' after '<a' so that only <a ...> tags are
    # stripped; without it every tag whose name merely starts with "a"
    # (<abbr>, <address>, <area>, ...) lost its opening tag as well.
    # NOTE(review): the lazy '.*?' at the end of the float pattern matches the
    # empty string, so only the literal 'float:' is removed -- left unchanged, confirm intent.
    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<a\b.*?>'), lambda m: ''),
                              (re.compile(r'</a>'), lambda m: ''),
                              (re.compile(r'float:.*?'), lambda m: ''),
                              (re.compile(r'width:.*?px'), lambda m: ''),
                              (re.compile(r'height:.*?px'), lambda m: '')
                              ]
    def print_version(self, url):
        """Return the print-page URL for the article at *url*."""
        return url.replace('/articles/', '/articles/print/')
    def parse_index(self):
        """Build the feed list for the current issue.

        Returns a list of (section title, articles) tuples; each article is a
        dict with 'title', 'url', 'description' (None when the record has no
        abstract paragraph) and 'date' keys. As side effects, sets
        self.timefmt to the issue name and self.cover_url when a cover image
        is present.
        """
        soup = self.index_to_soup(self.INDEX)
        ts = soup.find('div', attrs={'class':'box1 article publications-show'})
        ds = self.tag_to_string(ts.find('h2'))
        self.log('Found Current Issue:', ds)
        self.timefmt = ' [%s]'%ds
        cover = ts.find('img', src=True)
        if cover is not None:
            cover_src = cover['src']
            # Only site-relative paths get the host prepended (same rule as
            # for article links below); an absolute URL is used as it is
            if cover_src.startswith('/'):
                cover_src = 'http://www.microwavejournal.com' + cover_src
            self.cover_url = cover_src
            self.log('Found Cover image:', self.cover_url)
        feeds = []
        seen_titles = set() # This is used to remove duplicate articles
        sections = soup.find('div', attrs={'class':'box2 publication'})
        for section in sections.findAll('div', attrs={'class':'records'}):
            section_title = self.tag_to_string(section.find('h3'))
            self.log('Found section:', section_title)
            articles = []
            for post in section.findAll('div', attrs={'class':'record'}):
                h = post.find('h2')
                title = self.tag_to_string(h)
                if 'The MWJ Puzzler' in title: #Let's get rid of the useless Puzzler!
                    continue
                if title in seen_titles:
                    continue
                a = post.find('a', href=True)
                if a is None:
                    # A record without any link cannot be downloaded; skip it
                    continue
                seen_titles.add(title)
                url = a['href']
                if url.startswith('/'):
                    url = 'http://www.microwavejournal.com'+url
                # Records without an abstract div simply get no description
                abstract = post.find('div', attrs={'class':'abstract'})
                p = abstract.find('p') if abstract is not None else None
                desc = None
                self.log('\tFound article:', title, 'at', url)
                if p is not None:
                    desc = self.tag_to_string(p)
                    self.log('\t\t', desc)
                articles.append({'title':title, 'url':url, 'description':desc,
                    'date':self.timefmt})
            if articles:
                feeds.append((section_title, articles))
        return feeds
    def postprocess_html(self, soup, first):
        """Optionally convert every downloaded image to grayscale; return soup."""
        if self.Convert_Grayscale:
            #process all the images
            for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
                iurl = tag['src']
                img = Image()
                img.open(iurl)
                if img < 0:
                    raise RuntimeError('Out of memory')
                img.type = "GrayscaleType"
                # Overwrites the image at the same location
                img.save(iurl)
        return soup
--- a/recipes/new_scientist.recipe
+++ b/recipes/new_scientist.recipe
@ -1,16 +1,35 @@
-__license__   = 'GPL v3'
+##
-__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
+## Title:        New Scientist RSS recipe
 ## Contact:      AprilHare, Darko Miletic <darko.miletic at gmail.com>
 ##
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 ## Copyright:    2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>
 ##
 ## Written:      2008
 ## Last Edited:  Jan 2012
 ##
 '''
 01-19-2012: Added grayscale image conversion and duplicate article removal
 '''
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 __copyright__ = '2008-2012, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
 __version__     = 'v0.5.0'
 __date__        = '2012-01-19'
 __author__      = 'Darko Miletic'
 '''
 newscientist.com
 '''
 import re
 import urllib
 from calibre.utils.magick import Image
 from calibre.web.feeds.news import BasicNewsRecipe
 class NewScientist(BasicNewsRecipe):
    title                 = 'New Scientist - Online News w. subscription'
    __author__            = 'Darko Miletic'
    description           = 'Science news and science articles from New Scientist.'
    language              = 'en'
    publisher             = 'Reed Business Information Ltd.'
@ -39,6 +58,15 @@ class NewScientist(BasicNewsRecipe):
    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
    # Whether to omit duplicates of articles (typically arising when articles are indexed in
    # more than one section). If True, only the first occurrence will be downloaded.
    filterDuplicates = True
    # Whether to convert images to grayscale for eInk readers.
    Convert_Grayscale = False
    url_list = []   # This list is used to check if an article had already been included.
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.newscientist.com/')
@ -80,6 +108,10 @@ class NewScientist(BasicNewsRecipe):
        return article.get('guid',  None)
    def print_version(self, url):
        # Returns the full-text print URL for `url`.
        # With filterDuplicates on, a URL that was already handed out yields
        # None instead; every URL that gets through is recorded in url_list.
        already_seen = self.filterDuplicates and url in self.url_list
        if already_seen:
            return None
        self.url_list.append(url)
        return url + '?full=true&print=true'
    def preprocess_html(self, soup):
@ -101,3 +133,16 @@ class NewScientist(BasicNewsRecipe):
                   tg.replaceWith(tstr)
        return soup
    # Converts images to Gray Scale
    def postprocess_html(self, soup, first):
        # When Convert_Grayscale is enabled, rewrites every <img> that has a
        # src as a grayscale image saved back to the same location.
        # Always returns the soup it was given.
        if not self.Convert_Grayscale:
            return soup
        def is_image_with_source(tag):
            return tag.name.lower()=='img' and tag.has_key('src')
        for image_tag in soup.findAll(is_image_with_source):
            location = image_tag['src']
            picture = Image()
            picture.open(location)
            if picture < 0:
                raise RuntimeError('Out of memory')
            picture.type = "GrayscaleType"
            picture.save(location)
        return soup
--- a/recipes/njp.recipe
+++ b/recipes/njp.recipe
@ -1,14 +1,25 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
-__license__     = 'GPL v3'
+##
 ## Title:        New Journal of Physics
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 ## Copyright:    Chema Cort\xe9s
 ##
 ## Written:      Jan 2011
 ## Last Edited:  Jan 2012 - by Kiavash
 ##
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 __copyright__   = u'Chema Cort\xe9s - 2011-01-05'
-__version__     = 'v0.01'
+__version__     = 'v0.5.0'
-__date__        = '2011-01-05'
+__date__        = '2012-01-13'
 '''
 njp.org
 '''
 import re	# Import the regular expressions module.
 from calibre.web.feeds.news import BasicNewsRecipe
 class NewJournalOfPhysics(BasicNewsRecipe):
@ -19,14 +30,60 @@ class NewJournalOfPhysics(BasicNewsRecipe):
    category       = 'physics, journal, science'
    language       = 'en'
    oldest_article = 30
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['fulltextContainer'])]
    no_stylesheets=True
    use_embedded_content=False
    feeds          = [(u'Latest Papers', u'http://iopscience.iop.org/1367-2630/?rss=1')]
    cover_url      = 'http://images.iop.org/journals_icons/Info/1367-2630/cover.gif'
    oldest_article = 7
    max_articles_per_feed = 30
    timeout = 30
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    remove_empty_feeds = True
    asciiize = True	# Converts all non-ASCII characters to their ASCII equivalents
    # Keep only the element with this id; everything outside it is dropped.
    keep_only_tags = [
                        dict(id=['articleEvoContainer']),
                     ]
    # Elements stripped from the kept article container.
    remove_tags    = [
                        dict(name='div', attrs={'class':'affiliations'}), # remove the 'Show Affiliations' block
                        dict(name='div', attrs={'class':'abst-icon-links'}), # remove Tags and PDF export links
                        dict(name='p', attrs={'class':'studyimage'}),	# remove study image
                        dict(name='a', attrs={'class':'icon powerpoint'}),	# remove 'Export to PowerPoint Slide'
                        dict(name='a', attrs={'title':'CrossRef'}),	# remove CrossRef icon
                        dict(name='a', attrs={'title':'PubMed'}),	# remove PubMed icon
                        dict(name='a', attrs={'e4f5426941':'true'}),	# remove cross ref image
                        dict(name='img', attrs={'src':''}),	# remove empty image
                        dict(name='a', attrs={'class':'closeChap'}),	# remove 'Close'
                        dict(name='ul', attrs={'class':'breadcrumbs'}),	# remove top breadcrumbs
                     ]
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                    font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
    # Remove the line breaks: each (pattern, function) pair replaces every
    # match with the empty string, case-insensitively.
    #   1. self-closing <br/> tags, with optional spaces before the slash
    #   2. <br clear...> variants
    # NOTE(review): the '.*' in the second pattern is greedy, so on a line
    # holding several tags it can run to the last '/>' on that line - confirm
    # this is intended.
    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
                              ]
    def print_version(self, url):
-        return url+"/fulltext"
+        return url+"/article"
--- a/recipes/novilist_novine_hr.recipe
+++ b/recipes/novilist_novine_hr.recipe
@ -0,0 +1,100 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 novine.novilist.hr
 '''
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class NoviList_hr(BasicNewsRecipe):
    """Recipe for the subscription edition published at novine.novilist.hr.

    The table of contents on the index page is scraped by ``parse_index``;
    no RSS feeds are used.
    """
    title                 = 'Novi List'
    __author__            = 'Darko Miletic'
    description           = 'Vijesti iz Hrvatske'
    publisher             = 'NOVI LIST d.d.'
    category              = 'Novi list, politika, hrvatski dnevnik, Novine, Hrvatska, Croatia, News, newspaper, Hrvatski,Primorje, dnevni list, Rijeka'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'cp1250'
    use_embedded_content  = False
    language              = 'hr'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    needs_subscription    = True
    masthead_url          = 'http://novine.novilist.hr/images/system/novilist-logo.jpg'
    index                 = 'http://novine.novilist.hr/'
    extra_css             = """
                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                               body{font-family: Geneva,Arial,Helvetica,Swiss,sans1,sans-serif }
                               img{display:block; margin-bottom: 0.4em; margin-top: 0.4em}
                               .nadnaslov,.podnaslov{font-size: small; display: block; margin-bottom: 1em}
                               .naslov{font-size: x-large; color: maroon; font-weight: bold; display: block; margin-bottom: 1em;}
                               p{display: block}
                            """
    # Replace U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        , 'linearize_tables' : True
                        }
    keep_only_tags = [
                       dict(name='td', attrs={'class':['nadnaslov', 'naslov', 'podnaslov']}),
                       dict(name='font', attrs={'face':'Geneva,Arial,Helvetica,Swiss'})
                     ]
    remove_tags = [dict(name=['meta', 'link', 'iframe', 'embed', 'object'])]
    remove_attributes=['border', 'lang', 'size', 'face', 'bgcolor']
    def get_browser(self):
        """Return a browser, logged in when both username and password are set."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.index + 'loginnow.asp')
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
    def parse_index(self):
        """Scrape the index page into a list of ``(section, articles)`` tuples.

        Also sets ``self.cover_url`` from any link whose href starts with
        ``images/clanci/DOC_`` (the last such link wins).  In test mode
        (``self.test``) only the first two section cells are processed.
        Sections with no articles are dropped when ``remove_empty_feeds``
        is set.
        """
        articles = []
        soup = self.index_to_soup(self.index)
        #cover url
        # Only anchors that carry an href are inspected: indexing a bare
        # <a name=...> anchor with ['href'] would raise KeyError.
        for alink in soup.findAll('a', href=True):
            if alink['href'].startswith('images/clanci/DOC_'):
                self.cover_url = self.index + alink['href']
        #feeds
        count = 0
        for item in soup.findAll('td',attrs={'class':'tocrubrika'}):
            count = count + 1
            if self.test and count > 2:
                return articles
            aitem = item.a
            if aitem is None or not aitem.get('href'):
                # A section cell without a usable link cannot be followed.
                continue
            section  = self.tag_to_string(aitem)
            feedlink = self.index + aitem['href']
            feedpage = self.index_to_soup(feedlink)
            self.report_progress(0, _('Fetching feed')+' %s...'%(section))
            inarts   = []
            for alink in feedpage.findAll('a',attrs={'class':'naslovlinkdesno'}):
                href = alink.get('href')
                if not href:
                    continue
                inarts.append({
                                  'title'      :self.tag_to_string(alink)
                                 ,'date'       :strftime(self.timefmt)
                                 ,'url'        :self.index + href
                                 ,'description':''
                                })
            # Empty sections are kept only when remove_empty_feeds is off.
            if inarts or not self.remove_empty_feeds:
                articles.append((section,inarts))
        return articles
    def print_version(self, url):
        """Return ``url`` with the ``Rubrike`` query marker swapped for ``Pretrazivac``."""
        return url.replace('?WCI=Rubrike&','?WCI=Pretrazivac&')
--- a/recipes/novilist_portal_hr.recipe
+++ b/recipes/novilist_portal_hr.recipe
@ -0,0 +1,49 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.novilist.hr
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class NoviList_Portal_hr(BasicNewsRecipe):
    # --- publication identity -------------------------------------------
    title = 'Novi List - online portal'
    __author__ = 'Darko Miletic'
    description = 'Portal Novog Lista'
    publisher = 'NOVI LIST d.d.'
    category = 'Novi list, politika, hrvatski dnevnik, Novine, Hrvatska, Croatia, News, newspaper, Hrvatski,Primorje, dnevni list, Rijeka'
    language = 'hr'
    publication_type = 'newsportal'
    masthead_url = 'http://www.novilist.hr/design/novilist/images/logo-print.gif'

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    encoding = 'utf8'
    use_embedded_content = False
    feeds = [(u'Vijesti', u'http://www.novilist.hr/rss/feed/sve.xml')]

    # --- cleanup and styling --------------------------------------------
    no_stylesheets = True
    extra_css             = """
                               body{font-family: Geneva,Arial,Helvetica,Swiss,sans-serif }
                               h1{font-family: Georgia,serif}
                               img{display:block; margin-bottom: 0.4em; margin-top: 0.4em}
                            """
    # Replace U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags = [dict(name='div', attrs={'id':'content'})]
    remove_tags = [dict(name=['meta', 'link', 'iframe', 'embed', 'object'])]
    remove_attributes = ['border', 'lang']
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    def print_version(self, url):
        """Return ``url`` rewritten to point under the site's print layout path."""
        site_root = 'http://www.novilist.hr/'
        print_root = 'http://www.novilist.hr/layout/set/print/'
        return url.replace(site_root, print_root)
--- a/recipes/novinite_bg.recipe
+++ b/recipes/novinite_bg.recipe
@ -0,0 +1,26 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1329123365(BasicNewsRecipe):
    title          = u'Novinite.bg'
    __author__ = 'M3 Web'
    description = 'Real time provider of the latest news from Bulgaria and the world'
    category = 'Business, Politics, Society, Sports, Crime, Lifestyle, World, Health'
    oldest_article = 7
    max_articles_per_feed = 6
    language  = 'bg'
    encoding = 'windows-1251'
    no_stylesheets        = True
    remove_javascript = True
    keep_only_tags    = [dict(name='div', attrs={'id':'content'})]
    # Both rules must live in ONE list: a second `remove_tags = ...`
    # assignment would replace the first and drop its rule.
    remove_tags = [
        dict(name='div', attrs={'id':'text_options'}),
        dict(name='div', attrs={'id':'social_shares_top'}),
    ]
    remove_tags_after  = dict(id='textsize')
    # One feed per category id on novinite.bg.
    feeds          = [(u'Business', u'http://novinite.bg/rss.php?category_id=1'),
 (u'Politics', u'http://novinite.bg/rss.php?category_id=2'),
 (u'Society', u'http://novinite.bg/rss.php?category_id=3'),
 (u'Sport', u'http://novinite.bg/rss.php?category_id=4'),
 (u'Crime', u'http://novinite.bg/rss.php?category_id=5'),
 (u'Lifestyle', u'http://novinite.bg/rss.php?category_id=6'),
 (u'Health', u'http://novinite.bg/rss.php?category_id=7'),
 (u'Other', u'http://novinite.bg/rss.php?category_id=10'),
 (u'World', u'http://novinite.bg/rss.php?category_id=9')]
--- a/recipes/onda_rock.recipe
+++ b/recipes/onda_rock.recipe
@ -0,0 +1,21 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1328535130(BasicNewsRecipe):
    # --- publication identity -------------------------------------------
    title = u'Onda Rock'
    __author__ = 'faber1971'
    description = 'Italian rock webzine'
    language = 'it'
    masthead_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/71135_45820579767_4993043_n.jpg'

    # --- download settings ----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [(u'Onda Rock', u'http://www.ondarock.it/feed.php')]

    # --- cleanup: explicit tag removal, automatic cleanup disabled ------
    auto_cleanup = False
    no_stylesheets = True
    remove_tags = [
        dict(name='div', attrs={'id': [
            'boxHeader',
            'boxlinks_med',
            'footer',
            'boxinterviste',
            'box_special_med',
            'boxdiscografia_head',
            'path',
        ]}),
        dict(name='div', attrs={'align': 'left'}),
        dict(name='div', attrs={'style': 'text-align: center'}),
    ]
--- a/recipes/opinion_bo.recipe
+++ b/recipes/opinion_bo.recipe
@ -0,0 +1,77 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2011, Piet van Oostrum <piet@vanoostrum.org>'
 '''
 www.opinion.com.bo
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Opinion_Bol(BasicNewsRecipe):
    # --- publication identity -------------------------------------------
    title = u'Opinión - Bolivia'
    __author__ = 'Piet van Oostrum'
    description = u'Opinión diario de circulación nacional, Cochabamba, Bolivia'
    publisher = 'Coboce Ltda - Editora Opinión'
    category = 'news, politics, Bolivia'
    version = 1
    language = 'es_BO'
    publication_type = 'newspaper'

    # --- download settings ----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    encoding = 'utf-8'
    use_embedded_content = False
    delay = 1
    remove_empty_feeds = True

    # Both date-based values are formatted once, when the class body runs.
    cover_url = strftime('http://www.opinion.com.bo/opinion/articulos/%Y/%m%d/fotos/portada_650.jpg')
    # Path fragment used by get_article_url to recognise today's articles.
    # (Original author's note: maybe should take timezone into account.)
    today = strftime('/%Y/%m%d/')
    masthead_url = 'http://opinion.com.bo/opinion/articulos/imagenes/logo_opinion.gif'

    # --- cleanup and styling --------------------------------------------
    no_stylesheets = True
    extra_css = """body{font-family: Helvetica,Arial,sans-serif}
              .seccion_encabezado_nota_inte{font-size: 1.1em;
                font-weight: bold;}
              .autor_nota_inte{color: #999999; font-size: 0.8em;
                margin-bottom: 0.5em; text-align: right;}
              .pie{font-size: 0.8em;}"""
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }
    keep_only_tags = [dict(name='div', attrs={'class':'columna_izq_nota_intererior'})]
    remove_tags = [
        dict(name=['meta','link','form','iframe','embed','object','style']),
        dict(name='div', attrs={'class':'ocultar'}),
    ]
    remove_attributes = ['width','height']

    feeds = [
        (u'El País', u'http://www.opinion.com.bo/opinion/rss/el_pais_rss.xml'),
        (u'Cochabamba', u'http://www.opinion.com.bo/opinion/rss/cochabamba_rss.xml'),
        (u'Economía', u'http://www.opinion.com.bo/opinion/rss/economia_rss.xml'),
        (u'Cultura', u'http://www.opinion.com.bo/opinion/rss/cultura_rss.xml'),
        (u'Mundo', u'http://www.opinion.com.bo/opinion/rss/mundo_rss.xml'),
        (u'Ciencia y Tecnología', u'http://www.opinion.com.bo/opinion/rss/ciencia_tecnologia_rss.xml'),
        (u'Policial', u'http://www.opinion.com.bo/opinion/rss/policial_rss.xml'),
        (u'Editorial', u'http://www.opinion.com.bo/opinion/rss/editorial_rss.xml'),
        (u'Subeditorial', u'http://www.opinion.com.bo/opinion/rss/subeditorial_rss.xml'),
        (u'Opinión', u'http://www.opinion.com.bo/opinion/rss/opinion_rss.xml'),
        (u'Deportes', u'http://www.opinion.com.bo/opinion/rss/deportes_rss.xml'),
        (u' Vida de hoy', u'http://www.opinion.com.bo/opinion/rss/vidadehoy_rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Delete the ``style`` attribute from every element that has one."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_article_url(self, article):
        """Return the article link only if it contains today's date path.

        Links that do not contain ``self.today`` yield ``None``.
        """
        link = article.link
        if self.today not in link:
            return None
        return link
--- a/recipes/oreilly_premium.recipe
+++ b/recipes/oreilly_premium.recipe
@ -0,0 +1,197 @@
 # Talking Points is not grabbing everything.
 # The look is right, but only the last one added?
 import re
 import time
 from calibre.web.feeds.recipes import BasicNewsRecipe
 # Allows the Python soup converter, which makes parsing easier.
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 # strip ads and graphics
 # Current Column lacks a title.
 # Talking Points Memo - shorten title - Remove year and Bill's name
 # The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
 # Newsletters: Talking Points Memos covered by cat12
 class OReillyPremium(BasicNewsRecipe):
    """Recipe that scrapes premium and newsletter pages of BillOReilly.com.

    The article index is built by ``parse_index`` from the page list in
    ``catList`` rather than from RSS feeds.
    """
    title           = u'OReilly Premium'
    __author__      = 'TMcN'
    language = 'en'
    description     = 'Retrieves Premium and News Letter content from BillOReilly.com.  Requires a Bill OReilly Premium Membership.'
    cover_url       = 'http://images.billoreilly.com/images/headers/billgray_header.png'
    auto_cleanup    = True
    encoding        = 'utf8'
    needs_subscription = True
    no_stylesheets  = True
    oldest_article  = 20
    remove_javascript = True
    remove_tags     = [dict(name='img', attrs={})]
    # Don't go down
    recursions      = 0
    max_articles_per_feed = 2000
    debugMessages   = True
    # Each row: [0] name, [1] URL, [2] soup findAll tag, [3] soup findAll attrs
    # (the last two are special-cased per category), [4] articleList.
    catList = [ ["TV Archives",         'https://www.billoreilly.com/show?action=tvShowArchive', 'a',    {'class':['showLinks','homeLinks']},                   []],
                ["No Spin Archives",    'https://www.billoreilly.com/blog?categoryID=7',         True,   {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
                ["Daily Briefings",     'http://www.billoreilly.com/blog?categoryID=11',         True,   {'class':['defaultHeaderSmallLinks']},                 []],
                ["Stratfor",            'http://www.billoreilly.com/blog?categoryID=5',          'a',    {'class':['blogLinks']},                               []],
                ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12',        'td',   {},                                                    []],
                ["Current Column",      'https://www.billoreilly.com/currentcolumn',             'span', {'class':['defaultHeader']},                           []]
              ]
    def get_browser(self):
        """Return a browser, signed in when both username and password are set."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
            br.select_form(name='login')
            br['formEmailField']   = self.username
            br['formPasswordField'] = self.password
            br.submit()
        return br
    # Returns the best-guess print url.
    # The second parameter (pageURL) is returned if nothing is found.
    def extractPrintURL(self, baseURL, pageURL, printString):
        """Return ``baseURL`` + href of the link labelled ``printString``.

        ``pageURL`` is fetched and searched for a link whose text equals
        ``printString``.  If the page cannot be parsed, the text is absent,
        or its parent tag has no href, a notice is printed and ``pageURL``
        itself is returned.
        """
        href = None
        soup = self.index_to_soup(pageURL)
        if soup:
            printText = soup.find('a', text=printString)
            if printText:
                # The match is the link text; its parent carries the href.
                href = printText.parent.get('href')
        if not href:
            print("Failed to find Print string "+printString+ " in "+pageURL)
            return pageURL
        return baseURL+href
    def stripBadChars(self, inString) :
        """Return ``inString`` with every apostrophe removed."""
        return inString.replace("\'", "")
    def parseGeneric(self, baseURL):
        """Parse every ``catList`` category into ``(name, articles)`` tuples.

        Categories 0-2 (TV, No Spin, Daily Briefings) yield one article per
        matching element; 3 (Stratfor) and 5 (Current Column) yield a single
        article; 4 (Talking Points) yields one article per top-level table
        row.  A category whose expected markup is missing is skipped
        entirely.  Each article list is also stored in slot 4 of its
        ``catList`` row.
        """
        fullReturn = []
        for i, category in enumerate(self.catList):
            catName, catURL, catTag, catAttrs = category[0], category[1], category[2], category[3]
            articleList = []
            soup = self.index_to_soup(catURL)
            # Set defaults
            description = 'None'
            pubdate = time.strftime('%a, %d %b')
            # Problem: 0-2 create many in an array
            # 3-5 create one.
            # So no for-div for 3-5
            if i < 3 :
                for div in soup.findAll(catTag, catAttrs):
                    if self.debugMessages :
                        print(div)
                    if i == 1:
                        a = div.find('a', href=True)
                    else :
                        a = div
                    if self.debugMessages :
                        print(a)
                    if not a:
                        continue
                    href = a.get('href')
                    if not href:
                        # Matched element is not a usable link.
                        continue
                    # Reset per entry so one summary cannot leak into the next.
                    description = 'None'
                    summary = div.find(True, attrs={'class':'summary'})
                    if summary:
                        description = self.tag_to_string(summary, use_alt=False)
                    url = self.extractPrintURL(baseURL, baseURL+href, "Print this entry")
                    if i < 2 :
                        title = self.tag_to_string(a, use_alt=True).strip()
                    else :
                        # Daily Briefs
                        title =  div.contents[0]
                    if self.debugMessages :
                        print(title+" @ "+url)
                    articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
            elif i == 3 :   # Stratfor
                a = soup.find('a', catAttrs)
                if a is None :
                    continue
                url = baseURL+a['href']
                # Get Stratfor contents so we can get the real title.
                stratSoup = self.index_to_soup(url)
                title = stratSoup.html.head.title.string
                stratIndex = title.find('Stratfor.com:', 0)
                if (stratIndex > -1) :
                    title = title[stratIndex+14:-1]
                # Page format changed 12 Jan 2012; the former blogBody table
                # lookup is no longer used.
            elif i == 4 :      # Talking Points
                topDate =  soup.find("td", "blogBody")
                if not topDate :
                    print("Failed to find date in Talking Points")
                    continue
                # This page has the contents in double-wrapped tables!
                # Climb three enclosing tables; stop as soon as one is missing.
                upTwo = topDate
                for _level in range(3):
                    upTwo = upTwo.findParent('table')
                    if upTwo is None:
                        break
                if upTwo is None:
                    continue
                # Now navigate rows of upTwo
                if self.debugMessages :
                    print("Entering rows")
                for rows in upTwo.findChildren("tr", recursive=False):
                    # Inside top level table, each row is an article.
                    # The middle table is just for formatting the article
                    # buffer, so the first nested row is used directly.
                    articleTable = rows.find("tr")
                    if articleTable is None:
                        continue
                    dateLink = articleTable.find("a","blogDate")
                    bodies = articleTable.findAll("td", "blogBody")
                    moreLink = articleTable.find("a", "homeBlogReadMore bold")
                    if dateLink is None or len(bodies) < 2 or moreLink is None:
                        # Row does not have the expected article layout.
                        continue
                    blogDate = dateLink.contents[0]
                    # Skip to second blogBody for this.
                    blogTitle = bodies[1].contents[0]
                    blogURL = moreLink['href']
                    url = baseURL+re.sub(r'\?.*', '', blogURL)
                    title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
                    if self.debugMessages :
                        print("Talking Points Memo title "+title+" at url: "+url)
                    articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
            else :       # Current Column
                titleSpan = soup.find(catTag, catAttrs)
                if titleSpan is None :
                    continue
                title = titleSpan.contents[0]
                url = self.extractPrintURL(baseURL, catURL, "Print This Article")
            if i == 3 or i == 5 :
                # Single-article categories: no summary element is looked up,
                # so the default description is used.
                if self.debugMessages :
                    print(catName+" Title:"+title+" at url: "+url)
                articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
            # Slot 4 is the articleList; slot 3 holds the findAll attrs and
            # must stay intact for later runs.
            self.catList[i][4] = articleList
            fullReturn.append((catName, articleList))
        return fullReturn
    # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
    # returns a list of tuple ('feed title', list of articles)
    # {
    # 'title'       : article title,
    # 'url'         : URL of print version,
    # 'date'        : The publication date of the article as a string,
    # 'description' : A summary of the article
    # 'content'     : The full article (can be an empty string). This is used by FullContentProfile
    # }
    # this is used instead of BasicNewsRecipe.parse_feeds().
    def parse_index(self):
        """Return the article index built by ``parseGeneric`` for the site root."""
        # Parse the page into Python Soup
        baseURL = "https://www.billoreilly.com"
        return self.parseGeneric(baseURL)
    def preprocess_html(self, soup):
        """Follow a ``<meta http-equiv="refresh">`` redirect, if one is present.

        Returns ``soup`` unchanged when there is no refresh tag or the tag
        carries no target after the ``=``; otherwise fetches the target
        (relative to the site root) and returns it parsed as cp1252.
        """
        refresh = soup.find('meta', {'http-equiv':'refresh'})
        if refresh is None:
            return soup
        content = (refresh.get('content') or '').partition('=')[2]
        if not content:
            # Nothing to follow; fetching the bare site root would replace
            # the article with the home page.
            return soup
        raw = self.browser.open('https://www.billoreilly.com'+content).read()
        return BeautifulSoup(raw.decode('cp1252', 'replace'))
--- a/recipes/ottawa_citizen.recipe
+++ b/recipes/ottawa_citizen.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
@ -6,20 +7,72 @@ __license__   = 'GPL v3'
 www.canada.com
 '''
-from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
-    # un-comment the following three lines for the Ottawa Citizen
+    # un-comment the following four lines for the Victoria Times Colonist
 ##    title = u'Victoria Times Colonist'
 ##    url_prefix = 'http://www.timescolonist.com'
 ##    description = u'News from Victoria, BC'
 ##    fp_tag = 'CAN_TC'
    # un-comment the following four lines for the Vancouver Province
 ##    title = u'Vancouver Province'
 ##    url_prefix = 'http://www.theprovince.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VP'
    # un-comment the following four lines for the Vancouver Sun
 ##    title = u'Vancouver Sun'
 ##    url_prefix = 'http://www.vancouversun.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VS'
    # un-comment the following four lines for the Edmonton Journal
 ##    title = u'Edmonton Journal'
 ##    url_prefix = 'http://www.edmontonjournal.com'
 ##    description = u'News from Edmonton, AB'
 ##    fp_tag = 'CAN_EJ'
    # un-comment the following four lines for the Calgary Herald
 ##    title = u'Calgary Herald'
 ##    url_prefix = 'http://www.calgaryherald.com'
 ##    description = u'News from Calgary, AB'
 ##    fp_tag = 'CAN_CH'
    # un-comment the following four lines for the Regina Leader-Post
 ##    title = u'Regina Leader-Post'
 ##    url_prefix = 'http://www.leaderpost.com'
 ##    description = u'News from Regina, SK'
 ##    fp_tag = ''
    # un-comment the following four lines for the Saskatoon Star-Phoenix
 ##    title = u'Saskatoon Star-Phoenix'
 ##    url_prefix = 'http://www.thestarphoenix.com'
 ##    description = u'News from Saskatoon, SK'
 ##    fp_tag = ''
    # un-comment the following four lines for the Windsor Star
 ##    title = u'Windsor Star'
 ##    url_prefix = 'http://www.windsorstar.com'
 ##    description = u'News from Windsor, ON'
 ##    fp_tag = 'CAN_'
    # un-comment the following four lines for the Ottawa Citizen
    title = u'Ottawa Citizen'
    url_prefix = 'http://www.ottawacitizen.com'
    description = u'News from Ottawa, ON'
    fp_tag = 'CAN_OC'
-    # un-comment the following three lines for the Montreal Gazette
+    # un-comment the following four lines for the Montreal Gazette
-    #title = u'Montreal Gazette'
+##    title = u'Montreal Gazette'
-    #url_prefix = 'http://www.montrealgazette.com'
+##    url_prefix = 'http://www.montrealgazette.com'
-    #description = u'News from Montreal, QC'
+##    description = u'News from Montreal, QC'
 ##    fp_tag = 'CAN_MG'
    language = 'en_CA'
@ -43,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-    def preprocess_html(self,soup):
+    def get_cover_url(self):
-        #delete iempty id attributes--they screw up the TOC for unknow reasons
+        from datetime import timedelta, date
-        divtags = soup.findAll('div',attrs={'id':''})
+        if self.fp_tag=='':
-        if divtags:
+            return None
-            for div in divtags:
+        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-                del(div['id'])
+        br = BasicNewsRecipe.get_browser()
        daysback=1
        try:
            br.open(cover)
        except:
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    daysback = daysback+1
                    continue
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def fixChars(self,string):
        """Return ``string`` with cp1252-style punctuation made typographic.

        The substitutions are applied one after another, in table order.
        """
        substitutions = (
            ("\x91","‘"),        # lsquo
            ("\x92","’"),        # rsquo
            ("\x93","“"),        # ldquo
            ("\x94","”"),        # rdquo
            ("\x96","–"),        # ndash
            ("\x97","—"),        # mdash
            ("&#x2019;","’"),    # numeric entity for rsquo
        )
        fixed = string
        for pattern, replacement in substitutions:
            fixed = re.sub(pattern, replacement, fixed)
        return fixed
    def massageNCXText(self, description):
        """Return ``description`` cleaned up for use in a Kindle TOC.

        Falsy input is returned untouched.  Otherwise HTML entities are
        converted and the result is passed through ``self.fixChars``.
        """
        # Kindle TOC descriptions won't render certain characters
        if not description:
            return description
        decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # Ampersand substitution, reproduced exactly as written.
        # NOTE(review): pattern and replacement are identical here - confirm
        # against the upstream recipe whether an entity was lost.
        decoded = re.sub("&","&", decoded)
        return self.fixChars(decoded)
    def populate_article_metadata(self, article, soup, first):
        """Attach a TOC thumbnail and a fallback summary to ``article``.

        On the first page (``first`` truthy) the first ``<img>`` inside
        ``<body>`` is registered as the TOC thumbnail, with any
        ``links\\linkN\\`` prefix stripped from its ``src``.  If the article's
        text summary is blank, the ``og:description`` meta content is used
        for both ``summary`` and ``text_summary``.
        """
        if first:
            # A page with no <body> has no thumbnail; skip it instead of
            # calling .find on None.
            body = soup.find('body')
            picdiv = body.find('img') if body is not None else None
            if picdiv is not None:
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
        xtitle = article.text_summary.strip()
        if len(xtitle) == 0:
            desc = soup.find('meta',attrs={'property':'og:description'})
            if desc is not None:
                article.summary = article.text_summary = desc['content']
    def strip_anchors(self,soup):
        """Replace every image-less ``<a>`` with its rendered contents.

        Every tag in ``soup`` is visited and the anchors found beneath it are
        examined; anchors that wrap an ``<img>`` are left alone.  Returns
        ``soup``.
        """
        for element in soup.findAll(True):
            for anchor in element.findAll('a'):
                if anchor.img is not None:
                    continue
                inner = anchor.renderContents().decode('cp1252','replace')
                anchor.replaceWith(inner)
        return soup
    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.strip_anchors``."""
        cleaned = self.strip_anchors(soup)
        return cleaned
    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
--- a/recipes/pagina12.recipe
+++ b/recipes/pagina12.recipe
@ -1,12 +1,10 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 pagina12.com.ar
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class Pagina12(BasicNewsRecipe):
    title                 = 'Pagina - 12'
@ -66,9 +64,7 @@ class Pagina12(BasicNewsRecipe):
        return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
    def get_cover_url(self):
-        rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
+        soup = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html')
        rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
        soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
        for image in soup.findAll('img',alt=True):
           if image['alt'].startswith('Tapa de la fecha'):
              return image['src']
--- a/recipes/pambianco.recipe
+++ b/recipes/pambianco.recipe
@ -0,0 +1,14 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1326135591(BasicNewsRecipe):
    # Identification
    title                 = u'Pambianco'
    __author__            = 'faber1971'
    description           = 'fashion magazine for professional people'
    language              = 'it'

    # Download limits
    oldest_article        = 7
    max_articles_per_feed = 100

    # Content handling
    auto_cleanup          = True

    # Sources: (feed title, feed URL) pairs
    feeds = [
        (u'Pambianco', u'http://feeds.feedburner.com/pambianconews/YGXu'),
    ]
 __version__     = 'v1.0'
 __date__        = '9, January 2011'
--- a/recipes/people_daily.recipe
+++ b/recipes/people_daily.recipe
@ -1,10 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import os, time
 class AdvancedUserRecipe1277129332(BasicNewsRecipe):
-    title          = u'People Daily - China'
+    title          = u'人民日报'
    oldest_article = 2
    max_articles_per_feed = 100
-    __author__            = 'rty'
+    __author__            = 'zzh'
    pubisher  = 'people.com.cn'
    description           = 'People Daily Newspaper'
@ -14,21 +15,65 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
    use_embedded_content   = False
    no_stylesheets = True
    encoding               = 'GB2312'
    language               = 'zh'
    conversion_options = {'linearize_tables':True}
    masthead_url       = 'http://www.people.com.cn/img/2010wb/images/logo.gif'
-    feeds          = [(u'\u56fd\u5185\u65b0\u95fb', u'http://www.people.com.cn/rss/politics.xml'),
+    feeds          = [
-       (u'\u56fd\u9645\u65b0\u95fb', u'http://www.people.com.cn/rss/world.xml'),
+        (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
-       (u'\u7ecf\u6d4e\u65b0\u95fb', u'http://www.people.com.cn/rss/finance.xml'),
+        (u'国际', u'http://www.people.com.cn/rss/world.xml'),
-       (u'\u4f53\u80b2\u65b0\u95fb', u'http://www.people.com.cn/rss/sports.xml'),
+        (u'经济', u'http://www.people.com.cn/rss/finance.xml'),
-       (u'\u53f0\u6e7e\u65b0\u95fb', u'http://www.people.com.cn/rss/haixia.xml')]
+        (u'体育', u'http://www.people.com.cn/rss/sports.xml'),
        (u'教育', u'http://www.people.com.cn/rss/edu.xml'),
        (u'文化', u'http://www.people.com.cn/rss/culture.xml'),
        (u'社会', u'http://www.people.com.cn/rss/society.xml'),
        (u'传媒', u'http://www.people.com.cn/rss/media.xml'),
        (u'娱乐', u'http://www.people.com.cn/rss/ent.xml'),
       # (u'汽车', u'http://www.people.com.cn/rss/auto.xml'),
        (u'海峡两岸', u'http://www.people.com.cn/rss/haixia.xml'),
       # (u'IT频道', u'http://www.people.com.cn/rss/it.xml'),
       # (u'环保', u'http://www.people.com.cn/rss/env.xml'),
       # (u'科技', u'http://www.people.com.cn/rss/scitech.xml'),
       # (u'新农村', u'http://www.people.com.cn/rss/nc.xml'),
       # (u'天气频道', u'http://www.people.com.cn/rss/weather.xml'),
        (u'生活提示', u'http://www.people.com.cn/rss/life.xml'),
        (u'卫生', u'http://www.people.com.cn/rss/medicine.xml'),
       # (u'人口', u'http://www.people.com.cn/rss/npmpc.xml'),
       # (u'读书', u'http://www.people.com.cn/rss/booker.xml'),
       # (u'食品', u'http://www.people.com.cn/rss/shipin.xml'),
       # (u'女性新闻', u'http://www.people.com.cn/rss/women.xml'),
       # (u'游戏', u'http://www.people.com.cn/rss/game.xml'),
       # (u'家电频道', u'http://www.people.com.cn/rss/homea.xml'),
       # (u'房产', u'http://www.people.com.cn/rss/house.xml'),
       # (u'健康', u'http://www.people.com.cn/rss/health.xml'),
       # (u'科学发展观', u'http://www.people.com.cn/rss/kxfz.xml'),
       # (u'知识产权', u'http://www.people.com.cn/rss/ip.xml'),
       # (u'高层动态', u'http://www.people.com.cn/rss/64094.xml'),
       # (u'党的各项工作', u'http://www.people.com.cn/rss/64107.xml'),
       # (u'党建聚焦', u'http://www.people.com.cn/rss/64101.xml'),
       # (u'机关党建', u'http://www.people.com.cn/rss/117094.xml'),
       # (u'事业党建', u'http://www.people.com.cn/rss/117095.xml'),
       # (u'国企党建', u'http://www.people.com.cn/rss/117096.xml'),
       # (u'非公党建', u'http://www.people.com.cn/rss/117097.xml'),
       # (u'社区党建', u'http://www.people.com.cn/rss/117098.xml'),
       # (u'高校党建', u'http://www.people.com.cn/rss/117099.xml'),
       # (u'农村党建', u'http://www.people.com.cn/rss/117100.xml'),
       # (u'军队党建', u'http://www.people.com.cn/rss/117101.xml'),
       # (u'时代先锋', u'http://www.people.com.cn/rss/78693.xml'),
       # (u'网友声音', u'http://www.people.com.cn/rss/64103.xml'),
       # (u'反腐倡廉', u'http://www.people.com.cn/rss/64371.xml'),
       # (u'综合报道', u'http://www.people.com.cn/rss/64387.xml'),
       # (u'中国人大新闻', u'http://www.people.com.cn/rss/14576.xml'),
       # (u'中国政协新闻', u'http://www.people.com.cn/rss/34948.xml'),
     ]
    keep_only_tags = [
-                              dict(name='div', attrs={'class':'left_content'}),
+                              dict(name='div', attrs={'class':'text_c'}),
                               ]
    remove_tags = [
-                    dict(name='table', attrs={'class':'title'}),
+                    dict(name='div', attrs={'class':'tools'}),
                         ]
    remove_tags_after = [
-                  dict(name='table', attrs={'class':'bianji'}),
+                  dict(name='div', attrs={'id':'p_content'}),
                         ]
    def append_page(self, soup, appendtag, position):
@ -36,7 +81,7 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
        if pager:
           nexturl = self.INDEX + pager.a['href']
           soup2 = self.index_to_soup(nexturl)
-           texttag = soup2.find('div', attrs={'class':'left_content'})
+           texttag = soup2.find('div', attrs={'class':'text_c'})
           #for it in texttag.findAll(style=True):
           #   del it['style']
           newpos = len(texttag.contents)
@ -44,9 +89,15 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
           texttag.extract()
           appendtag.insert(position,texttag)
    def skip_ad_pages(self, soup):
        """Skip an interstitial advertisement page.

        When the page ``<title>`` contains "advertisement" (case-insensitive),
        the first link on the page is opened with ``self.browser`` and its
        body, decoded as GB2312 (undecodable bytes ignored), is returned.

        Returns ``None`` when the page is not an advertisement, or when the
        title or the link needed to make that decision is missing.
        """
        title = soup.find('title')
        # .string is None when the title is empty or has nested markup.
        title_text = title.string if title is not None else None
        if title_text is None or 'advertisement' not in title_text.lower():
            return None
        link = soup.find('a')
        href = link.get('href') if link is not None else None
        if href is None:
            return None
        return self.browser.open(href).read().decode('GB2312', 'ignore')
    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="utf-8" />'
+        mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="GB2312" />'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['form']
@ -55,3 +106,19 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
        #if pager:
        #   pager.extract()
        return soup
    def get_cover_url(self):
        """Return the URL of today's front-page image, or None if it cannot be opened.

        The date in the URL is today's date in China. It is computed from
        UTC plus a fixed eight-hour offset (Asia/Shanghai has no daylight
        saving time) rather than by setting the TZ environment variable and
        calling time.tzset(): tzset() only exists on Unix, and changing TZ
        alters the timezone of the whole running process.
        """
        china_now = time.gmtime(time.time() + 8 * 3600)
        year = time.strftime('%Y', china_now)
        month = time.strftime('%m', china_now)
        day = time.strftime('%d', china_now)
        cover = 'http://paper.people.com.cn/rmrb/page/'+year+'-'+month+'/'+day+'/01/RMRB'+year+month+day+'B001_b.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            # Probe the URL; any failure means there is no cover for today.
            br.open(cover)
        except Exception:
            self.log("\nCover unavailable: " + cover)
            cover = None
        return cover
--- a/recipes/pescanik.recipe
+++ b/recipes/pescanik.recipe
@ -1,18 +1,18 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 pescanik.net
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 class Pescanik(BasicNewsRecipe):
-    title                 = 'Pescanik'
+    title                 = 'Peščanik'
    __author__            = 'Darko Miletic'
-    description           = 'Pescanik'
+    description           = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
-    publisher             = 'Pescanik'
+    publisher             = 'Peščanik'
    category              = 'news, politics, Serbia'
    oldest_article        = 10
    max_articles_per_feed = 100
@ -21,7 +21,12 @@ class Pescanik(BasicNewsRecipe):
    encoding              = 'utf-8'
    language              = 'sr'
    publication_type      = 'newsportal'
-    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,"Lucida Grande",Tahoma,Verdana,sans1,sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold} '
+    masthead_url          = 'http://pescanik.net/wp-content/uploads/2011/10/logo1.png'
    extra_css             = """
                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                               body{font-family: Verdana,Arial,Tahoma,sans1,sans-serif}
                               #BlogTitle{font-size: xx-large; font-weight: bold}
                            """
    conversion_options = {
                          'comment'   : description
@ -32,29 +37,12 @@ class Pescanik(BasicNewsRecipe):
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-    
+    remove_tags = [dict(name=['object','link','meta','script','iframe','embed'])]
-    remove_attributes = ['valign','colspan','width','height','align','alt']
+    keep_only_tags = [dict(attrs={'id':['BlogTitle','BlogDate','BlogContent']})]
-    
+    feeds       = [
-    remove_tags = [dict(name=['object','link','meta','script'])]
+                    (u'Autori' , u'http://pescanik.net/category/autori/feed/'),
-
+                    (u'Prevodi', u'http://pescanik.net/category/prevodi/feed/')
    keep_only_tags = [
                         dict(attrs={'class':['contentheading','small','createdate']})
                        ,dict(name='td', attrs={'valign':'top','colspan':'2'})                        
                  ]
    feeds       = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
    def print_version(self, url):
-        nurl = url.replace('/index.php','/index2.php')
+        return url + 'print/'
        return nurl + '&pop=1&page=0'
    def preprocess_html(self, soup):
        """Rename every ``<td>`` to ``<p>`` and append two ``<br>`` tags to every ``<img>``.

        The *soup* is modified in place and returned.
        """
        for cell in soup.findAll('td'):
            cell.name = 'p'
        for image in soup.findAll('img'):
            # Two distinct tag objects, appended in creation order.
            breaks = [Tag(soup,'br'), Tag(soup,'br')]
            for brtag in breaks:
                image.append(brtag)
        return soup
--- a/recipes/philly.recipe
+++ b/recipes/philly.recipe
@ -33,3 +33,6 @@ class BasicUserRecipe1314970845(BasicNewsRecipe):
            (u'Obituaries', u'http://www.philly.com/inquirer_obituaries.rss')
    ]
    def print_version(self, url):
        """Return *url* with the ``viewAll=y`` query parameter added.

        If *url* already has a query string the parameter is joined with
        ``&``; otherwise a new query string is started with ``?``.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'viewAll=y'
--- a/recipes/plus_info.recipe
+++ b/recipes/plus_info.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __author__    = 'Darko Spasovski'
 __license__   = 'GPL v3'
@ -7,7 +8,6 @@ __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
 '''
 www.plusinfo.mk
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class PlusInfo(BasicNewsRecipe):
@ -27,8 +27,11 @@ class PlusInfo(BasicNewsRecipe):
    oldest_article        = 1
    max_articles_per_feed = 100
-    keep_only_tags = [dict(name='div', attrs={'class': 'vest'})]
+    remove_tags = []
-    remove_tags = [dict(name='div', attrs={'class':['komentari_holder', 'objava']})]
+    remove_tags.append(dict(name='div', attrs={'class':['komentari_holder', 'objava', 'koment']}))
    remove_tags.append(dict(name='ul', attrs={'class':['vest_meni']}))
    remove_tags.append(dict(name='a', attrs={'name': ['fb_share']}))
    keep_only_tags = [dict(name='div', attrs={'class': 'vest1'})]
    feeds          = [(u'Македонија', u'http://www.plusinfo.mk/rss/makedonija'),
                      (u'Бизнис', u'http://www.plusinfo.mk/rss/biznis'),
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@ -1,30 +1,36 @@
 """
 readitlaterlist.com
 """
 __license__   = 'GPL v3'
 __copyright__ = '''
 2010, Darko Miletic <darko.miletic at gmail.com>
 2011, Przemyslaw Kryger <pkryger at gmail.com>
-'''
+2012, tBunnyMan <Wag That Tail At Me dot com>
 '''
 readitlaterlist.com
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Readitlater(BasicNewsRecipe):
    title                 = 'ReadItLater'
-    __author__            = 'Darko Miletic, Przemyslaw Kryger'
+    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
-    description           = '''Personalized news feeds. Go to readitlaterlist.com to
+    description           = '''Personalized news feeds. Go to readitlaterlist.com to setup \
-                               setup up your news. Fill in your account
+                            up your news. This version displays pages of articles from \
-                               username, and optionally you can add password.'''
+                            oldest to newest, with max & minimum counts, and marks articles \
-    publisher             = 'readitlater.com'
+                            read after downloading.'''
    publisher             = 'readitlaterlist.com'
    category              = 'news, custom'
    oldest_article        = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 50
    minimum_articles      = 1
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'
    readList              = []
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -38,34 +44,39 @@ class Readitlater(BasicNewsRecipe):
        return br
    def get_feeds(self):
-        self.report_progress(0, ('Fetching list of feeds...'))
+        self.report_progress(0, ('Fetching list of pages...'))
        lfeeds = []
        i = 1
        feedurl = self.INDEX + u'/unread/1'
        while True:
            title = u'Unread articles, page ' + str(i)
-            lfeeds.append((title, feedurl))
+            lfeeds.insert(0, (title, feedurl))
-            self.report_progress(0, ('Got ') + str(i) + (' feeds'))
+            self.report_progress(0, ('Got ') + str(i) + (' pages'))
            i += 1
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
            if ritem is None:
                break
            feedurl = self.INDEX + ritem['href']
        if self.test:
            return lfeeds[:2]
        return lfeeds
    def parse_index(self):
        totalfeeds = []
        articlesToGrab = self.max_articles_per_feed
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            if articlesToGrab < 1:
                break
            feedtitle, feedurl = feedobj
            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul', attrs={'id':'list'})
-            for item in ritem.findAll('li'):
+            for item in reversed(ritem.findAll('li')):
                if articlesToGrab < 1:
                    break
                else:
                    articlesToGrab -= 1
                description = ''
                atag = item.find('a', attrs={'class':'text'})
                if atag and atag.has_key('href'):
@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
                                     ,'url'        :url
                                     ,'description':description
                                    })
                    readLink = item.find('a', attrs={'class':'check'})['href']
                    self.readList.append(readLink)
            totalfeeds.append((feedtitle, articles))
        if len(self.readList) < self.minimum_articles:
            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
        return totalfeeds
    def mark_as_read(self, markList):
        """Open ``self.INDEX + link`` for every link in *markList*.

        One browser from ``self.get_browser()`` is used for all requests;
        the responses are not inspected.
        """
        browser = self.get_browser()
        for link in markList:
            browser.open(self.INDEX + link)
    def cleanup(self):
        """Pass the collected ``self.readList`` links to ``self.mark_as_read``."""
        pending = self.readList
        self.mark_as_read(pending)
--- a/recipes/real_clear.recipe
+++ b/recipes/real_clear.recipe
@ -0,0 +1,170 @@
 #  Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
 import time
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import NavigableString
 class RealClear(BasicNewsRecipe):
    title           = u'Real Clear'
    __author__      = 'TMcN'
    description     = 'Real Clear Politics/Science/etc... aggregation of news\n'
    cover_url       = 'http://www.realclearpolitics.com/dev/mt-static/images/logo.gif'
    custom_title    = 'Real Clear - '+ time.strftime('%d %b %Y')
    auto_cleanup    = True
    encoding        = 'utf8'
    language        = 'en'
    needs_subscription = False
    no_stylesheets  = True
    oldest_article  = 7
    remove_javascript = True
    remove_tags     = [dict(name='img', attrs={})]
    # Don't go down
    recursions      = 0
    max_articles_per_feed = 400
    debugMessages = False
    # Numeric parameter is type, controls whether we look for
    feedsets = [
                ["Politics",        "http://www.realclearpolitics.com/index.xml", 0],
                ["Science",         "http://www.realclearscience.com/index.xml", 0],
                ["Tech",            "http://www.realcleartechnology.com/index.xml", 0],
                # The feedburner is essentially the same as the top feed, politics.
                # ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
                # ["Commentary",      "http://feeds.feedburner.com/Realclearpolitics-Articles", 1],
                ["Markets Home",    "http://www.realclearmarkets.com/index.xml", 0],
                ["Markets",         "http://www.realclearmarkets.com/articles/index.xml", 0],
                ["World",           "http://www.realclearworld.com/index.xml", 0],
                ["World Blog",           "http://www.realclearworld.com/blog/index.xml", 2]
            ]
    # Hints to extractPrintURL.
    # First column is the URL snippet.  Then the string to search for as text, and the attributes to look for above it.  Start with attributes and drill down.
    printhints = [
                    ["billoreilly.com",     "Print this entry",            'a', ''],
                    ["billoreilly.com",     "Print This Article",          'a', ''],
                    ["politico.com",        "Print",                       'a', 'share-print'],
                    ["nationalreview.com",  ">Print<",                     'a', ''],
                    ["reason.com",          "",                       'a', 'printer']
                    # The following are not supported due to JavaScripting, and would require obfuscated_article to handle
                    # forbes,
                    # usatoday - just prints with all current crap anyhow
            ]
    # Returns the best-guess print url.
    # The second parameter (pageURL) is returned if nothing is found.
    def extractPrintURL(self, pageURL):
        """Return the best-guess print URL for *pageURL*.

        Each row of ``self.printhints`` is (URL snippet, link text, tag name,
        attrs). For every row whose snippet occurs in *pageURL* the page is
        searched for a matching element, and the ``href`` of that element,
        of its parent, or of its grandparent (first one found) is returned.

        *pageURL* itself is returned when no hint applies, when the page
        cannot be parsed, or when no ``href`` is found.

        Attributes are read with ``.get`` so that a missing ``href`` falls
        through to the next candidate instead of raising ``KeyError``. The
        page is fetched at most once, even if several hints match.
        """
        def hrefOf(node):
            # None for a missing node or a node without an href attribute.
            if node is None:
                return None
            return node.get('href')

        soup = None
        for snippet, linkText, tagName, tagAttrs in self.printhints:
            if snippet not in pageURL:
                continue
            print("Trying "+snippet)
            # Only retrieve the soup if we have a hint to check it against.
            if soup is None:
                soup = self.index_to_soup(pageURL)
                if soup is None:
                    return pageURL
            if tagAttrs and not linkText:
                if self.debugMessages:
                    print("search1")
                printFind = soup.find(tagName, attrs=tagAttrs)
            elif tagAttrs:
                if self.debugMessages:
                    print("search2")
                printFind = soup.find(tagName, attrs=tagAttrs, text=linkText)
            else:
                printFind = soup.find(tagName, text=linkText)
            if printFind is None:
                if self.debugMessages:
                    print("Not Found")
                continue
            print(printFind)
            # A text match is a NavigableString and carries no attributes;
            # only a tag match can hold the href itself.
            if not isinstance(printFind, NavigableString):
                href = hrefOf(printFind)
                if href is not None:
                    return href
            tag = printFind.parent
            print(tag)
            href = hrefOf(tag)
            if href is not None:
                return href
            if self.debugMessages:
                print("Not in parent, trying skip-up")
            href = hrefOf(tag.parent if tag is not None else None)
            if href is not None:
                return href
            if self.debugMessages:
                print("Not in skip either, aborting")
        return pageURL
    def get_browser(self):
        """Return the browser from ``BasicNewsRecipe.get_browser()``, announcing the call when debugging."""
        debugging = self.debugMessages == True
        if debugging:
            print("In get_browser")
        return BasicNewsRecipe.get_browser()
    def parseRSS(self, index) :
        """Fetch feed number *index* of ``self.feedsets`` and return its articles.

        Returns a list of dicts with the keys ``title``, ``url``, ``date``,
        ``description`` and ``content``. The URL is taken from the first of
        ``originalLink``, ``originallink``, ``link``, ``guid`` that has
        contents, and is then passed through ``self.extractPrintURL``.

        Items without a usable title or link are reported and skipped. This
        includes items with no ``<title>`` element at all, which would
        otherwise raise before the skip check is reached.
        """
        feedName = self.feedsets[index][0]
        if self.debugMessages:
            print("\n\nStarting "+feedName)
        articleList = []
        soup = self.index_to_soup(self.feedsets[index][1])
        for div in soup.findAll("item"):
            titleEl = div.find("title")
            title = None
            if titleEl is not None and titleEl.contents:
                title = titleEl.contents[0]
            # Take the first link-like element that actually has contents.
            urlEl = None
            for tagName in ("originalLink", "originallink", "link", "guid"):
                candidate = div.find(tagName)
                if candidate is not None and len(candidate.contents) > 0:
                    urlEl = candidate
                    break
            if urlEl is None or title is None:
                print("Error in feed "+ feedName)
                print(div)
                continue
            print(title)
            print(urlEl)
            url = urlEl.contents[0].encode("utf-8")
            descEl = div.find("description")
            if descEl is not None and descEl.contents:
                description = descEl.contents[0]
            else:
                description = "None"
            today = time.strftime('%a, %d %b')
            pubDateEl = div.find("pubDate")
            if pubDateEl is None:
                pubDateEl = div.find("pubdate")
            if pubDateEl is None or not pubDateEl.contents:
                pubDate = today
            else:
                pubDate = pubDateEl.contents[0]
            if self.debugMessages:
                print("Article")
                print(title)
                print(description)
                print(pubDate)
                print(url)
            url = self.extractPrintURL(url)
            print(url)
            # NOTE(review): the article is stamped with today's date; the
            # feed's own pubDate is only shown in the debug output above.
            # Confirm whether that is intended.
            articleList.append(dict(title=title, url=url, date=today, description=description, content=''))
        return articleList
    # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
    # It returns a list of tuples of the form ('feed title', list of articles), where each article is a dict:
    # {
    # 'title'       : article title,
    # 'url'         : URL of print version,
    # 'date'        : The publication date of the article as a string,
    # 'description' : A summary of the article
    # 'content'     : The full article (can be an empty string). This is used by FullContentProfile
    # }
    # this is used instead of BasicNewsRecipe.parse_feeds().
    def parse_index(self):
        """Build the list of (feed title, articles) pairs for all of ``self.feedsets``.

        Each feed is parsed with ``self.parseRSS``; a feed whose result is
        ``None`` is left out.
        """
        sections = []
        for position, feedset in enumerate(self.feedsets):
            found = self.parseRSS(position)
            if found is None:
                continue
            sections.append((feedset[0], found))
        if self.debugMessages == True :
            print(sections)
        return sections
--- a/recipes/regina_leader_post.recipe
+++ b/recipes/regina_leader_post.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
@ -6,35 +7,72 @@ __license__   = 'GPL v3'
 www.canada.com
 '''
-from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
-    # un-comment the following three lines for the Regina Leader-Post
+    # un-comment the following four lines for the Victoria Times Colonist
 ##    title = u'Victoria Times Colonist'
 ##    url_prefix = 'http://www.timescolonist.com'
 ##    description = u'News from Victoria, BC'
 ##    fp_tag = 'CAN_TC'
    # un-comment the following four lines for the Vancouver Province
 ##    title = u'Vancouver Province'
 ##    url_prefix = 'http://www.theprovince.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VP'
    # un-comment the following four lines for the Vancouver Sun
 ##    title = u'Vancouver Sun'
 ##    url_prefix = 'http://www.vancouversun.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VS'
    # un-comment the following four lines for the Edmonton Journal
 ##    title = u'Edmonton Journal'
 ##    url_prefix = 'http://www.edmontonjournal.com'
 ##    description = u'News from Edmonton, AB'
 ##    fp_tag = 'CAN_EJ'
    # un-comment the following four lines for the Calgary Herald
 ##    title = u'Calgary Herald'
 ##    url_prefix = 'http://www.calgaryherald.com'
 ##    description = u'News from Calgary, AB'
 ##    fp_tag = 'CAN_CH'
    # un-comment the following four lines for the Regina Leader-Post
    title = u'Regina Leader-Post'
    url_prefix = 'http://www.leaderpost.com'
    description = u'News from Regina, SK'
    fp_tag = ''
-    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    # un-comment the following four lines for the Saskatoon Star-Phoenix
-    #title = u'Saskatoon Star-Phoenix'
+##    title = u'Saskatoon Star-Phoenix'
-    #url_prefix = 'http://www.thestarphoenix.com'
+##    url_prefix = 'http://www.thestarphoenix.com'
-    #description = u'News from Saskatoon, SK'
+##    description = u'News from Saskatoon, SK'
 ##    fp_tag = ''
-    # un-comment the following three lines for the Windsor Star
+    # un-comment the following four lines for the Windsor Star
-    #title = u'Windsor Star'
+##    title = u'Windsor Star'
-    #url_prefix = 'http://www.windsorstar.com'
+##    url_prefix = 'http://www.windsorstar.com'
-    #description = u'News from Windsor, ON'
+##    description = u'News from Windsor, ON'
 ##    fp_tag = 'CAN_'
-    # un-comment the following three lines for the Ottawa Citizen
+    # un-comment the following four lines for the Ottawa Citizen
-    #title = u'Ottawa Citizen'
+##    title = u'Ottawa Citizen'
-    #url_prefix = 'http://www.ottawacitizen.com'
+##    url_prefix = 'http://www.ottawacitizen.com'
-    #description = u'News from Ottawa, ON'
+##    description = u'News from Ottawa, ON'
 ##    fp_tag = 'CAN_OC'
-    # un-comment the following three lines for the Montreal Gazette
+    # un-comment the following four lines for the Montreal Gazette
-    #title = u'Montreal Gazette'
+##    title = u'Montreal Gazette'
-    #url_prefix = 'http://www.montrealgazette.com'
+##    url_prefix = 'http://www.montrealgazette.com'
-    #description = u'News from Montreal, QC'
+##    description = u'News from Montreal, QC'
 ##    fp_tag = 'CAN_MG'
    language = 'en_CA'
@ -58,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-    def preprocess_html(self,soup):
+    def get_cover_url(self):
-        #delete iempty id attributes--they screw up the TOC for unknow reasons
+        from datetime import timedelta, date
-        divtags = soup.findAll('div',attrs={'id':''})
+        if self.fp_tag=='':
-        if divtags:
+            return None
-            for div in divtags:
+        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-                del(div['id'])
+        br = BasicNewsRecipe.get_browser()
        daysback=1
        try:
            br.open(cover)
        except:
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    daysback = daysback+1
                    continue
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def fixChars(self,string):
        """Return *string* with stray code points replaced by typographic characters.

        The substitutions are applied one after another, in the order
        listed, each with ``re.sub``.
        """
        substitutions = (
            ("\x91", "‘"),        # lsquo
            ("\x92", "’"),        # rsquo
            ("\x93", "“"),        # ldquo
            ("\x94", "”"),        # rdquo
            ("\x96", "–"),        # ndash
            ("\x97", "—"),        # mdash
            ("&#x2019;", "’"),    # numeric entity left in the text
        )
        fixed = string
        for pattern, replacement in substitutions:
            fixed = re.sub(pattern, replacement, fixed)
        return fixed
    def massageNCXText(self, description):
        """Prepare a TOC description: decode HTML entities, then run fixChars.

        A falsy *description* (``None`` or an empty string) is returned
        unchanged.
        """
        if not description:
            return description
        # Kindle TOC descriptions won't render certain characters, so the
        # entities are decoded to real characters first.
        decoded = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        massaged = unicode(decoded)
        # NOTE(review): as written this replaces '&' with '&' and therefore
        # changes nothing; presumably the replacement was meant to be an
        # escaped form of '&' -- confirm against the upstream recipe.
        massaged = re.sub("&","&", massaged)
        return self.fixChars(massaged)
    def populate_article_metadata(self, article, soup, first):
        """Attach a TOC thumbnail and a fallback summary to *article*.

        article -- object whose ``text_summary`` is read and whose
                   ``summary``/``text_summary`` may be overwritten.
        soup    -- parsed article page.
        first   -- when true, the first ``<img>`` inside ``<body>`` is
                   registered through ``self.add_toc_thumbnail``.

        Pages without a ``<body>``, images without ``src`` and meta tags
        without ``content`` are skipped instead of raising.
        """
        if first:
            body = soup.find('body')
            picdiv = body.find('img') if body is not None else None
            src = picdiv.get('src') if picdiv is not None else None
            if src is not None:
                # Drop the leading links\linkN\ part from the image path.
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',src))
        xtitle = article.text_summary.strip()
        if not xtitle:
            # No usable summary: fall back to the page's og:description.
            desc = soup.find('meta',attrs={'property':'og:description'})
            content = desc.get('content') if desc is not None else None
            if content is not None:
                article.summary = article.text_summary = content
    def strip_anchors(self,soup):
        """Replace every image-less ``<a>`` found under a tag of *soup* with its rendered contents.

        Anchors that wrap an ``<img>`` are kept. The rendered contents are
        decoded as cp1252 (undecodable bytes replaced) before being put in
        the anchor's place. Returns the same *soup* object.
        """
        for container in soup.findAll(True):
            for anchor in container.findAll('a'):
                if anchor.img is not None:
                    # Image links stay as they are.
                    continue
                rendered = anchor.renderContents().decode('cp1252','replace')
                anchor.replaceWith(rendered)
        return soup
    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.strip_anchors``."""
        cleaned = self.strip_anchors(soup)
        return cleaned
    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
--- a/recipes/resurgence.recipe
+++ b/recipes/resurgence.recipe
@ -0,0 +1,20 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
 from calibre.web.feeds.news import BasicNewsRecipe
 class TheResurgence(BasicNewsRecipe):
    # Feed-driven recipe for The Resurgence; page cleanup is left to auto_cleanup.
    # Identity
    title = u'The Resurgence'
    __author__ = 'Peter Grungi'
    publisher = 'The Resurgence'
    author = 'The Resurgence'
    language = 'en'
    # Artwork: the same logo is used for cover and masthead.
    masthead_url = 'http://cdn.theresurgence.com/images/logo.png'
    cover_url = 'http://cdn.theresurgence.com/images/logo.png'
    # Download limits and cleanup
    oldest_article = 7
    max_articles_per_feed = 10
    auto_cleanup = True
    # (section title, feed URL) pairs
    feeds = [
        (u'The Resurgence', u'http://feeds.theresurgence.com/TheResurgence?format=xml'),
    ]
--- a/recipes/rionegro.recipe
+++ b/recipes/rionegro.recipe
@ -0,0 +1,65 @@
 __license__   = 'GPL v3'
 __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.rionegro.com.ar
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class RioNegro(BasicNewsRecipe):
    # Recipe for Diario Rio Negro: articles come from the per-section RSS
    # feeds below and are downloaded through the site's print view.
    title                 = 'Diario Rio Negro'
    __author__            = 'Darko Miletic'
    description           = 'Noticias desde la Patagonia Argentina y el resto del mundo'
    publisher             = 'Editorial Rio Negro SA.'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'es_AR'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.rionegro.com.ar/diario/imagenes/logorn.gif'
    extra_css             = """
                               body{font-family: Arial,Helvetica,sans-serif }
                               img{display:block}
                               h1 {font-size: 0.89em; color: red}
                               h2 {font-family: Georgia,"Times New Roman",Times,serif; font-size: 1.8em}
                               h3 {font-family: Georgia,"Times New Roman",Times,serif; border-bottom: 2px solid gray}
                            """
    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_tags = [
                     dict(name=['meta','link','iframe','object','embed'])
                    ,dict(name='div', attrs={'class':'logo'})
                  ]
    # Only the element with class "nota" is kept from each page.
    keep_only_tags=[dict(attrs={'class':'nota'})]
    remove_attributes=['lang']
    # (section title, feed URL) pairs
    feeds = [
              (u'Argentina'        , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9532')
             ,(u'El Mundo'         , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9533')
             ,(u'Carta de lectores', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9538')
             ,(u'Columnistas'      , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9539')
             ,(u'Domingo a Domingo', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9541')
             ,(u'Editorial'        , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9542')
             ,(u'Deportes'         , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9522')
             ,(u'Espectaculos'     , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9523')
             ,(u'Sociedad'         , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9574')
             ,(u'Policiales'       , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9525')
             ,(u'Municipales'      , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9862')
             ,(u'Region'           , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9701')
            ]
    def print_version(self, url):
        """Return the print-view URL for the article addressed by ``url``.

        The article id is whatever follows the last ``idart=`` in ``url``,
        up to (not including) the next ``&``.
        """
        idart_raw = url.rpartition('idart=')[2]
        # partition() keeps the whole id when no further parameter follows;
        # rpartition('&')[0] returned '' in that case and kept extra
        # parameters when more than one '&' followed the id.
        idart = idart_raw.partition('&')[0]
        return 'http://www.rionegro.com.ar/diario/rn/print.aspx?idArt=' + idart + '&tipo=2'
--- a/recipes/samanyolu_haber.recipe
+++ b/recipes/samanyolu_haber.recipe
@ -10,6 +10,7 @@ class SHaber (BasicNewsRecipe):
    oldest_article         =2
    max_articles_per_feed  =100
    no_stylesheets         = True
    auto_cleanup = True
    #delay                  = 1
    #use_embedded_content   = False
    encoding               = 'utf-8'
@ -27,11 +28,11 @@ class SHaber (BasicNewsRecipe):
    extra_css              = '  .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px} .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px } #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }.KirmiziText{ font-weight: normal;font-size: 5px }' #.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
   #extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    keep_only_tags    = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi','Haber-Ozet-Yazisi']}),dict(name='div', attrs={'id':['ctl00_ContentPlaceHolder1_imagenew','Haber']})]#,dict(name='h6', attrs={'class':['KirmiziText',]}) dict(name='div', attrs={'id':['Haber']}),dict(name='div', attrs={'id':['gallery']})]
+    #keep_only_tags    = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi','Haber-Ozet-Yazisi']}),dict(name='div', attrs={'id':['ctl00_ContentPlaceHolder1_imagenew','Haber']})]#,dict(name='h6', attrs={'class':['KirmiziText',]}) dict(name='div', attrs={'id':['Haber']}),dict(name='div', attrs={'id':['gallery']})]
-    #remove_tags  = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ],dict(name='h1', attrs={'class':['H1-Haber-DetayBasligi']}),dict(name='h4', attrs={'class':['BrownText']}) ,
+    #remove_tags  = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi']})]#attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ],dict(name='h1', attrs={'class':['H1-Haber-DetayBasligi']}),dict(name='h4', attrs={'class':['BrownText']}) ,
-    cover_img_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
+    cover_img_url = 'http://www.samanyoluhaber.com/include/logo.png'
-    masthead_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
+    masthead_url = 'http://www.samanyoluhaber.com/include/logo.png'
    remove_empty_feeds= True
    #remove_attributes = ['width','height']
--- a/recipes/sanjosemercurynews.recipe
+++ b/recipes/sanjosemercurynews.recipe
@ -12,6 +12,7 @@ class MercuryNews(BasicNewsRecipe):
    title                 = 'San Jose Mercury News'
    __author__            = 'Darko Miletic'
    description           = 'News from San Jose'
    cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/CA_SJMN.jpg'
    publisher             = 'San Jose Mercury News'
    category              = 'news, politics, USA, San Jose, California'
    oldest_article        = 2
--- a/recipes/saskatoon_star_phoenix.recipe
+++ b/recipes/saskatoon_star_phoenix.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
@ -6,30 +7,72 @@ __license__   = 'GPL v3'
 www.canada.com
 '''
-from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
-    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    # un-comment the following four lines for the Victoria Times Colonist
 ##    title = u'Victoria Times Colonist'
 ##    url_prefix = 'http://www.timescolonist.com'
 ##    description = u'News from Victoria, BC'
 ##    fp_tag = 'CAN_TC'
    # un-comment the following four lines for the Vancouver Province
 ##    title = u'Vancouver Province'
 ##    url_prefix = 'http://www.theprovince.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VP'
    # un-comment the following four lines for the Vancouver Sun
 ##    title = u'Vancouver Sun'
 ##    url_prefix = 'http://www.vancouversun.com'
 ##    description = u'News from Vancouver, BC'
 ##    fp_tag = 'CAN_VS'
    # un-comment the following four lines for the Edmonton Journal
 ##    title = u'Edmonton Journal'
 ##    url_prefix = 'http://www.edmontonjournal.com'
 ##    description = u'News from Edmonton, AB'
 ##    fp_tag = 'CAN_EJ'
    # un-comment the following four lines for the Calgary Herald
 ##    title = u'Calgary Herald'
 ##    url_prefix = 'http://www.calgaryherald.com'
 ##    description = u'News from Calgary, AB'
 ##    fp_tag = 'CAN_CH'
    # un-comment the following four lines for the Regina Leader-Post
 ##    title = u'Regina Leader-Post'
 ##    url_prefix = 'http://www.leaderpost.com'
 ##    description = u'News from Regina, SK'
 ##    fp_tag = ''
    # un-comment the following four lines for the Saskatoon Star-Phoenix
    title = u'Saskatoon Star-Phoenix'
    url_prefix = 'http://www.thestarphoenix.com'
    description = u'News from Saskatoon, SK'
    fp_tag = ''
-    # un-comment the following three lines for the Windsor Star
+    # un-comment the following four lines for the Windsor Star
-    #title = u'Windsor Star'
+##    title = u'Windsor Star'
-    #url_prefix = 'http://www.windsorstar.com'
+##    url_prefix = 'http://www.windsorstar.com'
-    #description = u'News from Windsor, ON'
+##    description = u'News from Windsor, ON'
 ##    fp_tag = 'CAN_'
-    # un-comment the following three lines for the Ottawa Citizen
+    # un-comment the following four lines for the Ottawa Citizen
-    #title = u'Ottawa Citizen'
+##    title = u'Ottawa Citizen'
-    #url_prefix = 'http://www.ottawacitizen.com'
+##    url_prefix = 'http://www.ottawacitizen.com'
-    #description = u'News from Ottawa, ON'
+##    description = u'News from Ottawa, ON'
 ##    fp_tag = 'CAN_OC'
-    # un-comment the following three lines for the Montreal Gazette
+    # un-comment the following four lines for the Montreal Gazette
-    #title = u'Montreal Gazette'
+##    title = u'Montreal Gazette'
-    #url_prefix = 'http://www.montrealgazette.com'
+##    url_prefix = 'http://www.montrealgazette.com'
-    #description = u'News from Montreal, QC'
+##    description = u'News from Montreal, QC'
 ##    fp_tag = 'CAN_MG'
    language = 'en_CA'
@ -53,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-    def preprocess_html(self,soup):
+    def get_cover_url(self):
-        #delete iempty id attributes--they screw up the TOC for unknow reasons
+        from datetime import timedelta, date
-        divtags = soup.findAll('div',attrs={'id':''})
+        if self.fp_tag=='':
-        if divtags:
+            return None
-            for div in divtags:
+        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-                del(div['id'])
+        br = BasicNewsRecipe.get_browser()
        daysback=1
        try:
            br.open(cover)
        except:
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    daysback = daysback+1
                    continue
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def fixChars(self,string):
        """Return ``string`` with stray 0x91-0x97 characters turned into typographic punctuation.

        The substitutions are applied one after another, in the order listed,
        and the final text is returned.
        """
        # (pattern, replacement) pairs; each pattern is a plain literal.
        substitutions = (
            ("\x91", "‘"),        # lsquo
            ("\x92", "’"),        # rsquo
            ("\x93", "“"),        # ldquo
            ("\x94", "”"),        # rdquo
            ("\x96", "–"),        # ndash
            ("\x97", "—"),        # mdash
            ("&#x2019;", "’"),    # rsquo written as a numeric entity
        )
        result = string
        for pattern, replacement in substitutions:
            result = re.sub(pattern, replacement, result)
        return result
    def massageNCXText(self, description):
        """Prepare ``description`` for use in the Kindle table of contents.

        Entities in the text are first resolved by BeautifulStoneSoup; any
        ampersand left in the result is then written as the numeric entity
        ``&#38;`` and the text is passed through ``fixChars``.

        A falsy ``description`` (``None`` or an empty string) is returned
        unchanged.
        """
        # Kindle TOC descriptions won't render certain characters
        if not description:
            return description
        massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # Replace '&' with its numeric entity. The substitution used to
        # replace '&' with itself, which had no effect at all.
        massaged = re.sub("&","&#38;", massaged)
        return self.fixChars(massaged)
    def populate_article_metadata(self, article, soup, first):
        """Add a TOC thumbnail and, if needed, a fallback summary to ``article``.

        article -- object whose ``text_summary`` is read and whose
                   ``summary``/``text_summary`` may be overwritten
        soup    -- parsed page; searched for the first ``<img>`` in ``<body>``
                   and for the ``og:description`` meta tag
        first   -- flag from the caller; the thumbnail is only looked up
                   when it is true

        The summary is replaced only when the existing one is empty after
        stripping whitespace and the page carries an ``og:description``.
        """
        if first:
            # A page without <body> used to raise AttributeError here.
            body = soup.find('body')
            picdiv = body.find('img') if body is not None else None
            if picdiv is not None:
                # Drop the 'links\linkN\' prefix from the image path.
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
        xtitle = article.text_summary.strip()
        if not xtitle:
            desc = soup.find('meta',attrs={'property':'og:description'})
            if desc is not None:
                article.summary = article.text_summary = desc['content']
    def strip_anchors(self,soup):
        """Unwrap every ``<a>`` element that does not contain an ``<img>``.

        Each such anchor is replaced by its rendered contents, decoded as
        cp1252 with undecodable bytes replaced. Anchors wrapping an image are
        left alone. The same ``soup`` object is returned.
        """
        for element in soup.findAll(True):
            for anchor in element.findAll('a'):
                if anchor.img is not None:
                    continue
                anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
        return soup
    def preprocess_html(self, soup):
        """Return ``soup`` after running it through ``strip_anchors``."""
        stripped = self.strip_anchors(soup)
        return stripped
    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
--- a/recipes/satira.recipe
+++ b/recipes/satira.recipe
@ -0,0 +1,14 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1327351409(BasicNewsRecipe):
    # Gathers several Italian satire blogs through their RSS feeds.
    title = u'Satira'
    __author__ = 'faber1971'
    description = 'Collection of Italian satiric blogs - v1.00 (28, January 2012)'
    language = 'it'
    # Download limits and cleanup
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    # One (section title, feed URL) pair per blog.
    feeds = [
        (u'spinoza', u'http://feeds.feedburner.com/Spinoza'),
        (u'umore maligno', u'http://www.umoremaligno.it/feed/rss/'),
        (u'fed-ex', u'http://exfed.tumblr.com/rss'),
        (u'metilparaben', u'http://feeds.feedburner.com/metil'),
        (u'freddy nietzsche', u'http://feeds.feedburner.com/FreddyNietzsche'),
    ]
--- a/recipes/seattle_times.recipe
+++ b/recipes/seattle_times.recipe
@ -12,6 +12,7 @@ class SeattleTimes(BasicNewsRecipe):
    title                 = 'The Seattle Times'
    __author__            = 'Darko Miletic'
    description           = 'News from Seattle and USA'
    cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/WA_ST.jpg'
    publisher             = 'The Seattle Times'
    category              = 'news, politics, USA'
    oldest_article        = 2
@ -20,6 +21,8 @@ class SeattleTimes(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    language = 'en'
    auto_cleanup          = True
    auto_cleanup_keep     = '//div[@id="PhotoContainer"]'
    feeds              = [
                          (u'Top Stories',
@ -69,24 +72,4 @@ class SeattleTimes(BasicNewsRecipe):
                              u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
                         ]
    keep_only_tags = [dict(id='content')]
    remove_tags        = [
                             dict(name=['object','link','script']),
                            {'class':['permission', 'note', 'bottomtools',
                                'homedelivery']},
                            dict(id=["rightcolumn", 'footer', 'adbottom']),
                         ]
    def print_version(self, url):
        """Return ``url`` unchanged; the regular article page is what gets downloaded."""
        # The statements that followed this return (building a PrintStory.pl
        # URL from the article id) could never run and have been dropped.
        return url
    def preprocess_html(self, soup):
        """Tag the page as en-US and strip inline styles.

        A Content-Language meta tag is inserted at position 0 of ``soup.head``
        and the ``style`` attribute is deleted from every element that has
        one. The same ``soup`` object is returned.
        """
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="en-US"/>')
        styled = soup.findAll(style=True)
        for tag in styled:
            del tag['style']
        return soup
--- a/recipes/sivil_dusunce.recipe
+++ b/recipes/sivil_dusunce.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324913680(BasicNewsRecipe):
    # Single-feed recipe for Sivil Dusunce; page cleanup is left to auto_cleanup.
    title = u'Sivil Dusunce'
    __author__ = 'asalet_r'
    language = 'tr'
    # Download limits and cleanup
    oldest_article = 7
    max_articles_per_feed = 20
    auto_cleanup = True
    # (section title, feed URL) pairs
    feeds = [
        (u'Sivil Dusunce', u'http://www.sivildusunce.com/feed/'),
    ]
--- a/recipes/strange_horizons.recipe
+++ b/recipes/strange_horizons.recipe
@ -0,0 +1,133 @@
 #!/usr/bin/env python
 import urlparse
 from collections import OrderedDict
 from calibre.web.feeds.news import BasicNewsRecipe
 class StrangeHorizons(BasicNewsRecipe):
 	# Recipe metadata
 	# Any issue archive page is an acceptable index as well.
 	# However, reviews will not be included in older issues.
 	# (Using the reviews archive instead of the recent reviews page would fix.)
 	INDEX = 'http://www.strangehorizons.com/'
 	title = 'Strange Horizons'
 	description = 'A magazine of speculative fiction and related nonfiction. Best downloaded on weekends'
 	masthead_url = 'http://strangehorizons.com/images/sh_head.gif'
 	publication_type = 'magazine'
 	language = 'en'
 	__author__ = 'Jim DeVona'
 	__version__ = '1.0'
 	# Cruft filters
 	keep_only_tags = [dict(name='div', id='content')]
 	remove_tags = [dict(name='p', attrs={'class': 'forum-links'}), dict(name='p', attrs={'class': 'top-link'})]
 	remove_tags_after = [dict(name='p', attrs={'class': 'author-bio'})]
 	# Styles
 	no_stylesheets = True
 	extra_css = '''div.image-left { margin: 0.5em auto 1em auto; } div.image-right { margin: 0.5em auto 1em auto; } div.illustration { margin: 0.5em auto 1em auto; text-align: center; } p.image-caption { margin-top: 0.25em; margin-bottom: 1em; font-size: 75%; text-align: center; } h1 { font-size: 160%; } h2 { font-size: 110%; } h3 { font-size: 85%; } h4 { font-size: 80%; } p { font-size: 90%; margin: 1em 1em 1em 15px; } p.author-bio { font-size: 75%; font-style: italic; margin: 1em 1em 1em 15px; } p.author-bio i, p.author-bio cite, p.author-bio .foreign { font-style: normal; } p.author-copyright { font-size: 75%; text-align: center; margin: 3em 1em 1em 15px; } p.content-date { font-weight: bold; } p.dedication { font-style: italic; } div.stanza { margin-bottom: 1em; } div.stanza p { margin: 0px 1em 0px 15px; font-size: 90%; } p.verse-line { margin-bottom: 0px; margin-top: 0px; } p.verse-line-indent-1 { margin-bottom: 0px; margin-top: 0px; text-indent: 2em; } p.verse-line-indent-2 { margin-bottom: 0px; margin-top: 0px; text-indent: 4em; } p.verse-stanza-break { margin-bottom: 0px; margin-top: 0px; } .foreign { font-style: italic; } .thought { font-style: italic; } .thought cite { font-style: normal; } .thought em { font-style: normal; } blockquote { font-size: 90%; font-style: italic; } blockquote cite { font-style: normal; } blockquote em { font-style: normal; } blockquote .foreign { font-style: normal; } blockquote .thought { font-style: normal; } .speaker { font-weight: bold; } pre { margin-left: 15px; } div.screenplay { font-family: monospace; } blockquote.screenplay-dialogue { font-style: normal; font-size: 100%; } .screenplay p.dialogue-first { margin-top: 0; } .screenplay p.speaker { margin-bottom: 0; text-align: center; font-weight: normal; } blockquote.typed-letter { font-style: normal; font-size: 100%; font-family: monospace; } .no-italics { font-style: normal; }'''
 	def parse_index(self):
 		"""Build the list of sections and articles for the issue shown on INDEX.

 		The first h2 on the index page names the issue; every following h3
 		(up to the next h2) describes one article, with the text before the
 		first ':' used as its section name. Reviews are not linked from the
 		index, so they are looked up on the Recent Reviews page by title.
 		An 'About' section is always appended.

 		Returns a list of (section name, list of article dicts) pairs in
 		the order the sections were first seen. As a side effect the issue
 		date is appended to self.title.
 		"""
 		sections = OrderedDict()
 		strange_soup = self.index_to_soup(self.INDEX)
 		# Find the heading that marks the start of this issue.
 		issue_heading = strange_soup.find('h2')
 		issue_date = self.tag_to_string(issue_heading)
 		self.title = self.title + " - " + issue_date
 		# Examine subsequent headings for information about this issue.
 		heading_tag = issue_heading.findNextSibling(['h2','h3'])
 		while heading_tag is not None:
 			# An h2 indicates the start of the next issue.
 			if heading_tag.name == 'h2':
 				break
 			# The heading begins with a word indicating the article category.
 			section = self.tag_to_string(heading_tag).split(':', 1)[0].title()
 			# Reviews aren't linked from the index, so we need to look them up
 			# separately. Currently using Recent Reviews page. The reviews
 			# archive page lists all reviews, but is >500k.
 			if section == 'Review':
 				# Get the list of recent reviews.
 				reviews_url = 'http://www.strangehorizons.com/reviews/'
 				review_soup = self.index_to_soup(reviews_url)
 				review_titles = review_soup.findAll('p', attrs={'class': 'contents-title'})
 				# Get the list of reviews included in this issue. (Kludgey.)
 				reviews_summary = heading_tag.findNextSibling('p', attrs={'class': 'contents-pullquote'})
 				if reviews_summary is None:
 					# Without the pull-quote there is nothing to match
 					# against; skip the reviews instead of failing.
 					self.log("No review summary found for this issue")
 					review_lines = []
 				else:
 					for br in reviews_summary.findAll('br'):
 						br.replaceWith('----')
 					review_summary_text = self.tag_to_string(reviews_summary)
 					review_lines = review_summary_text.split(' ----')
 				# Look for each of the needed reviews (there are 3, right?)...
 				for review_info in review_lines[0:3]:
 					# Get the review's release day (unused), title, and author.
 					try:
 						day, tna = review_info.split(': ', 1)
 						article_title, article_author = tna.split(', reviewed by ')
 					except ValueError:
 						# Line is not "<day>: <title>, reviewed by <author>".
 						self.log("Could not parse review entry:", review_info)
 						continue
 					# ... in the list of recent reviews.
 					for review_title_tag in review_titles:
 						review_title = self.tag_to_string(review_title_tag)
 						if review_title != article_title:
 							continue
 						# Extract review information from heading and surrounding text.
 						article_summary = self.tag_to_string(review_title_tag.findNextSibling('p', attrs={'class': 'contents-pullquote'}))
 						review_date = self.tag_to_string(review_title_tag.findNextSibling('p', attrs={'class': 'contents-date'}))
 						# Resolve against the reviews page, as the article
 						# branch does against INDEX; absolute links pass
 						# through unchanged.
 						article_url = urlparse.urljoin(reviews_url, review_title_tag.find('a')['href'])
 						# Add this review to the Review section.
 						sections.setdefault(section, []).append({
 								'title': article_title,
 								'author': article_author,
 								'url': article_url,
 								'description': article_summary,
 								'date': review_date})
 						break
 					else:
 						# Try http://www.strangehorizons.com/reviews/archives.shtml
 						self.log("Review not found in Recent Reviews:", article_title)
 			else:
 				# Extract article information from the heading and surrounding text.
 				link = heading_tag.find('a')
 				article_title = self.tag_to_string(link)
 				article_url = urlparse.urljoin(self.INDEX, link['href'])
 				article_author = link.nextSibling.replace(', by ', '')
 				article_summary = self.tag_to_string(heading_tag.findNextSibling('p', attrs={'class':'contents-pullquote'}))
 				# Add article to the appropriate collection of sections.
 				sections.setdefault(section, []).append({
 						'title': article_title,
 						'author': article_author,
 						'url': article_url,
 						'description': article_summary,
 						'date': issue_date})
 			heading_tag = heading_tag.findNextSibling(['h2','h3'])
 		# Manually insert standard info about the magazine.
 		sections['About'] = [{
 				'title': 'Strange Horizons',
 				'author': 'Niall Harrison, Editor-in-Chief',
 				'url': 'http://www.strangehorizons.com/AboutUs.shtml',
 				'description': 'Strange Horizons is a magazine of and about speculative fiction and related nonfiction. Speculative fiction includes science fiction, fantasy, horror, slipstream, and all other flavors of fantastika. Work published in Strange Horizons has been shortlisted for or won Hugo, Nebula, Rhysling, Theodore Sturgeon, James Tiptree Jr., and World Fantasy Awards.',
 				'date': ''}]
 		return sections.items()
--- a/recipes/sueddeutsche.recipe
+++ b/recipes/sueddeutsche.recipe
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>' # 2012-01-26 AGe change to actual Year
 '''
 Fetch sueddeutsche.de
@ -8,19 +8,30 @@ Fetch sueddeutsche.de
 from calibre.web.feeds.news import BasicNewsRecipe
 class Sueddeutsche(BasicNewsRecipe):
-    title = u'sueddeutsche.de'
+    title                 = u'Süddeutsche.de'                 # 2012-01-26 AGe Correct Title
-    description = 'News from Germany'
+    description           = 'News from Germany, Access to online content' # 2012-01-26 AGe
-    __author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2011-12-16
+    __author__            = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26
-    use_embedded_content   = False
+    publisher             = 'Süddeutsche Zeitung'             # 2012-01-26 AGe add
-    timefmt = ' [%d %b %Y]'
+    category              = 'news, politics, Germany'         # 2012-01-26 AGe add
    timefmt               = ' [%a, %d %b %Y]'                 # 2012-01-26 AGe add %a
    oldest_article        = 7
-    max_articles_per_feed = 50
+    max_articles_per_feed = 100
    no_stylesheets = True
    language              = 'de'
    encoding              = 'utf-8'
    publication_type      = 'newspaper'                         # 2012-01-26 add
    cover_source          = 'http://www.sueddeutsche.de/verlag' # 2012-01-26 AGe add from Darko Miletic paid content source
    masthead_url          = 'http://www.sueddeutsche.de/static_assets/build/img/sdesiteheader/logo_homepage.441d531c.png' # 2012-01-26 AGe add
    use_embedded_content  = False
    no_stylesheets        = True
    remove_javascript     = True
    auto_cleanup          = True
-    cover_url  = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1237395.1324054345!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
+
    def get_cover_url(self):                                      # 2012-01-26 AGe add from Darko Miletic paid content source
      """Return the cover image URL found on the ``cover_source`` page.

      The page is searched for the element with class ``preview-image``;
      the ``src`` of the image inside its first ``div`` is returned.
      Returns None (and logs a message) when that structure is not present.
      """
      cover_source_soup = self.index_to_soup(self.cover_source)
      preview_image_div = cover_source_soup.find(attrs={'class':'preview-image'})
      try:
          return preview_image_div.div.img['src']
      except (AttributeError, TypeError, KeyError):
          # Any link of the chain may be missing if the page layout changes;
          # previously this raised and aborted the download.
          self.log('Cover image not found on', self.cover_source)
          return None
    feeds = [
              (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
              (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
@ -29,6 +40,9 @@ class Sueddeutsche(BasicNewsRecipe):
              (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
              (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
              (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
              (u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'),         #2012-01-26 AGe New
              (u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'),   #2012-01-26 AGe New
              (u'Stil', u'http://rss.sueddeutsche.de/rss/stil'),               #2012-01-26 AGe New
              (u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
              (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
              (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
@ -42,6 +56,7 @@ class Sueddeutsche(BasicNewsRecipe):
              (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'),         # sometimes only
              (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
              (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'),   # sometimes only
            ]
 # AGe 2011-12-16: The problem of handling redirections was solved using the 'Recipes - Re-usable code' solution from kiklop74.
 # Feed is:                    http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss
--- a/recipes/tagesspiegel.recipe
+++ b/recipes/tagesspiegel.recipe
@ -14,6 +14,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
    language = 'de'
    oldest_article = 7
    max_articles_per_feed = 100
    publication_type = 'newspaper'
    extra_css = '''
                .hcf-overline{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;display:block}
@ -33,17 +34,15 @@ class TagesspiegelRSS(BasicNewsRecipe):
    no_javascript = True
    remove_empty_feeds = True
    encoding = 'utf-8'
    remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]
-    keep_only_tags = dict(name='div', attrs={'class':["hcf-article"]})
+    def print_version(self, url):
-    remove_tags = [
+        url = url.split('/')
-                    dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),dict(name='button'),
+        url[-1] = 'v_print,%s?p='%url[-1]
-                    dict(name='div', attrs={'class':["hcf-jump-to-comments","hcf-clear","hcf-magnify hcf-media-control",
+        return '/'.join(url)
                        "hcf-socials-widgets hcf-socials-top","hcf-socials-widgets hcf-socials-bottom"] }),
                    dict(name='span', attrs={'class':["hcf-mainsearch",] }),
                    dict(name='ul', attrs={'class':["hcf-tools"]}),
                    dict(name='ul', attrs={'class': re.compile('hcf-services')})
                  ]
    def get_masthead_url(self):
        """Return the fixed URL of the Tagesspiegel masthead image."""
        masthead = 'http://www.tagesspiegel.de/images/tsp_logo/3114/6.png'
        return masthead
    def parse_index(self):
        soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/')
@ -56,7 +55,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
        ans = []
        maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})
-        for div in maincol.findAll(True, attrs={'class':['hcf-teaser', 'hcf-header', 'story headline']}):
+        for div in maincol.findAll(True, attrs={'class':['hcf-teaser', 'hcf-header', 'story headline', 'hcf-teaser hcf-last']}):
             if div['class'] == 'hcf-header':
                 try:
@ -66,7 +65,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
                 except:
                     continue
-             elif div['class'] == 'hcf-teaser' and getattr(div.contents[0],'name','') == 'h2':
+             elif div['class'] in ['hcf-teaser', 'hcf-teaser hcf-last'] and getattr(div.contents[0],'name','') == 'h2':
                 a = div.find('a', href=True)
                 if not a:
                     continue
--- a/recipes/taggeschau_de.recipe
+++ b/recipes/taggeschau_de.recipe
@ -1,24 +1,41 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 ## History:
 ## 1: Base Version
 ## 2: Added rules for wdr.de, ndr.de, br-online.de
 ## 3: Added rules for rbb-online.de, boerse.ard.de, sportschau.de
 class Tagesschau(BasicNewsRecipe):
     title          = 'Tagesschau'
     description    = 'Nachrichten der ARD'
     publisher      = 'ARD'
     language       = 'de'
     version        = 3
-     __author__     = 'Florian Andreas Pfaff'
+     __author__            = 'Florian Andreas Pfaff, a.peter'
     oldest_article        = 7
     max_articles_per_feed = 100
     no_stylesheets        = True
     remove_javascript     = True
     feeds          = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]
     remove_tags    = [
-                       dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio']}),
+                       dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio','infobox ','footer clearfix','inner recommendations','teaser teaser-08 nachrichten smallstandard','infobox-rechts','infobox-links','csl2','teaserBox metaBlock','articleA archiveDisclaimer']}),
-                       dict(name='div',
+                       dict(name='div', attrs={'id':['pageFunctions']}), ## wdr.de
- attrs={'id':['socialBookmarks','seitenanfang']}),
+                       dict(name='div', attrs={'class':['chart','footerService','toplink','assetsLeft','assetsFullsize']}), ## boerse.ard.de
-                       dict(name='ul',
+                       dict(name='div', attrs={'class':['ardMehrZumThemaLinks','socialBookmarks','ardContentEnd','ardDisclaimer']}), ## sportschau.de
- attrs={'class':['directLinks','directLinks weltatlas']}),
+                       dict(name='div', attrs={'id':['socialBookmarks','seitenanfang','comment']}),
-                       dict(name='strong', attrs={'class':['boxTitle inv','inv']})
+                       dict(name='ul',  attrs={'class':['directLinks','directLinks weltatlas','iconList','right']}),
                       dict(name='strong', attrs={'class':['boxTitle inv','inv']}),
                       dict(name='div', attrs={'class':['moreInfo right','moreInfo']}),
                       dict(name='span', attrs={'class':['videoLink']}),
                       dict(name='img', attrs={'class':['zoom float_right']}),
                       dict(name='a', attrs={'id':['zoom']})
                      ]
-     keep_only_tags = [dict(name='div', attrs={'id':'centerCol'})]
+     keep_only_tags = [dict(name='div', attrs={'id':'centerCol'}),
                       dict(name='div', attrs={'id':['mainColumn','ardContent']}),
                       dict(name='div', attrs={'class':['narrow clearfix','beitrag','detail_inlay','containerArticle noBorder','span-8']})]
     def get_masthead_url(self):
          return 'http://intern.tagesschau.de/html/img/image.jpg' 
--- a/recipes/tasfiye_dergisi.recipe
+++ b/recipes/tasfiye_dergisi.recipe
@ -0,0 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class BasicUserRecipe1324739957(BasicNewsRecipe):
     # Declarative recipe for Tasfiye Dergisi: it defines no methods, only
     # the settings below, which the BasicNewsRecipe base class consumes.
     # -- identification --
     __author__ = 'asalet_r'
     title = u'Tasfiye Dergisi'
     language = 'tr'
     # -- fetch limits --
     oldest_article = 7
     max_articles_per_feed = 20
     # -- content handling --
     auto_cleanup = True
     # -- sources: (feed title, feed URL) pairs --
     feeds = [
         (u'Tasfiye Dergisi',
          u'http://www.tasfiyedergisi.com/direnen-edebiyat/?feed=rss2'),
     ]
--- a/recipes/tech_economy.recipe
+++ b/recipes/tech_economy.recipe
@ -0,0 +1,15 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1327051385(BasicNewsRecipe):
     # Declarative recipe for the Tech Economy site: settings only, no
     # overridden methods.
     # -- identification --
     __author__ = 'faber1971'
     title = u'Tech Economy'
     description = 'Italian website on technology - v1.00 (28, January 2012)'
     language = 'it'
     masthead_url = 'http://www.techeconomy.it/wp-content/uploads/2012/01/Logo-TE9.png'
     # -- fetch limits --
     oldest_article = 7
     max_articles_per_feed = 100
     # -- content handling --
     auto_cleanup = True
     # Tag selector keyed on the author-name div of the article page.
     remove_tags_after = [
         {'name': 'div', 'attrs': {'class': 'cab-author-name'}},
     ]
     # -- sources: (feed title, feed URL) pairs --
     feeds = [
         (u'Tech Economy', u'http://www.techeconomy.it/feed/'),
     ]
--- a/recipes/telegraph_in.recipe
+++ b/recipes/telegraph_in.recipe
@ -0,0 +1,37 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class Telegraph(BasicNewsRecipe):
     # Declarative recipe for The Telegraph (India): one RSS feed per
     # newspaper section, no overridden methods.
     # -- identification --
     __author__ = 'Krittika Goyal'
     title = u'The Telegraph India'
     language = 'en_IN'
     # -- fetch limits --
     oldest_article = 1  # days
     max_articles_per_feed = 25
     # -- content handling --
     use_embedded_content = False
     no_stylesheets = True
     auto_cleanup = True
     # -- sources: (section title, feed URL) pairs --
     feeds = [
         ('Front Page', 'http://www.telegraphindia.com/feeds/rss.jsp?id=3'),
         ('Nation', 'http://www.telegraphindia.com/feeds/rss.jsp?id=4'),
         ('Calcutta', 'http://www.telegraphindia.com/feeds/rss.jsp?id=5'),
         ('Bengal', 'http://www.telegraphindia.com/feeds/rss.jsp?id=8'),
         ('Bihar', 'http://www.telegraphindia.com/feeds/rss.jsp?id=22'),
         ('Sports', 'http://www.telegraphindia.com/feeds/rss.jsp?id=7'),
         ('International', 'http://www.telegraphindia.com/feeds/rss.jsp?id=13'),
         ('Business', 'http://www.telegraphindia.com/feeds/rss.jsp?id=9'),
         ('Entertainment', 'http://www.telegraphindia.com/feeds/rss.jsp?id=20'),
         ('Opinion', 'http://www.telegraphindia.com/feeds/rss.jsp?id=6'),
     ]
--- a/recipes/the_daily_news_egypt.recipe
+++ b/recipes/the_daily_news_egypt.recipe
@ -0,0 +1,46 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
 '''
 thedailynewsegypt.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class TheDailyNewsEG(BasicNewsRecipe):
     # Declarative recipe for The Daily News Egypt: settings only, no
     # overridden methods.
     # -- identification --
     __author__ = 'Omm Mishmishah'
     title = u'The Daily News Egypt'
     description = 'News from Egypt'
     publisher = 'The Daily News Egypt'
     category = 'News, Egypt, World'
     language = 'en_EG'
     publication_type = 'newsportal'
     # The same image is used for both the masthead and the cover.
     masthead_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
     cover_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
     # -- fetch limits --
     oldest_article = 7
     max_articles_per_feed = 100
     # -- content handling --
     encoding = 'utf8'
     auto_cleanup = True
     no_stylesheets = False
     use_embedded_content = False
     remove_attributes = ['width', 'height']
     # Strip Google Maps links. The "inline-caption" class is also used for
     # some image captions, so the pattern is anchored on the maps.google
     # href rather than on the class alone.
     preprocess_regexps = [
         (
             re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL),
             lambda m: '',
         ),
     ]
     # NOTE(review): confirm the class names below match the markup of
     # thedailynewsegypt.com article pages.
     keep_only_tags = [{'attrs': {'class': ['article section']}}]
     remove_tags = [
         {
             'attrs': {
                 'class': [
                     'related',
                     'tags',
                     'tools',
                     'attached-content ready',
                     'inline-content story left',
                     'inline-content map left contracted',
                     'published',
                     'story-map',
                     'statepromo',
                     'topics',
                 ],
             },
         },
     ]
     # Conversion settings reuse the metadata declared above, so those
     # attributes must be defined before this dictionary.
     conversion_options = {
         'comments': description,
         'tags': category,
         'language': language,
         'publisher': publisher,
         'linearize_tables': False,
     }
     # -- sources: (feed title, feed URL) pairs --
     feeds = [
         (u'The Daily News Egypt',
          u'http://www.thedailynewsegypt.com/rss.php?sectionid=all'),
     ]
--- a/recipes/tillsonburg.recipe
+++ b/recipes/tillsonburg.recipe
@ -0,0 +1,25 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 '''
 Tillsonburg/Norfolk County newspapers Calibre Recipe
 '''
 class TillsonburgNorfolkCounty(BasicNewsRecipe):
     # Declarative recipe aggregating three local newspaper feeds
     # (Simcoe, Delhi and Tillsonburg); settings only, no overridden methods.
     # -- identification --
     __author__ = u'Eric Coolman'
     title = u'Tillsonburg/Norfolk County'
     publisher = u'canoe.ca'
     description = u'Norfolk County and Tillsonburg, Ontario Canada Newspapers'
     category = u'News, Ontario, Canada'
     language = 'en_CA'
     # -- fetch limits --
     oldest_article = 7
     max_articles_per_feed = 100
     # -- content handling --
     encoding = 'utf-8'
     auto_cleanup = True
     remove_javascript = True
     use_embedded_content = False
     no_stylesheets = True
     # -- sources: (feed title, feed URL) pairs --
     # The third title previously read "Tilsonburg News"; it is spelled
     # "Tillsonburg" to match the site name and the recipe title.
     feeds = [
         (u'Simcoe Reformer', u'http://www.simcoereformer.ca/rss/'),
         (u'Delhi News-Record', u'http://www.delhinewsrecord.com/rss/'),
         (u'Tillsonburg News', u'http://www.tillsonburgnews.com/rss/'),
     ]
--- a/recipes/tomshardware_it.recipe
+++ b/recipes/tomshardware_it.recipe
@ -0,0 +1,24 @@
 __license__   = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1327434170(BasicNewsRecipe):
     # Recipe for Tom's Hardware (Italian edition). The feed is served via
     # rss.feedsportal.com, whose article links embed the real article URL
     # in an escaped form; get_article_url() below decodes it.
     # -- identification --
     __author__ = 'faber1971'
     title = u"Tom's Hardware"
     description = 'Italian website on technology - v1.00 (28, January 2012)'
     language = 'it'
     masthead_url = 'http://userlogos.org/files/logos/spaljeni/tomshardwre.png'
     # -- fetch limits --
     oldest_article = 7
     max_articles_per_feed = 100
     # -- content handling --
     auto_cleanup = True
     # -- sources: (feed title, feed URL) pairs --
     feeds = [
         (u"Tom's Hardware", u'http://rss.feedsportal.com/c/32604/f/531080/index.rss'),
     ]
     def get_article_url(self, article):
         """Return the article URL, decoding feedsportal redirect links.
         When the link reported by the base class ends in ``story01.htm``,
         the second-to-last path segment holds the real URL with characters
         escaped as ``0`` followed by a letter (``0A`` -> ``0``, ``0B`` ->
         ``.``, ``0L`` -> ``http://`` and so on). That segment is decoded and
         returned; any other link is returned unchanged.
         The decoding is a single left-to-right pass. Chained ``str.replace``
         calls are not equivalent: a ``0`` produced by decoding ``0A`` could
         pair with the following literal letter and be decoded a second time
         (for example ``10ABest`` would end up as ``1.est`` instead of
         ``10Best``).
         """
         # Function-scope import: this recipe file does not import ``re``.
         import re
         link = BasicNewsRecipe.get_article_url(self, article)
         # NOTE(review): the base implementation is assumed to be able to
         # return an empty value when the feed item has no link; pass it
         # through instead of failing on ``.split``.
         if not link:
             return link
         segments = link.split('/')
         if segments[-1] != "story01.htm":
             return link
         # Escape letter -> decoded text (same table as the original code).
         escapes = {
             'A': '0',
             'B': '.',
             'C': '/',
             'D': '?',
             'E': '-',
             'F': '=',
             'G': '&',
             'I': '_',
             'L': 'http://',
             'N': '.com',
             'S': 'www.',
         }
         # Only the listed letters are escapes; any other "0X" pair is kept.
         return re.sub(
             r'0([ABCDEFGILNS])',
             lambda match: escapes[match.group(1)],
             segments[-2],
         )
--- a/Show More
+++ b/Show More