update series info from metadata

jason 2010-02-03 21:26:49 +00:00
commit fb5c70af29
212 changed files with 38211 additions and 15652 deletions

View File

@@ -4,6 +4,305 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.6.37
date: 2010-02-01
new features:
- title: "E-book viewer: Add support for viewing SVG images"
type: major
- title: "Add category of Recently added books when generating catalog in e-book format"
- title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
- title: "Add support for masthead images when downloading news for the Kindle"
- title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
bug fixes:
- title: Changing the date in Dutch
tickets: [4732]
- title: "Fix regression that broke sending files to unupdated PRS 500s"
- title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
tickets: [4726]
- title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
tickets: [4692]
- title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
tickets: [4683]
- title: "Allow rating to be cleared via the Bulk metadata edit dialog"
tickets: [4693]
- title: "Add workaround for broken linux systems with multiply encoded file names"
tickets: [4721]
- title: Fix bug preventing the use of indices when setting save to disk templates
tickets: [4710]
- title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
tickets: [4707]
- title: "Catalog generation: Make sorting of numbers in title as text optional"
- title: "Fix error while sending book with non-ascii character in title/author to device on linux"
tickets: [4690]
- title: "Fix reset cover in edit meta information dialog does not actually remove cover"
tickets: [4731]
new recipes:
- title: Kamera & Bild
author: Darko Miletic
- title: The Online Photographer
author: Darko Miletic
- title: The Luminous Landscape
author: Darko Miletic
- title: Slovo
author: Abelturd
- title: Various Danish newspapers
author: Darko Miletic
- title: Heraldo de Aragon
author: Lorenzo Vigentini
- title: Orange County Register
author: Lorenzo Vigentini
- title: Open Left
author: Xanthan Gum
- title: Michelle Malkin
author: Walt Anthony
- title: The Metro Montreal
author: Jerry Clapperton
- title: The Gazette
author: Jerry Clapperton
- title: Macleans Magazine
author: Nick Redding
- title: NY Times Sunday Book Review
author: Krittika Goyal
- title: Various Italian newspapers
author: Lorenzo Vigentini
improved recipes:
- The Irish Times
- Washington Post
- NIN
- The Discover Magazine
- Pagina 12
- version: 0.6.36
date: 2010-01-25
new features:
- title: Catalog generation in MOBI format
- title: "Driver for Inves Book 600"
- title: "Show notifications on OS X even when systray icon is disabled. "
bug fixes:
- title: Fix memory leak in catalog generation
- title: Fix regression that broke PML output
- title: Fix bug in MOBI Input
tickets: [4643]
- title: "Replace commas with semi-colons in download tags"
tickets: [4650]
- title: Fix catalog output format dropdown empty in linux
tickets: [4656]
- title: "Fix display of non-English characters in OS X notifications"
tickets: [4654]
- title: Add .cbc to list of book formats
tickets: [4662]
- title: "Content server: Mobile page breaks if library contains empty books. Now fixed."
- title: "Support old 212 byte header PDB files"
tickets: [4646]
- title: "Fix regression that caused wrong error message to be displayed when device is out of space"
new recipes:
- title: Harvard Business Review Blogs
author: Brian_G
- title: Neowin
author: Darko Miletic
- title: Greensboro News and Record
author: Walt Anthony
- title: Hot Air
author: Walt Anthony
- title: ionline
author: Darko Miletic
- title: The National Review Online
author: Walt Anthony
improved recipes:
- Ars Technica
- Sports Illustrated
- Common Dreams
- Wired Magazine
- version: 0.6.35
date: 2010-01-22
new features:
- title: Catalog generation
type: major
description: >
"You can now easily generate a catlog of all books in your calibre library by clicking the arrow next to the convert button. The catalog can be in one of several formats: XML, CSV, EPUB and MOBI, with scope for future formats via plugins. If you generate the catalog in an e-book format, it will be automatically sent to your e-book reader the next time you connect it, allowing you to easily browse your collection on the reader itself. This feature is in Beta (may have bugs) so feedback is appreciated."
- title: "RTF Input: Support for unicode characters."
type: major
tickets: [4501]
- title: "Add Quick Start Guide by John Schember to calibre library on first run of calibre"
type: major
- title: "Improve handling of justification"
description: >
"Now calibre will explicitly change the justification of all left aligned paragraphs to justified or vice versa depending on the justification setting. This should make it possible to robustly convert all content to either justified or not. calibre will not touch centered or right aligned content."
- title: "E-book viewer: Fit images to viewer window (can be turned off via Preferences)"
- title: "Add section on E-book viewer to User Manual"
- title: "Development environment: First look for resources in the location pointed to by CALIBRE_DEVELOP_FROM. If not found, use the normal resource location"
- title: "When reading metadata from filenames, with the Swap author names option checked, improve the logic used to detect author last name."
tickets: [4620]
- title: "News downloads: When getting an article URL from a RSS feed, look first for an original article link. This speeds up the download of news services that use a syndication service like feedburner or pheedo to publish their RSS feeds."
bug fixes:
- title: "Windows device detection: Don't do expensive polling while waiting for device disconnect. This should fix the problems people have with their floppy drive being activated while an e-book reader is connected"
- title: "PML Input: Fix creation of metadata Table of Contents"
tickets: [5633]
- title: "Fix Tag browser not updating after using delete specific format actions"
tickets: [4632]
- title: "MOBI Output: Don't die when converting EPUB files with SVG covers"
- title: "Nook driver: Remove the # character from filenames when sending to device"
tickets: [4629]
- title: "Workaround for bug in QtWebKit on windows that could cause crashes when using the next page button in the e-book viewer for certain files"
tickets: [4606]
- title: "MOBI Input: Rescale img width and height attributes that were specified in em units"
tickets: [4608]
- title: "ebook-meta: Fix setting of series metadata"
- title: "RTF metadata: Fix reading metadata from very small files"
- title: "Conversion pipeline: Don't error out if the user sets an invalid chapter detection XPath"
- title: "Fix main mem and card being swapped in pocketbook detection on OS X"
- title: "Welcome wizard: Set the language to english if the user doesn't explicitly change the language. This ensures that the language will be english on windows by default"
- title: "Fix bug in OEBWriter that could cause writing out of resources in subdirectories with URL unsafe names to fail"
- title: "E-book viewer: Change highlight color to yellow on all platforms."
tickets: [4641]
new recipes:
- title: Frankfurter Rundschau
author: Justus Bisser
- title: The Columbia Journalism Review
author: XanthanGum
- title: Various CanWest Canadian news sources
author: Nick Redding
- title: digitaljournal.com
author: Darko Miletic
- title: Pajamas Media
author: Krittika Goyal
- title: Algemeen Dagblad
author: kwetal
- title: "The Reader's Digest"
author: BrianG
- title: The Yemen Times
author: kwetal
- title: The Kitsap Sun
author: Darko Miletic
- title: drivelry.com
author: Krittika Goyal
- title: New recipe for Google Reader that downloads unread articles instead of just starred ones
author: rollercoaster
- title: Le Devoir
author: Lorenzo Vigentini
- title: Joop
author: kwetal
- title: Various computer magazines
author: Lorenzo Vigentini
- title: The Wall Street Journal (free parts)
author: Nick Redding
- title: Journal of Nephrology
author: Krittika Goyal
- title: stuff.co.nz
author: Krittika Goyal
- title: Editor and Publisher
author: XanthanGum
- title: The Week (free)
author: Darko Miletic
improved recipes:
- Physics Today
- Wall Street Journal
- American Spectator
- FTD
- The National Post
- Blic
- Ars Technica
- version: 0.6.34
date: 2010-01-15

BIN
icons/book.icns Normal file

(binary image previews not shown: three image files added, 11 KiB, 22 KiB and 18 KiB)

View File

@@ -0,0 +1,73 @@
body { background-color: white; }
p.title {
margin-top:0em;
margin-bottom:1em;
text-align:center;
font-style:italic;
font-size:xx-large;
border-bottom: solid black 4px;
}
p.author {
margin-top:0em;
margin-bottom:0em;
text-align: left;
text-indent: 1em;
font-size:large;
}
p.tags {
margin-top:0em;
margin-bottom:0em;
text-align: left;
text-indent: 1em;
font-size:small;
}
p.description {
text-align:left;
font-style:italic;
margin-top: 0em;
}
p.date_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.author_index {
font-size:large;
text-align:left;
margin-top:0px;
margin-bottom:0px;
text-indent: 0em;
}
p.read_book {
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-indent:-2em;
}
p.unread_book {
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-indent:-2em;
}

(18 more binary image files added, mostly small icons between 253 B and 1.0 KiB; previews not shown)

View File

@@ -0,0 +1,86 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ADRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'nl'
country = 'NL'
version = 1
title = u'AD'
publisher = u'de Persgroep Publishing Nederland NV'
category = u'News, Sports, the Netherlands'
description = u'News and Sports from the Netherlands'
oldest_article = 1.2
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'art_box2'}))
keep_only_tags.append(dict(name = 'p', attrs = {'class': 'gen_footnote3'}))
remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'class': 'gen_clear'}))
remove_tags.append(dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')}))
remove_attributes = ['style']
# feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
feeds = []
feeds.append((u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
feeds.append((u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
feeds.append((u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
feeds.append((u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
feeds.append((u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
feeds.append((u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
feeds.append((u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
feeds.append((u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
feeds.append((u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
feeds.append((u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
feeds.append((u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
div.captionEmbeddedMasterObject {font-size: x-small; font-style: italic; color: #696969;}
.gen_footnote3 {font-size: small; color: #666666; margin-top: 0.6em;}
'''
conversion_options = {'comments': description, 'tags': category, 'language': language,
'publisher': publisher}
def print_version(self, url):
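# Hedged note: ad.nl print URLs (circa 2010) are assembled by splicing fixed
# path segments of the article URL around a '/print/' component; this assumes
# every article URL carries at least 14 '/'-separated parts.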
parts = url.split('/')
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
+ parts[10] + '/' + parts[7] + '/print/' + parts[8] + '/' + parts[9] + '/' + parts[13]
return print_url
def preprocess_html(self, soup):
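# Collapse runs of <br> tags: a <br> whose nearest element siblings on both
# sides are also <br> is removed, trimming excess vertical whitespace.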
for br in soup.findAll('br'):
prev = br.findPreviousSibling(True)
if hasattr(prev, 'name') and prev.name == 'br':
next = br.findNextSibling(True)
if hasattr(next, 'name') and next.name == 'br':
br.extract()
return soup

View File

@@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
spectator.org
'''
@@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheAmericanSpectator(BasicNewsRecipe):
title = 'The American Spectator'
__author__ = 'Darko Miletic'
language = 'en'
description = 'News from USA'
category = 'news, politics, USA, world'
publisher = 'The American Spectator'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'en'
INDEX = 'http://spectator.org'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, politics, USA'
, '--publisher' , title
]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'post inner'})
@@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
remove_tags = [
dict(name='object')
,dict(name='div', attrs={'class':'col3' })
,dict(name='div', attrs={'class':'post-options' })
,dict(name='p' , attrs={'class':'letter-editor'})
,dict(name='div', attrs={'class':'social' })
,dict(name='div', attrs={'class':['col3','post-options','social']})
,dict(name='p' , attrs={'class':['letter-editor','meta']})
]
feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
def get_cover_url(self):
cover_url = None
@@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
def print_version(self, url):
return url + '/print'
def get_article_url(self, article):
return article.get('guid', None)

View File

@@ -1,12 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
arstechnica.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class ArsTechnica2(BasicNewsRecipe):
title = u'Ars Technica'
@@ -18,24 +18,24 @@ class ArsTechnica2(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
use_embedded_content = False
extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
extra_css = '''
.news-item-title{font-size: medium ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
.news-item-teaser{font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
.news-item-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
.news-item-text{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
.news-item-figure-caption-text{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:bold;}
.news-item-figure-caption-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
'''
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'id':['news-item-info','news-item']})]
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
remove_tags = [
dict(name=['object','link','embed'])
,dict(name='div', attrs={'class':'related-stories'})
,dict(name='div', attrs={'class':'read-more-link'})
]
@@ -52,14 +52,19 @@ class ArsTechnica2(BasicNewsRecipe):
]
def append_page(self, soup, appendtag, position):
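# Multi-page stitching: follow the 'Next' link in the pager, re-fetch the
# continuation page with the declared encoding, drop its 'read more' link and
# append its body text to the current article.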
pager = soup.find('div',attrs={'id':'pager'})
pager = soup.find('div',attrs={'class':'pager'})
if pager:
for atag in pager.findAll('a',href=True):
str = self.tag_to_string(atag)
if str.startswith('Next'):
soup2 = self.index_to_soup(atag['href'])
nurl = 'http://arstechnica.com' + atag['href']
rawc = self.index_to_soup(nurl,True)
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
texttag = soup2.find('div', attrs={'class':'news-item-text'})
readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
if readmoretag:
readmoretag.extract()
texttag = soup2.find('div', attrs={'class':'body'})
for it in texttag.findAll(style=True):
del it['style']
@@ -71,10 +76,12 @@ class ArsTechnica2(BasicNewsRecipe):
def preprocess_html(self, soup):
ftag = soup.find('div', attrs={'class':'news-item-byline'})
ftag = soup.find('div', attrs={'class':'byline'})
if ftag:
ftag.insert(4,'<br /><br />')
brtag = Tag(soup,'br')
brtag2 = Tag(soup,'br')
ftag.insert(4,brtag)
ftag.insert(5,brtag2)
for item in soup.findAll(style=True):
del item['style']
@@ -83,5 +90,3 @@ class ArsTechnica2(BasicNewsRecipe):
return soup

View File

@@ -98,6 +98,9 @@ class Barrons(BasicNewsRecipe):
('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
]
def get_article_url(self, article):
return article.get('link', None)
def get_cover_url(self):
cover_url = None

View File

@@ -0,0 +1,60 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
news.bbc.co.uk
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class BBC(BasicNewsRecipe):
title = 'BBC News (fast)'
__author__ = 'Darko Miletic'
description = 'News from UK. A much faster version that does not download pictures'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'BBC'
category = 'news, UK, world'
language = 'en'
extra_css = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } '
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
remove_tags_before = dict(name='div',attrs={'class':'headline'})
remove_tags_after = dict(name='div', attrs={'class':'footer'})
remove_tags = [
dict(name=['object','link','script','iframe'])
,dict(name='div', attrs={'class':'footer'})
]
feeds = [
('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
]
def print_version(self, url):
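# Strip the 'http://' scheme with partition() and prepend the newsvote print
# gateway to obtain the printer-friendly page.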
emp,sep,rstrip = url.partition('http://')
return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + rstrip
def get_article_url(self, article):
return article.get('guid', None)

View File

@@ -0,0 +1,121 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Calgary Herald
title = u'Calgary Herald'
url_prefix = 'http://www.calgaryherald.com'
description = u'News from Calgary, AB'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
return soup
def parse_index(self):
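# Walk the today's-paper index: 'section_title02' divs open a new section,
# while 'featurecontent' divs each describe one article (headline in h1/a,
# blurb in the first p, author in h4).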
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CJR(BasicNewsRecipe):
title = u'Columbia Journalism Review'
__author__ = u'Xanthan Gum'
description = 'News about journalism.'
language = 'en'
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'News Stories', u'http://www.cjr.org/index.xml')]
def print_version(self, url):
return url + '?page=all&print=true'

View File

@@ -2,17 +2,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CommonDreams(BasicNewsRecipe):
# Identify the recipe
title = u'Common Dreams'
description = u'Progressive news and views'
__author__ = u'XanthanGum'
language = 'en'
# Format the text
extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
h1{font-size: xx-large;}
h2{font-size: large;}
'''
# Pick no article older than seven days and limit the number of articles per feed to 100
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'Common Dreams Headlines',
u'http://www.commondreams.org/feed/headlines_rss'),
(u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'),
(u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
]
# Remove everything before the article
remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
# Remove everything after the article
remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
# Identify the news feeds
feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
(u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'),
(u'Views', u'http://www.commondreams.org/feed/views_rss'),
(u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]

View File

@@ -0,0 +1,52 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
digitaljournal.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DigitalJournal(BasicNewsRecipe):
title = 'Digital Journal'
__author__ = 'Darko Miletic'
description = 'A Global Citizen Journalism News Network'
category = 'news, politics, USA, world'
publisher = 'Digital Journal'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
language = 'en'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'class':['article','body']})]
remove_tags = [dict(name=['object','table'])]
feeds = [
(u'Latest News' , u'http://digitaljournal.com/rss/?feed=latest_news' )
,(u'Business' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Business' )
,(u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment')
,(u'Environment' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment' )
,(u'Food' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Food' )
,(u'Health' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Health' )
,(u'Internet' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet' )
,(u'Politics' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics' )
,(u'Religion' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion' )
,(u'Science' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Science' )
,(u'Sports' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports' )
,(u'Technology' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology' )
,(u'World' , u'http://digitaljournal.com/rss/?feed=top_news&depname=World' )
,(u'Arts' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts' )
]
def print_version(self, url):
return url.replace('digitaljournal.com/','digitaljournal.com/print/')

View File

@@ -4,19 +4,31 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
doscovermagazine.com
discovermagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DiscoverMagazine(BasicNewsRecipe):
title = u'Discover Magazine'
description = u'Science, Technology and the Future'
__author__ = 'Mike Diaz'
oldest_article = 33
language = 'en'
oldest_article = 33
max_articles_per_feed = 20
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
encoding = 'utf-8'
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
feeds = [
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
(u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),

View File

@@ -53,6 +53,8 @@ class Economist(BasicNewsRecipe):
self.feed_dict.items()])
def eco_sort_sections(self, feeds):
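# Fail loudly when feed parsing produced nothing, rather than silently
# building an empty issue.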
if not feeds:
raise ValueError('No new articles found')
order = {
'The World This Week': 1,
'Leaders': 2,

View File

@@ -0,0 +1,34 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class EandP(BasicNewsRecipe):
title = u'Editor and Publisher'
__author__ = u'Xanthan Gum'
description = 'News about newspapers and journalism.'
language = 'en'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
# Font formatting code borrowed from kwetal
extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
h1{font-size: xx-large;}
h2{font-size: large;}
'''
# Delete everything before the article
remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
# Delete everything after the article
preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: '</body>'),]
feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
(u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
(u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
(u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
(u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]

View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Edmonton Journal
title = u'Edmonton Journal'
url_prefix = 'http://www.edmontonjournal.com'
description = u'News from Edmonton, AB'
# un-comment the following three lines for the Calgary Herald
#title = u'Calgary Herald'
#url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Calgary, AB'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Justus Bisser <justus.bisser at gmail.com>'
'''
fr-online.de
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_ger(BasicNewsRecipe):
title = 'Frankfurter Rundschau'
__author__ = 'Justus Bisser'
description = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255"
publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
category = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
oldest_article = 7
max_articles_per_feed = 100
language = 'de'
lang = 'de-DE'
no_stylesheets = True
use_embedded_content = False
#encoding = 'cp1252'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
recursions = 0
#keep_only_tags = [dict(name='div', attrs={'class':'text'})]
#tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})]
remove_attributes = ['style']
feeds = []
#remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})]
#remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})]
# enable for all news
allNews = 0
if allNews:
feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')]
else:
#select the feeds you like
feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')]
feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml'))
feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml'))
feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml'))
feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml'))
feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml'))
feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml'))
feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml'))
feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml'))
feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml'))
feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml'))
feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml'))
def get_article_url(self, article):
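# The feed link embeds a numeric article id matching 0C<digits>0A; strip the
# leading '0C' and trailing character to recover the bare id for the site's
# print.php endpoint. Assumes every feed link contains such an id
# (liste.pop(0) raises otherwise).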
url = article.link
regex = re.compile("0C[0-9]{6,8}0A?")
liste = regex.findall(url)
string = liste.pop(0)
string = string[2:len(string)-1]
return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string

View File

@@ -15,7 +15,7 @@ class FTDe(BasicNewsRecipe):
__author__ = 'Oliver Niesner'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
language = _('German')
language = 'de'
max_articles_per_feed = 40
no_stylesheets = True
@@ -23,6 +23,7 @@ class FTDe(BasicNewsRecipe):
dict(id='topbanner'),
dict(id='seitenkopf'),
dict(id='BoxA-0-0-0'),
#dict(id='BoxA-2-0-0'),
dict(id='footer'),
dict(id='rating_open'),
dict(id='ADS_Top'),
@@ -59,6 +60,7 @@ class FTDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'relatedhalb'}),
dict(name='div', attrs={'class':'box boxListScrollOutline'}),
dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
dict(name='div', attrs={'class':'box boxTeaser boxPhotoshow boxImgWide'}),
dict(name='div', attrs={'class':'box boxTeaser'}),
dict(name='div', attrs={'class':'tagCloud'}),
dict(name='div', attrs={'class':'pollView'}),

View File

@@ -32,7 +32,7 @@ class GlobeAndMail(BasicNewsRecipe):
'gallery-controls', 'video', 'galleryLoading','deck','header',
'toolsBottom'] },
{'class':['credit','inline-img-caption','tab-pointer'] },
dict(name='div', attrs={'id':'lead-photo'}),
dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
dict(name='div', attrs={'class':'right'}),
dict(name='div', attrs={'id':'footer'}),
dict(name='div', attrs={'id':'beta-msg'}),
@@ -44,8 +44,9 @@ class GlobeAndMail(BasicNewsRecipe):
dict(name='div', attrs={'id':'blog-header'}),
dict(name='div', attrs={'id':'right-rail'}),
dict(name='div', attrs={'id':'group-footer-container'}),
dict(name=['iframe'])
dict(name=['iframe', 'style'])
]
remove_attributes = ['style']
remove_tags_after = [{'id':['article-content']},
{'class':['pull','inline-img'] },
dict(name='img', attrs={'class':'inline-media-embed'}),

View File

@@ -0,0 +1,54 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
'''
www.news-record.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NewsandRecord(BasicNewsRecipe):
title = u'Greensboro News & Record'
description = "News from Greensboro, North Carolina"
__author__ = 'Walt Anthony'
publisher = 'News & Record and Landmark Media Enterprises, LLC'
category = 'news, USA'
oldest_article = 3 #days
max_articles_per_feed = 25
summary_length = 150
language = 'en'
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags_before = dict(name='h3', attrs={'class':'nrcTxt_headline'})
remove_tags_after = dict(name='div', attrs={'id':'nrcBlk_ContentBody'})
remove_tags = [
dict(name='iframe'),
dict(name=['notags','embed','object','link','img']),
]
feeds = [
('News', 'http://www.news-record.com/news/archive/feed'),
('Greensboro News', 'http://www.news-record.com/news/greensboro/feed'),
('Education', 'http://www.news-record.com/news/education/feed'),
('Government', 'http://www.news-record.com/news/government/feed'),
('College Sports', 'http://www.news-record.com/sports/college/feed'),
('Sports Extra', 'http://www.news-record.com/blog/sportsextra/feed'),
('Life', 'http://www.news-record.com/life/top/feed'),
('NASCAR', 'http://www.news-record.com/sports/nascar/top/feed'),
('Editorials', 'http://www.news-record.com/opinion/editorials/feed'),
('Letters to the Editor', 'http://www.news-record.com/opinion/letters/feed')
]

View File

@@ -0,0 +1,197 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
# Needed for BLOGs
from calibre.web.feeds import Feed
class HBR(BasicNewsRecipe):
title = 'Harvard Business Review Blogs'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman, enhanced by BrianG'
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/'
INDEX = 'http://hbr.org/current'
#
# Blog Stuff
#
INCLUDE_BLOGS = True
INCLUDE_ARTICLES = False
# option-specific settings.
if INCLUDE_BLOGS == True:
remove_tags_after = dict(id='articleBody')
remove_tags_before = dict(id='pageFeature')
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
oldest_article = 30
max_articles_per_feed = 100
else:
timefmt = ' [%B %Y]'
keep_only_tags = [ dict(name='div', id='pageContainer')
]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD',
'mailingListTout', 'partnerCenter', 'pageFooter']),
dict(name='iframe')]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleBody{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
#-------------------------------------------------------------------------------------------------
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN_URL)
br.select_form(name='signInForm')
br['signInForm:username'] = self.username
br['signInForm:password'] = self.password
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
return br
#-------------------------------------------------------------------------------------------------
def cleanup(self):
if self.logout_url is not None:
self.browser.open(self.logout_url)
#-------------------------------------------------------------------------------------------------
def map_url(self, url):
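# Articles ending in '/ar/1' have a printer-friendly twin at '/ar/pr'; note
# that any other URL falls through and yields None.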
if url.endswith('/ar/1'):
return url[:-1]+'pr'
#-------------------------------------------------------------------------------------------------
def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX)
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
return self.index_to_soup('http://hbr.org'+url)
#-------------------------------------------------------------------------------------------------
def hbr_parse_section(self, container, feeds):
current_section = None
current_articles = []
for x in container.findAll(name=['li', 'h3', 'h4']):
if x.name in ['h3', 'h4'] and not x.findAll(True):
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if x.name == 'li':
a = x.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
url = a.get('href')
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org'+url
url = self.map_url(url)
p = x.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
if not title or not url:
continue
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
current_articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
if current_section and current_articles:
feeds.append((current_section, current_articles))
#-------------------------------------------------------------------------------------------------
def hbr_parse_toc(self, soup):
feeds = []
features = soup.find(id='issueFeaturesContent')
self.hbr_parse_section(features, feeds)
departments = soup.find(id='issueDepartments')
self.hbr_parse_section(departments, feeds)
return feeds
#-------------------------------------------------------------------------------------------------
def feed_to_index_append(self, feedObject, masterFeed):
# Loop thru the feed object and build the correct type of article list
for feed in feedObject:
# build the correct structure from the feed object
newArticles = []
for article in feed.articles:
newArt = {
'title' : article.title,
'url' : article.url,
'date' : article.date,
'description' : article.text_summary
}
newArticles.append(newArt)
# Append the earliest/latest dates of the feed to the feed title
startDate, endDate = self.get_feed_dates(feed, '%d-%b')
newFeedTitle = feed.title + ' (' + startDate + ' thru ' + endDate + ')'
# append the newly-built list object to the index object passed in
# as masterFeed.
masterFeed.append( (newFeedTitle,newArticles) )
#-------------------------------------------------------------------------------------------------
def get_feed_dates(self, feedObject, dateMask):
startDate = feedObject.articles[len(feedObject.articles)-1].localtime.strftime(dateMask)
endDate = feedObject.articles[0].localtime.strftime(dateMask)
return startDate, endDate
#-------------------------------------------------------------------------------------------------
def hbr_parse_blogs(self, feeds):
# Do the "official" parse_feeds first
rssFeeds = Feed()
# Use the PARSE_FEEDS method to get a Feeds object of the articles
rssFeeds = BasicNewsRecipe.parse_feeds(self)
# Create a new feed of the right configuration and append to existing afeeds
self.feed_to_index_append(rssFeeds[:], feeds)
#-------------------------------------------------------------------------------------------------
def parse_index(self):
if self.INCLUDE_ARTICLES == True:
soup = self.hbr_get_toc()
feeds = self.hbr_parse_toc(soup)
else:
feeds = []
# blog stuff
if self.INCLUDE_BLOGS == True:
self.hbr_parse_blogs(feeds)
return feeds
#-------------------------------------------------------------------------------------------------
def get_cover_url(self):
cover_url = None
index = 'http://hbr.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
if link_item:
cover_url = 'http://hbr.org' + link_item['src']
return cover_url

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__description__ = 'Daily newspaper from Aragon'
__version__ = 'v1.01'
__date__ = '30, January 2010'
'''
http://www.heraldo.es/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class heraldo(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'Daily newspaper from Aragon'
cover_url = 'http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo.gif'
title = u'Heraldo de Aragon'
publisher = 'OJD Nielsen'
category = 'News, politics, culture, economy, general interest'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 25
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
keep_only_tags = [
dict(name='div', attrs={'class':['titularNoticiaNN','textoGrisVerdanaContenidos']})
]
feeds = [
(u'Portadas ', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
]
extra_css = '''
.articledate {color: gray;font-family: monospace;}
.articledescription {display: block;font-family: sans;font-size: 0.7em; text-indent: 0;}
.firma {color: #666;display: block;font-family: verdana, arial, helvetica;font-size: 1em;margin-bottom: 8px;}
.textoGrisVerdanaContenidos {color: #56595c;display: block;font-family: Verdana;font-size: 1.28571em;padding-bottom: 10px}
.titularNoticiaNN {display: block;padding-bottom: 10px;padding-left: 0;padding-right: 0;padding-top: 4px}
.titulo {color: #003066;font-family: Tahoma;font-size: 1.92857em;font-weight: bold;line-height: 1.2em}
'''

View File

@@ -0,0 +1,41 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
'''
www.hotair.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class hotair(BasicNewsRecipe):
title = u'Hot Air'
__author__ = 'Walt Anthony'
description = "The world's first, full-service conservative Internet broadcast network"
publisher = 'Hot Air'
category = 'news, politics, USA'
oldest_article = 3
max_articles_per_feed = 100
summary_length = 150
language = 'en'
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'id':'page-post'})]
remove_tags = [dict(name=['iframe', 'small', 'embed', 'object','link','script','form'])]
feeds = [
('Hot Air', 'http://feeds.feedburner.com/hotair/main'),
('The Greenroom', 'http://feeds2.feedburner.com/hotair/greenroom')
]

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
'''
http://www.ilsole24ore.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ilsole(BasicNewsRecipe):
author = 'Lorenzo Vigentini & Edwin van Maastrigt'
description = 'Financial news daily paper'
cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
title = u'il Sole 24 Ore '
publisher = 'italiaNews'
category = 'News, finance, economy, politics'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 2
max_articles_per_feed = 50
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
def get_article_url(self, article):
return article.get('id', article.get('guid', None))
def print_version(self, url):
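# Drop any query string, then point at the '_PRN' print variant; note that
# rpartition('?') leaves link empty when the URL carries no '?', which this
# code assumes never happens.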
link, sep, params = url.rpartition('?')
return link.replace('.shtml', '_PRN.shtml')
keep_only_tags = [
dict(name='div', attrs={'class':'txt'})
]
remove_tags = [dict(name='br')]
feeds = [
(u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
(u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
(u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
(u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
(u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
(u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
(u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
(u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
(u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
(u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
]
extra_css = '''
html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
.linkHighlight {color:#0292c6;}
.txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
.txt p {line-height:18px;}
.txt span {line-height:22px;}
.title h3 {color:#7b7b7b;}
.title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
'''

View File

@@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
information.dk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Information_dk(BasicNewsRecipe):
title = 'Information - Denmark'
__author__ = 'Darko Miletic'
description = 'News from Denmark'
publisher = 'information.dk'
category = 'news, politics, Denmark'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'utf8'
language = 'da'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [
(u'Nyheder fra' , u'http://www.information.dk/feed')
,(u'Bedst lige nu' , u'http://www.information.dk/bedstligenu/feed')
,(u'Politik og internationalt' , u'http://www.information.dk/politik/feed')
,(u'Kunst og kultur' , u'http://www.information.dk/kultur/feed')
,(u'Moderne Tider' , u'http://www.information.dk/modernetider/feed')
,(u'Klima' , u'http://www.information.dk/klima/feed')
,(u'Opinion' , u'http://www.information.dk/opinion/feed')
,(u'Litteratur' , u'http://www.information.dk/litteratur/feed')
,(u'Film' , u'http://www.information.dk/film/feed')
,(u'Kunst' , u'http://www.information.dk/kunst/feed')
]
remove_tags_before = dict(name='h1',attrs={'class':'print-title'})
remove_tags_after = dict(name='div',attrs={'class':'print-footer'})
remove_tags = [dict(name=['object','link'])]
def print_version(self, url):
return url.replace('information.dk/','information.dk/print/')

View File

@@ -0,0 +1,58 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.ionline.pt
'''
from calibre.web.feeds.news import BasicNewsRecipe
class IOnline_pt(BasicNewsRecipe):
title = 'ionline - Portugal'
__author__ = 'Darko Miletic'
description = 'News from Portugal'
publisher = 'ionline.pt'
category = 'ionline, noticias, portugal, jornal, actualidade, benfica, bolsa, desporto, empresas, globo, europa, futebol, internacional, investir, lisboa, jogos, musica, videos, tempo, meteorologia, pais, politica, porto, sporting, fcporto, televisao, tv, opiniao, nacional, sociedade, crise, financeira, policia, crime, artes, cinema, cultura, madeleine, blog, ciencia, tecnologia, galerias, fotografia, fotos, famosos, emprego, imagens, teatro, news, mundial, governo, ps, psd, be, pcp, cds, pp, partidos'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'pt'
extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(name=['h5','h1'])
, dict(name='div', attrs={'class':['publish','overview','entity']})
]
remove_tags = [
dict(name=['object','embed','iframe'])
]
feeds = [
(u'Portugal' , u'http://www.ionline.pt/rss/portugal.xml' )
,(u'Mundo' , u'http://www.ionline.pt/rss/mundo.xml' )
,(u'Dinheiro' , u'http://www.ionline.pt/rss/dinheiro.xml' )
,(u'Desporto' , u'http://www.ionline.pt/rss/desporto.xml' )
,(u'Boa Vida' , u'http://www.ionline.pt/rss/boavida.xml' )
,(u'iReporter', u'http://www.ionline.pt/rss/ireporter.xml')
,(u'iBlogues' , u'http://www.ionline.pt/rss/iblogues.xml' )
]
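# ionline's print view is keyed by the numeric article id that starts the last URL segment; grab the digits before the first hyphen and build the print URL from them.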
def print_version(self, url):
rest = url.rpartition('/')[2]
lmain = rest.partition('-')[0]
lurl = u'http://www.ionline.pt/interior/index.php?p=news-print&idNota=' + lmain
return lurl

View File

@ -11,7 +11,7 @@ class IrishTimes(BasicNewsRecipe):
title = u'The Irish Times'
__author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
language = 'en'
timefmt = ' (%A, %B %d, %Y)'
oldest_article = 3

View File

@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
jp.dk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JP_dk(BasicNewsRecipe):
title = 'Jyllands-Posten'
__author__ = 'Darko Miletic'
description = 'News from Denmark'
publisher = 'jp.dk'
category = 'news, politics, Denmark'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
language = 'da'
extra_css = ' body{font-family: Arial,Verdana,Helvetica,Geneva,sans-serif } h1{font-family: Times,Georgia,Verdana,serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [
(u'Tophistorier', u'http://www.jp.dk/rss/topnyheder.jsp')
,(u'Seneste nyt' , u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste')
,(u'Indland' , u'http://www.jp.dk/rss/indland.jsp')
,(u'Udland' , u'http://www.jp.dk/rss/udland.jsp')
,(u'Ny viden' , u'http://www.jp.dk/rss/nyviden.jsp')
,(u'Timeout' , u'http://www.jp.dk/rss/timeout.jsp')
,(u'Kultur' , u'http://www.jp.dk/rss/kultur.jsp')
,(u'Sport' , u'http://www.jp.dk/rss/sport.jsp')
]
remove_tags = [
dict(name=['object','link'])
,dict(name='p',attrs={'class':'artByline'})
]
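# Appending the printversion service parameter makes jp.dk return a print-layout page.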
def print_version(self, url):
return url + '?service=printversion'

View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.kamerabild.se
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Kamerabild(BasicNewsRecipe):
title = 'Kamera & Bild'
__author__ = 'Darko Miletic'
description = 'Photo News from Sweden'
publisher = 'kamerabild.se'
category = 'news, photograph, Sweden'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'utf8'
language = 'sv'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [(u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml')]
keep_only_tags = [dict(name='div',attrs={'class':'container'})]
remove_tags_after = dict(name='div',attrs={'class':'editor'})
remove_tags = [
dict(name=['object','link','iframe'])
,dict(name='div',attrs={'class':['pricerunner_head','sideBar','img']})
]
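# Drop inline style attributes and pass the images through adeify_images so they survive Adobe Digital Editions' rendering quirks.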
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -1,4 +1,7 @@
# -*- coding: utf-8 -*-
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -8,6 +11,7 @@ class JASN(BasicNewsRecipe):
__author__ = 'Krittika Goyal'
oldest_article = 31 #days
max_articles_per_feed = 25
delay = 5
needs_subscription = True
INDEX = 'http://jasn.asnjournals.org/current.shtml'
@ -15,13 +19,13 @@ class JASN(BasicNewsRecipe):
remove_tags_before = dict(name='h2')
#remove_tags_after = dict(name='th', attrs={'align':'left'})
remove_tags = [
dict(name='iframe'),
#dict(name='div', attrs={'class':'related-articles'}),
dict(name='td', attrs={'id':['jasnFooter']}),
dict(name='table', attrs={'id':"jasnNavBar"}),
dict(name='table', attrs={'class':'content_box_outer_table'}),
dict(name='th', attrs={'align':'left'})
]
@ -45,10 +49,52 @@ class JASN(BasicNewsRecipe):
raise ValueError('Failed to log in, is your account expired?')
return br
#feeds = [
#('JASN',
#'http://jasn.asnjournals.org/rss/current.xml'),
#]
# To get the article TOC
def jasn_get_index(self):
return self.index_to_soup('http://jasn.asnjournals.org/current.shtml')
# To parse the article TOC
def parse_index(self):
parse_soup = self.jasn_get_index()
div = parse_soup.find(id='tocBody')
current_section = None
current_articles = []
feeds = []
for x in div.findAll(True):
if x.name == 'h2':
# Section heading found
if current_articles and current_section:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if current_section is not None and x.name == 'strong':
title = self.tag_to_string(x)
a = x.parent.parent.find('a', href=lambda x: x and '/full/' in x)
if a is None:
continue
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('/'):
url = 'http://jasn.asnjournals.org'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
current_articles.append({'title': title, 'url':url,
'description':'', 'date':''})
if current_articles and current_section:
feeds.append((current_section, current_articles))
return feeds
@ -59,10 +105,18 @@ class JASN(BasicNewsRecipe):
if not url:
continue
if url.startswith('/'):
url = 'http://jasn.asnjournals.org'+url
img = isoup = None
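# index_to_soup can fail transiently here; wait five seconds and retry once before giving up on this article.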
try:
isoup = self.index_to_soup(url)
except:
time.sleep(5)
try:
isoup = self.index_to_soup(url)
except:
continue
img = isoup.find('img', src=lambda x: x and x.startswith('/content/'))
if img is not None:
img.extract()
table = a.findParent('table')
@ -71,3 +125,4 @@ class JASN(BasicNewsRecipe):

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Monthly Italian edition of Scientific American'
'''
http://lescienze.espresso.repubblica.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class leScienze(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'Monthly Italian edition of Scientific American'
cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
title = 'le Scienze'
publisher = "Gruppo editoriale L'Espresso"
category = 'Science, general interest'
language = 'it'
encoding = 'cp1252'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 31
max_articles_per_feed = 20
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
keep_only_tags = [
dict(name='div', attrs={'class':'bigbox'})
]
remove_tags = [
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div',attrs={'class':'box-commenti'}),
dict(name='div',attrs={'id':['rssdiv','blocco']})
]
remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})]
feeds = [
(u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
(u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
(u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
(u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
(u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
(u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
(u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
(u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
(u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
(u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
(u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
(u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
(u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
(u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
(u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
(u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
(u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
(u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
(u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
(u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
(u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
(u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
(u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
(u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
]
extra_css = '''
h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
.occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
.titolo {font-weight:bold;}
.label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
.firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
.testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
'''

View File

@ -0,0 +1,239 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
macleans.ca
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from datetime import timedelta, date
class Macleans(BasicNewsRecipe):
title = u'Macleans Magazine'
__author__ = 'Nick Redding'
language = 'en_CA'
description = 'Macleans Magazine'
no_stylesheets = True
timefmt = ' [%b %d]'
# customization notes: delete sections you are not interested in
# set oldest_article to the maximum number of days back from today to include articles
sectionlist = [
['http://www2.macleans.ca/','Front Page'],
['http://www2.macleans.ca/category/canada/','Canada'],
['http://www2.macleans.ca/category/world-from-the-magazine/','World'],
['http://www2.macleans.ca/category/business','Business'],
['http://www2.macleans.ca/category/arts-culture/','Culture'],
['http://www2.macleans.ca/category/opinion','Opinion'],
['http://www2.macleans.ca/category/health-from-the-magazine/','Health'],
['http://www2.macleans.ca/category/environment-from-the-magazine/','Environment'],
['http://www2.macleans.ca/category/education/','On Campus'],
['http://www2.macleans.ca/category/travel-from-the-magazine/','Travel']
]
oldest_article = 7
# formatting for print version of articles
extra_css = '''h2{font-family:Times,serif; font-size:large;}
small {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
'''
# tag handling for print version of articles
keep_only_tags = [dict(id='tw-print')]
remove_tags = [dict({'class':'postmetadata'})]
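# Give every image its own paragraph: unwrap images from links, and split them out of non-empty text paragraphs so each renders on its own line.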
def preprocess_html(self,soup):
for img_tag in soup.findAll('img'):
parent_tag = img_tag.parent
if parent_tag.name == 'a':
new_tag = Tag(soup,'p')
new_tag.insert(0,img_tag)
parent_tag.replaceWith(new_tag)
elif parent_tag.name == 'p':
if not self.tag_to_string(parent_tag) == '':
new_div = Tag(soup,'div')
new_tag = Tag(soup,'p')
new_tag.insert(0,img_tag)
parent_tag.replaceWith(new_div)
new_div.insert(0,new_tag)
new_div.insert(1,parent_tag)
return soup
def parse_index(self):
articles = {}
key = None
ans = []
def parse_index_page(page_url,page_title):
def decode_date(datestr):
dmysplit = datestr.strip().lower().split(',')
mdsplit = dmysplit[1].split()
m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(mdsplit[0])+1
d = int(mdsplit[1])
y = int(dmysplit[2].split()[0])
return date(y,m,d)
def article_title(tag):
atag = tag.find('a',href=True)
if not atag:
return ''
return self.tag_to_string(atag)
def article_url(tag):
atag = tag.find('a',href=True)
if not atag:
return ''
return atag['href']+'print/'
def article_description(tag):
for p_tag in tag.findAll('p'):
d = self.tag_to_string(p_tag,False)
if not d == '':
return d
return ''
def compound_h4_h3_title(tag):
if tag.h4:
if tag.h3:
return self.tag_to_string(tag.h4,False)+u'\u2014'+self.tag_to_string(tag.h3,False)
else:
return self.tag_to_string(tag.h4,False)
elif tag.h3:
return self.tag_to_string(tag.h3,False)
else:
return ''
def compound_h2_h4_title(tag):
if tag.h2:
if tag.h4:
return self.tag_to_string(tag.h2,False)+u'\u2014'+self.tag_to_string(tag.h4,False)
else:
return self.tag_to_string(tag.h2,False)
elif tag.h4:
return self.tag_to_string(tag.h4,False)
else:
return ''
def handle_article(header_tag, outer_tag):
if header_tag:
url = article_url(header_tag)
title = article_title(header_tag)
author_date_tag = outer_tag.h4
if author_date_tag:
author_date = self.tag_to_string(author_date_tag,False).split(' - ')
author = author_date[0].strip()
article_date = decode_date(author_date[1])
earliest_date = date.today() - timedelta(days=self.oldest_article)
if article_date < earliest_date:
self.log("Skipping article dated %s" % author_date[1])
else:
excerpt_div = outer_tag.find('div','excerpt')
if excerpt_div:
description = article_description(excerpt_div)
else:
description = ''
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date=author_date[1],description=description,author=author,content=''))
def handle_category_article(cat, header_tag, outer_tag):
url = article_url(header_tag)
title = article_title(header_tag)
if not title == '':
title = cat+u'\u2014'+title
a_tag = outer_tag.find('span','authorLink')
if a_tag:
author = self.tag_to_string(a_tag,False)
a_tag.parent.extract()
else:
author = ''
description = article_description(outer_tag)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description=description,author=author,content=''))
soup = self.index_to_soup(page_url)
if page_title == 'Front Page':
# special processing for the front page
top_stories = soup.find('div',{ "id" : "macleansFeatured" })
if top_stories:
for div_slide in top_stories.findAll('div','slide'):
url = article_url(div_slide)
div_title = div_slide.find('div','header')
if div_title:
title = self.tag_to_string(div_title,False)
else:
title = ''
description = article_description(div_slide)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
from_macleans = soup.find('div',{ "id" : "fromMacleans" })
if from_macleans:
for li_tag in from_macleans.findAll('li','fromMacleansArticle'):
title = compound_h4_h3_title(li_tag)
url = article_url(li_tag)
description = article_description(li_tag)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
blog_central = soup.find('div',{ "id" : "bloglist" })
if blog_central:
for li_tag in blog_central.findAll('li'):
title = compound_h2_h4_title(li_tag)
if li_tag.h4:
url = article_url(li_tag.h4)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description='',author='',content=''))
# need_to_know = soup.find('div',{ "id" : "needToKnow" })
# if need_to_know:
# for div_tag in need_to_know('div',attrs={'class' : re.compile("^needToKnowArticle")}):
# title = compound_h4_h3_title(div_tag)
# url = article_url(div_tag)
# description = article_description(div_tag)
# if not articles.has_key(page_title):
# articles[page_title] = []
# articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
for news_category in soup.findAll('div','newsCategory'):
news_cat = self.tag_to_string(news_category.h4,False)
handle_category_article(news_cat, news_category.find('h2'), news_category.find('div'))
for news_item in news_category.findAll('li'):
handle_category_article(news_cat,news_item.h3,news_item)
return
# find the div containing the highlight article
div_post = soup.find('div','post')
if div_post:
h1_tag = div_post.h1
handle_article(h1_tag,div_post)
# find the divs containing the rest of the articles
div_other = div_post.find('div', { "id" : "categoryOtherPosts" })
if div_other:
for div_entry in div_other.findAll('div','entry'):
h2_tag = div_entry.h2
handle_article(h2_tag,div_entry)
for page_name,page_title in self.sectionlist:
parse_index_page(page_name,page_title)
ans.append(page_title)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -0,0 +1,29 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Metro_Montreal(BasicNewsRecipe):
title = u'M\xe9tro Montr\xe9al'
__author__ = 'Jerry Clapperton'
description = 'Le quotidien le plus branché sur le monde'
language = 'fr'
oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
encoding = 'utf-8'
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
remove_tags = [dict(attrs={'id':'buttons'})]
feeds = [
(u"L'info", u'http://journalmetro.com/linfo/rss'),
(u'Monde', u'http://journalmetro.com/monde/rss'),
(u'Culture', u'http://journalmetro.com/culture/rss'),
(u'Sports', u'http://journalmetro.com/sports/rss'),
(u'Paroles', u'http://journalmetro.com/paroles/rss')
]
def print_version(self, url):
return url.replace('article', 'ArticlePrint') + '?language=fr'

View File

@ -0,0 +1,49 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
'''
www.michellemalkin.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MichelleMalkin(BasicNewsRecipe):
title = u'Michelle Malkin'
description = "News and opinion from Michelle Malkin: mother, wife, blogger, conservative syndicated columnist, author, and Fox News Channel contributor."
__author__ = 'Walt Anthony'
publisher = 'Michelle Malkin LLC'
category = 'news, politics, USA'
oldest_article = 7 #days
max_articles_per_feed = 50
summary_length = 150
language = 'en'
remove_javascript = True
no_stylesheets = True
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
remove_tags = [
dict(name=['iframe', 'embed', 'object']),
dict(name='div', attrs={'id':['comments', 'commentForm']}),
dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
]
feeds = [u'http://feeds.feedburner.com/michellemalkin/posts']
def print_version(self, url):
return url + '?print=1'

View File

@ -0,0 +1,96 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Montreal Gazette
title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
# delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
'''
www.nationalreview.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NRO(BasicNewsRecipe):
title = u'National Review Online'
__author__ = 'Walt Anthony'
description = "National Review is America's most widely read and influential magazine and web site for Republican/conservative news, commentary, and opinion."
publisher = 'National Review, Inc.'
category = 'news, politics, USA'
oldest_article = 3
max_articles_per_feed = 100
summary_length = 150
language = 'en'
encoding = 'utf-8'
use_embedded_content = True
remove_javascript = True
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [
dict(name=['embed','object','iframe']),
]
feeds = [
(u'National Review', u'http://www.nationalreview.com/index.xml'),
(u'The Corner', u'http://corner.nationalreview.com/corner.xml'),
(u'The Agenda', u'http://agenda.nationalreview.com/agenda.xml'),
(u'Bench Memos', u'http://bench.nationalreview.com/bench.xml'),
(u'Campaign Spot', u'http://campaignspot.nationalreview.com/campaignspot.xml'),
(u'Critical Care', u'http://healthcare.nationalreview.com/healthcare.xml'),
(u'Doctor, Doctor', u'http://www.nationalreview.com/doctor/doctor.xml'),
(u"Kudlow's Money Politic$", u'http://kudlow.nationalreview.com/kudlow.xml'),
(u'Media Blog', u'http://media.nationalreview.com/media.xml'),
(u'Phi Beta Cons', u'http://phibetacons.nationalreview.com/phibetacons.xml'),
(u'Planet Gore', u'http://planetgore.nationalreview.com/planetgore.xml')
]

View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Neowin(BasicNewsRecipe):
title = u'Neowin.net'
oldest_article = 5
language = 'en'
description = 'News from IT'
publisher = 'Neowin'
category = 'news, IT, Microsoft, Apple, hardware, software, games'
__author__ = 'Darko Miletic'
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
conversion_options = {
'tags' : category
,'language' : language
,'comments' : description
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
remove_tags_after = dict(name='div', attrs={'id':'tag-bar'})
remove_tags = [
dict(name=['base','object','link','iframe'])
,dict(name='div', attrs={'id':'tag-bar'})
]
feeds = [
(u'Software' , u'http://www.neowin.net/news/rss/software' )
,(u'Gaming' , u'http://www.neowin.net/news/rss/gaming' )
,(u'Microsoft', u'http://www.neowin.net/news/rss/microsoft')
,(u'Apple' , u'http://www.neowin.net/news/rss/apple' )
,(u'Editorial', u'http://www.neowin.net/news/rss/editorial')
]
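# No rewriting is needed for Neowin image URLs; pass them through unchanged.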
def image_url_processor(cls, baseurl, url):
return url

View File

@ -1,46 +1,42 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nin.co.rs
'''
import re, urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Nin(BasicNewsRecipe):
title = 'NIN online'
__author__ = 'Darko Miletic'
description = 'Nedeljne Informativne Novine'
publisher = 'NIN d.o.o.'
category = 'news, politics, Serbia'
no_stylesheets = True
oldest_article = 15
simultaneous_downloads = 1
delay = 1
encoding = 'utf-8'
needs_subscription = True
remove_empty_feeds = True
PREFIX = 'http://www.nin.co.rs'
INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true'
use_embedded_content = False
language = 'sr'
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'pretty_print' : True
, 'linearize_tables' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_attributes = ['height','width']
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -65,30 +61,14 @@ class Nin(BasicNewsRecipe):
cover_url = self.PREFIX + link_item['src']
return cover_url
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def parse_index(self):
articles = []
count = 0
soup = self.index_to_soup(self.PREFIX)
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
count = count +1
if self.test and count > 2:
return articles
section = self.tag_to_string(item)
feedlink = self.PREFIX + item['href']
feedpage = self.index_to_soup(feedlink)
@ -110,3 +90,4 @@ class Nin(BasicNewsRecipe):
})
articles.append((section,inarts))
return articles

View File

@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nursingtimes.net
'''
import urllib
from calibre.web.feeds.recipes import BasicNewsRecipe
class NursingTimes(BasicNewsRecipe):
title = 'Nursing Times'
__author__ = 'Darko Miletic'
description = 'Nursing practice, NHS and health care news'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
publisher = 'emap'
category = 'news, health, nursing, UK'
language = 'en_GB'
needs_subscription = True
LOGIN = 'http://www.nursingtimes.net/sign-in'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
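# The sign-in form is posted by hand with the field names the nursingtimes.net login page expects, including the coordinates of its image submit button.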
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.LOGIN)
if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'campaigncode' :'0'
,'referrer' :''
,'security_text':''
,'SIemail' :self.username
,'passWord' :self.password
,'LoginButton.x':'27'
,'LoginButton.y':'13'
})
br.open(self.LOGIN,data)
return br
keep_only_tags = [dict(name='div', attrs={'class':'storytext'})]
remove_tags = [
dict(name=['object','link','script','iframe'])
,dict(name='div',attrs={'id':'comments_form'})
]
remove_tags_after = dict(name='div',attrs={'id':'comments_form'})
feeds = [
(u'Breaking News', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=1')
,(u'Practice', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=512')
,(u'Behind the headlines', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=468')
,(u'Analysis', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=62')
,(u'Acute care news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=5')
,(u'Primary care news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=231')
,(u'Mental Health news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=27')
,(u'Management news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=32')
,(u"Older people's nursing news", u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=181')
,(u'Respiratory news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=177')
,(u'Wound care news', u'http://www.nursingtimes.net/XmlServers/navsectionRSS.aspx?navsectioncode=182')
]

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re, time
from calibre import entity_to_unicode
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
@ -79,13 +79,30 @@ class NYTimes(BasicNewsRecipe):
.authorId {text-align: left; \
font-style: italic;}\n '
# def get_cover_url(self):
# st = time.localtime()
# year = str(st.tm_year)
# month = "%.2d" % st.tm_mon
# day = "%.2d" % st.tm_mday
# cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/' + 'scan.jpg'
# return cover
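# The NYT publishes a scan of each day's front page at a date-stamped URL; probe it and fall back to no cover if the scan is absent.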
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
self.log("\nCover unavailable")
cover = None
return cover
def get_masthead_url(self):
masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead
def get_browser(self):
br = BasicNewsRecipe.get_browser()

View File

@ -5,16 +5,23 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import string, re, time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
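# Some nytimes.com pages declare iso-8859-1 but are really cp1252; this decoder sniffs the raw page and is assigned to the recipe's encoding below.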
def decode(self, src):
enc = 'utf-8'
if 'iso-8859-1' in src:
enc = 'cp1252'
return src.decode(enc, 'ignore')
class NYTimes(BasicNewsRecipe):
title = 'The New York Times (subscription)'
__author__ = 'Kovid Goyal'
language = 'en'
requires_version = (0, 6, 36)
description = 'Daily news from the New York Times (subscription version)'
timefmt = ' [%a, %b %d, %Y]'
@ -24,10 +31,11 @@ class NYTimes(BasicNewsRecipe):
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline',
'navigation', 'archive', 'side_search', 'blog_sidebar',
'side_tool', 'side_index', 'login', 'businessSearchBar',
'adxLeaderboard',
'relatedArticles', 'relatedTopics', 'adxSponLink']),
dict(name=['script', 'noscript', 'style'])]
#encoding = 'cp1252'
encoding = decode
no_stylesheets = True
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
@ -44,13 +52,39 @@ class NYTimes(BasicNewsRecipe):
#open('/t/log.html', 'wb').write(raw)
return br
def get_masthead_url(self):
masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
self.log("\nCover unavailable")
cover = None
return cover
def short_title(self):
return 'NY Times'
def parse_index(self):
self.encoding = 'cp1252'
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
self.encoding = decode
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=False)).strip()

View File

@ -0,0 +1,56 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class NewYorkTimesBookReview(BasicNewsRecipe):
title = u'New York Times Book Review'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 8 #days
max_articles_per_feed = 1000
recursions = 2
#encoding = 'latin1'
remove_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
remove_tags_after = dict(name='div', attrs={'id':'authorId'})
remove_tags = [
dict(name='iframe'),
dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
#dict(name='ul', attrs={'class':'article-tools'}),
#dict(name='ul', attrs={'class':'articleTools'}),
]
match_regexps = [
r'http://www.nytimes.com/.+pagewanted=[2-9]+'
]
feeds = [
('New York Times Sunday Book Review',
'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
]
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'id':'article'})
#td = heading.findParent(name='td')
#td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
#for x in soup.findAll(name='p', text=lambda x:x and '--&gt;' in x):
#p = x.findParent('p')
#if p is not None:
#p.extract()
return soup
def postprocess_html(self, soup, first):
for div in soup.findAll(id='pageLinks'):
div.extract()
if not first:
h1 = soup.find('h1')
if h1 is not None:
h1.extract()
t = soup.find(attrs={'class':'timestamp'})
if t is not None:
t.extract()
return soup

View File

@ -1,31 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsandObserver(BasicNewsRecipe):
title = u'Raleigh News & Observer'
description = 'News from Raleigh, North Carolina'
language = 'en'
__author__ = 'Krittika Goyal, updated by Walt Anthony'
oldest_article = 3 #days
max_articles_per_feed = 25
summary_length = 150
no_stylesheets = True
remove_javascript = True
remove_stylesheets = True
remove_tags_before = dict(name='h1', attrs={'id':'story_headline'})
remove_tags_after = dict(name='div', attrs={'id':'story_text_remaining'})
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'id':['right-rail', 'story_tools', 'toolbox', 'toolbar', 'tool', 'shirttail', 'comment_widget', 'story_keywords', 'txtResizeTool']}),
dict(name='div', attrs={'class':['Buy-It-Now', 'story_link_share']}),
dict(name='ul', attrs={'class':'bold_tabs_nav'}),
]
feeds = [
('Cover', 'http://www.newsobserver.com/100/index.rss'),
('News', 'http://www.newsobserver.com/102/index.rss'),
('Politics', 'http://www.newsobserver.com/105/index.rss'),
('Business', 'http://www.newsobserver.com/104/index.rss'),
('Sports', 'http://www.newsobserver.com/103/index.rss'),
('College Sports', 'http://www.newsobserver.com/119/index.rss'),
('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
('Editorials', 'http://www.newsobserver.com/158/index.rss')
]

View File

@ -0,0 +1,73 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'News from Orange County - v1.01 (29, January 2010)'
'''
http://www.ocregister.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ocRegister(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'News from Orange County'
cover_url = 'http://images.onset.freedom.com/ocregister/logo.gif'
title = u'Orange County Register'
publisher = 'Orange County Register Communication'
category = 'News, finance, economy, politics'
language = 'en'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 25
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
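# The print view is keyed by an article id; recover it from the article URL by keeping the id segment up to the first dot and stripping its hyphens.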
def print_version(self,url):
printUrl = 'http://www.ocregister.com/common/printer/view.php?db=ocregister&id='
segments = url.split('/')
subSegments = (segments[4]).split('.')
myArticle = (subSegments[0]).replace('-', '')
myURL= printUrl + myArticle
return myURL
keep_only_tags = [
dict(name='div', attrs={'id':'ArticleContentWrap'})
]
remove_tags = [
dict(name='div', attrs={'class':'hideForPrint'}),
dict(name='div', attrs={'id':'ContentFooter'})
]
feeds = [
(u'News', u'http://www.ocregister.com/common/rss/rss.php?catID=18800'),
(u"Today's Paper", u'http://www.ocregister.com/common/rss/rss.php?catID=18976'),
(u'Business', u'http://www.ocregister.com/common/rss/rss.php?catID=18909'),
(u'Cars', u'http://www.ocregister.com/common/rss/rss.php?catID=20128'),
(u'Entertainment', u'http://www.ocregister.com/common/rss/rss.php?catID=18926'),
(u'Home', u'http://www.ocregister.com/common/rss/rss.php?catID=19142'),
(u'Life', u'http://www.ocregister.com/common/rss/rss.php?catID=18936'),
(u'Opinion', u'http://www.ocregister.com/common/rss/rss.php?catID=18963'),
(u'Sports', u'http://www.ocregister.com/common/rss/rss.php?catID=18901'),
(u'Travel', u'http://www.ocregister.com/common/rss/rss.php?catID=18959')
]
extra_css = '''
h1 {color:#ff6600;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
#articledate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
#articlebyline {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
img {align:left;}
#topstoryhead {color:#ff6600;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
'''

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class OpenLeft(BasicNewsRecipe):
# Information about the recipe
title = 'Open Left'
description = 'Progressive American commentary on current events'
category = 'news, commentary'
language = 'en'
__author__ = 'Xanthan Gum'
# Fetch no article older than seven days
oldest_article = 7
# Fetch no more than 100 articles
max_articles_per_feed = 100
# Fetch the articles from the RSS feed
feeds = [(u'Articles', u'http://www.openleft.com/rss/rss2.xml')]

View File

@ -0,0 +1,101 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Ottawa Citizen
title = u'Ottawa Citizen'
url_prefix = 'http://www.ottawacitizen.com'
description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
# delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,10 +1,12 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
pagina12.com.ar
'''
import time
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
@ -19,6 +21,8 @@ class Pagina12(BasicNewsRecipe):
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
extra_css = ' body{font-family: sans-serif} '
conversion_options = {
'comment' : description
@ -47,3 +51,8 @@ class Pagina12(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
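# The cover image filename varies with the day of the week; pick the right name and build the date-stamped URL for today's front page.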
def get_cover_url(self):
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
weekday = time.localtime().tm_wday
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]

View File

@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class PajamasMedia(BasicNewsRecipe):
title = u'Pajamas Media'
description = u'Provides exclusive news and opinion for forty countries.'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
recursions = 1
match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$']
#encoding = 'latin1'
remove_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
remove_tags_after = dict(name='div', attrs={'class':'paged-nav'})
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':['pages']}),
#dict(name='div', attrs={'id':['bookmark']}),
#dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}),
#dict(name='ul', attrs={'class':'articleTools'}),
]
feeds = [
('pajamas Media',
'http://feeds.feedburner.com/PajamasMedia'),
]
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'id':'innerpage-content'})
#td = heading.findParent(name='td')
#td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
return soup
def postprocess_html(self, soup, first):
if not first:
h = soup.find(attrs={'class':'innerpage-header'})
if h: h.extract()
auth = soup.find(attrs={'class':'author'})
if auth: auth.extract()
return soup

View File

@ -8,8 +8,7 @@ class Physicstoday(BasicNewsRecipe):
description = u'Physics Today magazine'
publisher = 'American Institute of Physics'
category = 'Physics'
language = 'en'
cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
oldest_article = 30
max_articles_per_feed = 100
@ -30,8 +29,8 @@ class Physicstoday(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
br.select_form(name='login_form')
br['username'] = self.username
br['password'] = self.password
br.submit()

View File

@ -0,0 +1,55 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
politiken.dk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Politiken_dk(BasicNewsRecipe):
title = 'Politiken.dk'
__author__ = 'Darko Miletic'
description = 'News from Denmark'
publisher = 'politiken.dk'
category = 'news, politics, Denmark'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'cp1252'
language = 'da'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h1{font-family: Georgia,"Times New Roman",Times,serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [
(u'Tophistorier' , u'http://politiken.dk/rss/tophistorier.rss')
,(u'Seneste nyt' , u'http://politiken.dk/rss/senestenyt.rss')
,(u'Mest laeste' , u'http://politiken.dk/rss/mestlaeste.rss')
,(u'Danmark' , u'http://politiken.dk/rss/indland.rss')
,(u'Politik' , u'http://politiken.dk/rss/politik.rss')
,(u'Klima' , u'http://politiken.dk/rss/klima.rss')
,(u'Internationalt' , u'http://politiken.dk/rss/udland.rss')
,(u'Erhverv' , u'http://politiken.dk/rss/erhverv.rss')
,(u'Kultur' , u'http://politiken.dk/rss/kultur.rss')
,(u'Sport' , u'http://politiken.dk/rss/sport.rss')
,(u'Uddannelse' , u'http://politiken.dk/rss/uddannelse.rss')
,(u'Videnskab' , u'http://politiken.dk/rss/videnskab.rss')
]
remove_tags_before = dict(name='h1')
remove_tags = [
dict(name=['object','link'])
,dict(name='div',attrs={'class':'footer'})
]
def print_version(self, url):
return url + '?service=print'

View File

@ -0,0 +1,188 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
class ReadersDigest(BasicNewsRecipe):
title = 'Readers Digest'
__author__ = 'BrianG'
language = 'en'
description = 'Readers Digest Feeds'
no_stylesheets = True
use_embedded_content = False
oldest_article = 60
max_articles_per_feed = 200
language = 'en'
remove_javascript = True
extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
.mainHd{font-family:georgia,serif;color:#000000;}
h2 {font-family:Arial,Sans-serif;}
.name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
.date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
.byline{font-family:Arial,Sans-serif; font-size:x-small ;}
.photoBkt{ font-size:x-small ;}
.vertPhoto{font-size:x-small ;}
.credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
.credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
.artTxt{font-family:georgia,serif;}
.caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
.credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
a:link{color:#CC0000;}
.breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
'''
remove_tags = [
dict(name='h4', attrs={'class':'close'}),
dict(name='div', attrs={'class':'fromLine'}),
dict(name='img', attrs={'class':'colorTag'}),
dict(name='div', attrs={'id':'sponsorArticleHeader'}),
dict(name='div', attrs={'class':'horizontalAd'}),
dict(name='div', attrs={'id':'imageCounterLeft'}),
dict(name='div', attrs={'id':'commentsPrint'})
]
feeds = [
('New in RD', 'http://feeds.rd.com/ReadersDigest'),
('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
]
cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
#-------------------------------------------------------------------------------------------------
def print_version(self, url):
# Get the identity number of the current article and append it to the root print URL
if url.find('/article') > 0:
ident = url[url.find('/article')+8:url.find('.html?')-4]
url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
elif url.find('/post') > 0:
# in this case, have to get the page itself to derive the Print page.
soup = self.index_to_soup(url)
newsoup = soup.find('ul',attrs={'class':'printBlock'})
url = 'http://www.rd.com' + newsoup('a')[0]['href']
url = url[0:url.find('&Keep')]
return url
#-------------------------------------------------------------------------------------------------
def parse_index(self):
pages = [
('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
# useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
]
feeds = []
for page in pages:
section, url, divider, attrList = page
newArticles = self.page_parse(url, divider, attrList)
feeds.append((section,newArticles))
# after the pages of the site have been processed, parse several RSS feeds for additional sections
newfeeds = self.parse_rss()
# The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
# for this module (parse_index).
for feed in newfeeds:
newArticles = []
for article in feed.articles:
newArt = {
'title' : article.title,
'url' : article.url,
'date' : article.date,
'description' : article.text_summary
}
newArticles.append(newArt)
# New and Blogs should be the first two feeds.
if feed.title == 'New in RD':
feeds.insert(0,(feed.title,newArticles))
elif feed.title == 'Blogs':
feeds.insert(1,(feed.title,newArticles))
else:
feeds.append((feed.title,newArticles))
return feeds
#-------------------------------------------------------------------------------------------------
def page_parse(self, mainurl, divider, attrList):
articles = []
mainsoup = self.index_to_soup(mainurl)
for item in mainsoup.findAll(attrs=attrList):
newArticle = {
'title' : item('img')[0]['alt'],
'url' : 'http://www.rd.com'+item('a')[0]['href'],
'date' : '',
'description' : ''
}
articles.append(newArticle)
return articles
#-------------------------------------------------------------------------------------------------
def parse_rss (self):
# Do the "official" parse_feeds first
feeds = BasicNewsRecipe.parse_feeds(self)
# Loop through the articles in all feeds to find articles with "recipe" in the title
recipeArticles = []
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if curarticle.title.upper().find('RECIPE') >= 0:
recipeArticles.append(curarticle)
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
# If there are any recipes found, create a new Feed object and append.
if len(recipeArticles) > 0:
pfeed = Feed()
pfeed.title = 'Recipes'
pfeed.description = 'Recipe Feed (Virtual)'
pfeed.image_url = None
pfeed.oldest_article = 30
pfeed.id_counter = len(recipeArticles)
# Create a new Feed, add the recipe articles, and then append
# to "official" list of feeds
pfeed.articles = recipeArticles[:]
feeds.append(pfeed)
return feeds
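# Net effect (hypothetical title, for illustration): an RSS article titled
# 'Recipe: Apple Pie' is removed from its original feed and resurfaces under
# the virtual 'Recipes' feed built above.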

View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Regina Leader-Post
title = u'Regina Leader-Post'
url_prefix = 'http://www.leaderpost.com'
description = u'News from Regina, SK'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del div['id']
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Saskatoon Star-Phoenix
title = u'Saskatoon Star-Phoenix'
url_prefix = 'http://www.thestarphoenix.com'
description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del div['id']
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class SlovoRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'Abelturd'
language = 'sk'
version = 1
title = u'SLOVO'
publisher = u''
category = u'News, Newspaper'
description = u'Politicko-spolo\u010densk\xfd t\xfd\u017edenn\xedk'
encoding = 'Windows-1250'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
feeds = []
feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.noveslovo.sk/rss.asp'))
keep_only_tags = []
remove_tags = []
preprocess_regexps = [
(re.compile(r'<img src="gif/image1.gif">', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
def print_version(self, url):
m = re.search('(?<=id=)[0-9]*', url)
return u'http://www.noveslovo.sk/clanoktlac.asp?id=' + str(m.group(0))
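# e.g. (hypothetical URL) http://www.noveslovo.sk/clanok.asp?id=12345
#      -> http://www.noveslovo.sk/clanoktlac.asp?id=12345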

View File

@@ -1,6 +1,5 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
#from random import randint
from urllib import quote
class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3'
language = 'en'
description = 'Sports Illustrated'
version = 1
version = 3
title = u'Sports Illustrated'
no_stylesheets = True
remove_javascript = True
#template_css = ''
use_embedded_content = False
INDEX = 'http://sportsillustrated.cnn.com/'
@@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
def parse_index(self):
answer = []
soup = self.index_to_soup(self.INDEX)
# Find the link to the current issue on the front page.
# Find the link to the current issue on the front page. SI Cover
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
if cover:
currentIssue = cover.parent['href']
if currentIssue:
# Open the index of current issue
index = self.index_to_soup(currentIssue)
self.log('\tLooking for current issue in: ' + currentIssue)
# Now let us see if they updated their frontpage
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
if nav:
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
if img:
parent = img.parent
if parent.name == 'a':
# They didn't update their frontpage; Load the next issue from here
href = self.INDEX + parent['href']
index = self.index_to_soup(href)
self.log('\tLooking for current issue in: ' + href)
if index.find('div', 'siv_noArticleMessage'):
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
if nav:
# Their frontpage points to an issue without any articles; Use the previous issue
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
if img:
parent = img.parent
if parent.name == 'a':
href = self.INDEX + parent['href']
index = self.index_to_soup(href)
self.log('\tLooking for current issue in: ' + href)
# Find all articles.
list = index.find('div', attrs = {'class' : 'siv_artList'})
@@ -69,31 +93,26 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
def preprocess_html(self, soup):
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
if header:
# It's an article, prepare a container for the content
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
body = homeMadeSoup.find('body')
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
body = homeMadeSoup.body
# Find the date, title and byline
temp = header.find('td', attrs = {'class' : 'title'})
if temp :
date = temp.find('div', attrs = {'class' : 'date'})
if date:
body.append(date)
if temp.h1:
body.append(temp.h1)
if temp.h2 :
body.append(temp.h2)
byline = temp.find('div', attrs = {'class' : 'byline'})
if byline:
body.append(byline)
# Find the date, title and byline
temp = header.find('td', attrs = {'class' : 'title'})
if temp :
date = temp.find('div', attrs = {'class' : 'date'})
if date:
body.append(date)
if temp.h1:
body.append(temp.h1)
if temp.h2 :
body.append(temp.h2)
byline = temp.find('div', attrs = {'class' : 'byline'})
if byline:
body.append(byline)
# Find the content
for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
body.append(para)
# Find the content
for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
body.append(para)
return homeMadeSoup
else :
# It's a TOC, just return the whole lot
return soup
return homeMadeSoup

View File

@@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class The_Gazette(BasicNewsRecipe):
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
title = u'The Gazette'
__author__ = 'Jerry Clapperton'
description = 'Montreal news in English'
language = 'en_CA'
oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]

View File

@@ -0,0 +1,49 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.theweek.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheWeekFree(BasicNewsRecipe):
title = 'The Week Magazine - Free content'
__author__ = 'Darko Miletic'
description = "The best of the US and international media. Daily coverage of commentary and analysis of the day's events, as well as arts, entertainment, people and gossip, and political cartoons."
publisher = 'The Week Publications, Inc.'
category = 'news, politics, USA'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(name=['h1','h2'])
, dict(name='div', attrs={'class':'basefont'})
, dict(name='div', attrs={'id':'slideshowLoader'})
]
remove_tags = [
dict(name='div', attrs={'id':['digg_dugg','articleRight','dateHeader']})
,dict(name=['object','embed','iframe'])
]
feeds = [
(u'News & Opinions' , u'http://www.theweek.com/section/index/news_opinion.rss')
,(u'Arts & Leisure' , u'http://www.theweek.com/section/index/arts_leisure.rss')
,(u'Business' , u'http://www.theweek.com/section/index/business.rss' )
,(u'Cartoon & Short takes' , u'http://www.theweek.com/section/index/cartoons_wit.rss')
]

View File

@@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
luminous-landscape.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class theluminouslandscape(BasicNewsRecipe):
title = 'The Luminous Landscape'
__author__ = 'Darko Miletic'
description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
publisher = 'The Luminous Landscape'
category = 'news, blog, photograph, international'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = True
encoding = 'cp1252'
language = 'en'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [(u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]
remove_tags = [dict(name=['object','link','iframe'])]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@@ -0,0 +1,41 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
theonlinephotographer.typepad.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class theonlinephotographer(BasicNewsRecipe):
title = 'The Online Photographer'
__author__ = 'Darko Miletic'
description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
publisher = 'The Online Photographer'
category = 'news, blog, photograph, international'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'utf8'
language = 'en'
extra_css = ' body{font-family: Georgia,"Times New Roman",serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [(u'Articles', u'http://feeds.feedburner.com/typepad/ZSjz')]
remove_tags_before = dict(name='h3',attrs={'class':'entry-header'})
remove_tags_after = dict(name='div',attrs={'class':'entry-footer'})
remove_tags = [dict(name=['object','link','iframe'])]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '30, January 2010'
__description__ = 'Daily sports news from Italy'
'''www.tuttosport.com'''
from calibre.web.feeds.news import BasicNewsRecipe
class tuttosport(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'Daily sports news from Italy'
cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
title = 'Tuttosport'
publisher = 'Nuova Editoriale Sportiva S.r.l'
category = 'Sport News'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 2
max_articles_per_feed = 20
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
def print_version(self,url):
segments = url.split('/')
printURL = '/'.join(segments[0:10]) + '?print'
return printURL
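# e.g. (hypothetical URL) splitting
#   http://www.tuttosport.com/calcio/serie_a/2010/01/30/12345/Titolo/extra
# keeps the first ten '/'-separated segments and appends '?print'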
keep_only_tags = [
dict(name='h2', attrs={'class':'tit_Article'}),
dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']})
]
feeds = [
(u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'),
(u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
(u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'),
(u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'),
(u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
(u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
(u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'),
(u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'),
(u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
(u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
(u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml')
]
extra_css = '''
body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
.txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
.txt_Article {clear: both; margin: 8px 8px 12px;}
.txt_Author {float: right;}
.txt_ArticleAuthor {clear: both; margin: 8px;}
'''

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Vancouver Province
title = u'Vancouver Province'
url_prefix = 'http://www.theprovince.com'
description = u'News from Vancouver, BC'
# un-comment the following three lines for the Vancouver Sun
#title = u'Vancouver Sun'
#url_prefix = 'http://www.vancouversun.com'
#description = u'News from Vancouver, BC'
# un-comment the following three lines for the Edmonton Journal
#title = u'Edmonton Journal'
#url_prefix = 'http://www.edmontonjournal.com'
#description = u'News from Edmonton, AB'
# un-comment the following three lines for the Calgary Herald
#title = u'Calgary Herald'
#url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Calgary, AB'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del div['id']
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Vancouver Sun
title = u'Vancouver Sun'
url_prefix = 'http://www.vancouversun.com'
description = u'News from Vancouver, BC'
# un-comment the following three lines for the Edmonton Journal
#title = u'Edmonton Journal'
#url_prefix = 'http://www.edmontonjournal.com'
#description = u'News from Edmonton, AB'
# un-comment the following three lines for the Calgary Herald
#title = u'Calgary Herald'
#url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Calgary, AB'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del div['id']
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Victoria Times Colonist
title = u'Victoria Times Colonist'
url_prefix = 'http://www.timescolonist.com'
description = u'News from Victoria, BC'
# un-comment the following three lines for the Vancouver Province
#title = u'Vancouver Province'
#url_prefix = 'http://www.theprovince.com'
#description = u'News from Vancouver, BC'
# un-comment the following three lines for the Vancouver Sun
#title = u'Vancouver Sun'
#url_prefix = 'http://www.vancouversun.com'
#description = u'News from Vancouver, BC'
# un-comment the following three lines for the Edmonton Journal
#title = u'Edmonton Journal'
#url_prefix = 'http://www.edmontonjournal.com'
#description = u'News from Edmonton, AB'
# un-comment the following three lines for the Calgary Herald
#title = u'Calgary Herald'
#url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Calgary, AB'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del div['id']
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -46,3 +46,10 @@ class WashingtonPost(BasicNewsRecipe):
div['style'] = ''
return soup
def preprocess_html(self, soup):
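# Keep only pages containing a bold headline inside a <font size="+2"> tag;
# returning None drops the article (the same skip convention the WSJ recipe
# later in this commit relies on).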
for tag in soup.findAll('font'):
if tag.has_key('size'):
if tag['size'] == '+2':
if tag.b:
return soup
return None

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.canada.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Windsor Star
title = u'Windsor Star'
url_prefix = 'http://www.windsorstar.com'
description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del div['id']
return soup
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p')
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -1,44 +1,105 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired(BasicNewsRecipe):
title = 'Wired Magazine'
__author__ = 'Darko Miletic'
description = 'Gaming news'
publisher = 'Conde Nast Digital'
category = 'news, games, IT, gadgets'
oldest_article = 32
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
index = 'http://www.wired.com/magazine/'
title = 'Wired.com'
__author__ = 'Kovid Goyal'
description = 'Technology news'
timefmt = ' [%Y%b%d %H%M]'
language = 'en'
preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
remove_tags = [
dict(name=['object','embed','iframe','link'])
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
]
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
]
#feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
def parse_index(self):
totalfeeds = []
soup = self.index_to_soup(self.index)
features = soup.find('div',attrs={'id':'my-glider'})
if features:
farticles = []
for item in features.findAll('div',attrs={'class':'section'}):
divurl = item.find('div',attrs={'class':'feature-header'})
divdesc = item.find('div',attrs={'class':'feature-text'})
url = 'http://www.wired.com' + divurl.a['href']
title = self.tag_to_string(divurl.a)
description = self.tag_to_string(divdesc)
date = strftime(self.timefmt)
farticles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append(('Featured Articles', farticles))
#department feeds
departments = ['rants','start','test','play','found']
dept = soup.find('div',attrs={'id':'magazine-departments'})
if dept:
for ditem in departments:
darticles = []
department = dept.find('div',attrs={'id':'department-'+ditem})
if department:
for item in department.findAll('div'):
description = ''
feed_link = item.find('a')
if feed_link and feed_link.has_key('href'):
url = feed_link['href']
title = self.tag_to_string(feed_link)
date = strftime(self.timefmt)
darticles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((ditem.capitalize(), darticles))
return totalfeeds
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.index)
cover_item = soup.find('div',attrs={'class':'spread-image'})
if cover_item:
cover_url = 'http://www.wired.com' + cover_item.a.img['src']
return cover_url
def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
return url.rstrip('/') + '/all/1'
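# e.g. (hypothetical URL) http://www.wired.com/gadgetlab/2010/01/some-story/
#      -> http://www.wired.com/gadgetlab/2010/01/some-story/all/1  (single-page view)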
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
# http://online.wsj.com/page/us_in_todays_paper.html
@@ -67,6 +68,13 @@ class WallStreetJournal(BasicNewsRecipe):
def parse_index(self):
soup = self.wsj_get_index()
year = strftime('%Y')
for x in soup.findAll('td', attrs={'class':'b14'}):
txt = self.tag_to_string(x).strip()
if year in txt:
self.timefmt = ' [%s]'%txt
break
left_column = soup.find(
text=lambda t: 'begin ITP Left Column' in str(t))
@@ -91,7 +99,7 @@ class WallStreetJournal(BasicNewsRecipe):
url = url.partition('#')[0]
desc = ''
d = x.findNextSibling(True)
if d.get('class', None) == 'arialResize':
if d is not None and d.get('class', None) == 'arialResize':
desc = self.tag_to_string(d)
desc = desc.partition(u'\u2022')[0]
self.log('\t\tFound article:', title)

View File

@@ -3,47 +3,139 @@
__license__ = 'GPL v3'
'''
online.wsj.com.com
online.wsj.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
from datetime import timedelta, date
class WSJ(BasicNewsRecipe):
# formatting adapted from original recipe by Kovid Goyal and Sujata Raman
title = u'Wall Street Journal (free)'
__author__ = 'Nick Redding'
language = 'en'
description = ('All the free content from the Wall Street Journal (business'
', financial and political news)')
description = ('All the free content from the Wall Street Journal (business, financial and political news)')
no_stylesheets = True
timefmt = ' [%b %d]'
extra_css = '''h1{font-size:large; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
h2{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
.subhead{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
.insettipUnit {font-family:Arial,Sans-serif;font-size:xx-small;}
.targetCaption{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
.article{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.tagline { ont-size:xx-small;}
.dateStamp {font-family:Arial,Helvetica,sans-serif;}
h3{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small; list-style-type: none;}
# customization notes: delete sections you are not interested in
# set omit_paid_content to False if you want the paid content article snippets
# set oldest_article to the maximum number of days back from today to include articles
sectionlist = [
['/home-page','Front Page'],
['/public/page/news-opinion-commentary.html','Commentary'],
['/public/page/news-global-world.html','World News'],
['/public/page/news-world-business.html','US News'],
['/public/page/news-business-us.html','Business'],
['/public/page/news-financial-markets-stock.html','Markets'],
['/public/page/news-tech-technology.html','Technology'],
['/public/page/news-personal-finance.html','Personal Finance'],
['/public/page/news-lifestyle-arts-entertainment.html','Life & Style'],
['/public/page/news-real-estate-homes.html','Real Estate'],
['/public/page/news-career-jobs.html','Careers'],
['/public/page/news-small-business-marketing.html','Small Business']
]
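# For instance, a trimmed-down configuration per the notes above might read
# (illustrative values only):
#   sectionlist = [
#       ['/home-page','Front Page'],
#       ['/public/page/news-tech-technology.html','Technology']
#   ]
#   oldest_article = 7
#   omit_paid_content = False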
oldest_article = 2
omit_paid_content = True
extra_css = '''h1{font-size:large; font-family:Times,serif;}
h2{font-family:Times,serif; font-size:small; font-style:italic;}
.subhead{font-family:Times,serif; font-size:small; font-style:italic;}
.insettipUnit {font-family:Times,serif;font-size:xx-small;}
.targetCaption{font-size:x-small; font-family:Times,serif; font-style:italic; margin-top: 0.25em;}
.article{font-family:Times,serif; font-size:x-small;}
.tagline { font-size:xx-small;}
.dateStamp {font-family:Times,serif;}
h3{font-family:Times,serif; font-size:xx-small;}
.byline {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
.metadataType-articleCredits {list-style-type: none;}
h6{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic;}
h6{font-family:Times,serif; font-size:small; font-style:italic;}
.paperLocation{font-size:xx-small;}'''
remove_tags_before = dict(name='h1')
remove_tags = [ dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
"articleTabs_tab_interactive","articleTabs_tab_video",
"articleTabs_tab_map","articleTabs_tab_slideshow"]),
{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
'insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', 'tooltip',
'adSummary', 'nav-inline','insetFullBracket']},
dict(rel='shortcut icon'),
remove_tags_before = dict({'class':re.compile('^articleHeadlineBox')})
remove_tags = [ dict({'id':re.compile('^articleTabs_tab_')}),
#dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
# "articleTabs_tab_interactive","articleTabs_tab_video",
# "articleTabs_tab_map","articleTabs_tab_slideshow"]),
{'class': ['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
'insettip','insetClose','more_in', "insetContent",
# 'articleTools_bottom','articleTools_bottom mjArticleTools',
'aTools', 'tooltip',
'adSummary', 'nav-inline','insetFullBracket']},
dict({'class':re.compile('^articleTools_bottom')}),
dict(rel='shortcut icon')
]
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
return br
def preprocess_html(self,soup):
def decode_us_date(datestr):
udate = datestr.strip().lower().split()
m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(udate[0])+1
d = int(udate[1])
y = int(udate[2])
return date(y,m,d)
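# e.g. decode_us_date('January 5 2010') -> datetime.date(2010, 1, 5)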
# check if article is paid content
if self.omit_paid_content:
divtags = soup.findAll('div','tooltip')
if divtags:
for divtag in divtags:
if divtag.find(text="Subscriber Content"):
return None
# check if article is too old
datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
if datetag:
dateline_string = self.tag_to_string(datetag,False)
date_items = dateline_string.split(',')
datestring = date_items[0]+date_items[1]
article_date = decode_us_date(datestring)
earliest_date = date.today() - timedelta(days=self.oldest_article)
if article_date < earliest_date:
self.log("Skipping article dated %s" % datestring)
return None
datetag.parent.extract()
# place dateline in article heading
bylinetag = soup.find('h3','byline')
if bylinetag:
h3bylinetag = bylinetag
else:
bylinetag = soup.find('li','byline')
if bylinetag:
h3bylinetag = bylinetag.h3
if not h3bylinetag:
h3bylinetag = bylinetag
bylinetag = bylinetag.parent
if bylinetag:
if h3bylinetag.a:
bylinetext = 'By '+self.tag_to_string(h3bylinetag.a,False)
else:
bylinetext = self.tag_to_string(h3bylinetag,False)
h3byline = Tag(soup,'h3',[('class','byline')])
if bylinetext.isspace() or (bylinetext == ''):
h3byline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
else:
h3byline.insert(0,NavigableString(bylinetext+u'\u2014'+date_items[0]+','+date_items[1]))
bylinetag.replaceWith(h3byline)
else:
headlinetag = soup.find('div',attrs={'class' : re.compile("^articleHeadlineBox")})
if headlinetag:
dateline = Tag(soup,'h3', [('class','byline')])
dateline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
headlinetag.insert(len(headlinetag),dateline)
else: # if no date tag, don't process this page--it's not a news item
return None
# This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
if ultag:
@@ -58,7 +150,7 @@ class WSJ(BasicNewsRecipe):
key = None
ans = []
def parse_index_page(page_name,page_title,omit_paid_content):
def parse_index_page(page_name,page_title):
def article_title(tag):
atag = tag.find('h2') # title is usually in an h2 tag
@@ -119,7 +211,6 @@ class WSJ(BasicNewsRecipe):
soup = self.index_to_soup(pageurl)
# Find each instance of div with class including "headlineSummary"
for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
# divtag contains all article data as ul's and li's
# first, check if there is an h3 tag which provides a section name
stag = divtag.find('h3')
@@ -162,7 +253,7 @@ class WSJ(BasicNewsRecipe):
# now skip paid subscriber articles if desired
subscriber_tag = litag.find(text="Subscriber Content")
if subscriber_tag:
if omit_paid_content:
if self.omit_paid_content:
continue
# delete the tip div so it doesn't get in the way
tiptag = litag.find("div", { "class" : "tipTargetBox" })
@@ -185,7 +276,7 @@ class WSJ(BasicNewsRecipe):
continue
if url.startswith("/article"):
url = mainurl+url
if not url.startswith("http"):
if not url.startswith("http://online.wsj.com"):
continue
if not url.endswith(".html"):
continue
@@ -214,48 +305,10 @@ class WSJ(BasicNewsRecipe):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
# customization notes: delete sections you are not interested in
# set omit_paid_content to False if you want the paid content article previews
sectionlist = ['Front Page','Commentary','World News','US News','Business','Markets',
'Technology','Personal Finance','Life & Style','Real Estate','Careers','Small Business']
omit_paid_content = True
if 'Front Page' in sectionlist:
parse_index_page('/home-page','Front Page',omit_paid_content)
ans.append('Front Page')
if 'Commentary' in sectionlist:
parse_index_page('/public/page/news-opinion-commentary.html','Commentary',omit_paid_content)
ans.append('Commentary')
if 'World News' in sectionlist:
parse_index_page('/public/page/news-global-world.html','World News',omit_paid_content)
ans.append('World News')
if 'US News' in sectionlist:
parse_index_page('/public/page/news-world-business.html','US News',omit_paid_content)
ans.append('US News')
if 'Business' in sectionlist:
parse_index_page('/public/page/news-business-us.html','Business',omit_paid_content)
ans.append('Business')
if 'Markets' in sectionlist:
parse_index_page('/public/page/news-financial-markets-stock.html','Markets',omit_paid_content)
ans.append('Markets')
if 'Technology' in sectionlist:
parse_index_page('/public/page/news-tech-technology.html','Technology',omit_paid_content)
ans.append('Technology')
if 'Personal Finance' in sectionlist:
parse_index_page('/public/page/news-personal-finance.html','Personal Finance',omit_paid_content)
ans.append('Personal Finance')
if 'Life & Style' in sectionlist:
parse_index_page('/public/page/news-lifestyle-arts-entertainment.html','Life & Style',omit_paid_content)
ans.append('Life & Style')
if 'Real Estate' in sectionlist:
parse_index_page('/public/page/news-real-estate-homes.html','Real Estate',omit_paid_content)
ans.append('Real Estate')
if 'Careers' in sectionlist:
parse_index_page('/public/page/news-career-jobs.html','Careers',omit_paid_content)
ans.append('Careers')
if 'Small Business' in sectionlist:
parse_index_page('/public/page/news-small-business-marketing.html','Small Business',omit_paid_content)
ans.append('Small Business')
for page_name,page_title in self.sectionlist:
parse_index_page(page_name,page_title)
ans.append(page_title)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@@ -0,0 +1,39 @@
/*
* images management
* Copyright 2008 Kovid Goyal
* License: GNU GPL v3
*/
function scale_images() {
$("img:visible").each(function() {
var offset = $(this).offset();
//window.py_bridge.debug(window.getComputedStyle(this, '').getPropertyValue('max-width'));
$(this).css("max-width", (window.innerWidth-offset.left-5)+"px");
$(this).css("max-height", (window.innerHeight-5)+"px");
});
}
function setup_image_scaling_handlers() {
scale_images();
$(window).resize(function(){
scale_images();
});
}
function extract_svged_images() {
$("svg").each(function() {
var children = $(this).children("img");
if (children.length == 1) {
var img = $(children[0]);
var href = img.attr('xlink:href');
if (href != undefined) {
$(this).replaceWith('<div style="text-align:center; margin: 0; padding: 0"><img style="height: 98%" alt="SVG Image" src="' + href +'"></img></div>');
}
}
});
}
$(document).ready(function() {
//extract_svged_images();
});

View File

@@ -112,6 +112,9 @@ class LinuxFreeze(Command):
includes += ['calibre.gui2.convert.'+x.split('/')[-1].rpartition('.')[0] for x in \
glob.glob('src/calibre/gui2/convert/*.py')]
includes += ['calibre.gui2.catalog.'+x.split('/')[-1].rpartition('.')[0] for x in \
glob.glob('src/calibre/gui2/catalog/*.py')]
LOADER = '/tmp/loader.py'
open(LOADER, 'wb').write('# This script is never actually used.\nimport sys')

View File

@@ -266,6 +266,7 @@ class Py2App(object):
def get_local_dependencies(self, path_to_lib):
for x in self.get_dependencies(path_to_lib):
for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/',
'/opt/local/lib/',
'/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'):
if x.startswith(y):
if y == '/Library/Frameworks/Python.framework/':
@@ -338,8 +339,8 @@ class Py2App(object):
c = join(self.build_dir, 'Contents')
for x in ('Frameworks', 'MacOS', 'Resources'):
os.makedirs(join(c, x))
x = 'library.icns'
shutil.copyfile(join('icons', x), join(self.resources_dir, x))
for x in ('library.icns', 'book.icns'):
shutil.copyfile(join('icons', x), join(self.resources_dir, x))
@flush
def add_calibre_plugins(self):
@@ -355,8 +356,13 @@ class Py2App(object):
@flush
def create_plist(self):
from calibre.ebooks import BOOK_EXTENSIONS
env = dict(**ENV)
env['CALIBRE_LAUNCHED_FROM_BUNDLE']='1';
docs = [{'CFBundleTypeName':'E-book',
'CFBundleTypeExtensions':list(BOOK_EXTENSIONS),
'CFBundleTypeRole':'Viewer',
}]
pl = dict(
CFBundleDevelopmentRegion='English',
@@ -367,10 +373,11 @@ class Py2App(object):
CFBundlePackageType='APPL',
CFBundleSignature='????',
CFBundleExecutable='calibre',
CFBundleDocumentTypes=docs,
LSMinimumSystemVersion='10.4.2',
LSRequiresNativeExecution=True,
NSAppleScriptEnabled=False,
NSHumanReadableCopyright='Copyright 2008, Kovid Goyal',
NSHumanReadableCopyright='Copyright 2010, Kovid Goyal',
CFBundleGetInfoString=('calibre, an E-book management '
'application. Visit http://calibre-ebook.com for details.'),
CFBundleIconFile='library.icns',
@@ -594,6 +601,7 @@ class Py2App(object):
if x == 'Info.plist':
plist = plistlib.readPlist(join(self.contents_dir, x))
plist['LSUIElement'] = '1'
plist.pop('CFBundleDocumentTypes')
plistlib.writePlist(plist, join(cc_dir, x))
else:
os.symlink(join('../..', x),

View File

@@ -117,9 +117,12 @@ def prints(*args, **kwargs):
try:
arg = arg.encode(enc)
except UnicodeEncodeError:
if not safe_encode:
raise
arg = repr(arg)
try:
arg = arg.encode('utf-8')
except:
if not safe_encode:
raise
arg = repr(arg)
if not isinstance(arg, str):
try:
arg = str(arg)
@@ -129,9 +132,12 @@ def prints(*args, **kwargs):
try:
arg = arg.encode(enc)
except UnicodeEncodeError:
if not safe_encode:
raise
arg = repr(arg)
try:
arg = arg.encode('utf-8')
except:
if not safe_encode:
raise
arg = repr(arg)
file.write(arg)
if i != len(args)-1:

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.34'
__version__ = '0.6.37'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@@ -2,10 +2,11 @@ from __future__ import with_statement
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import sys
+import os, sys, zipfile

-from calibre.ptempfile import PersistentTemporaryFile
 from calibre.constants import numeric_version
+from calibre.ptempfile import PersistentTemporaryFile

 class Plugin(object):
     '''
@@ -231,6 +232,8 @@ class CatalogPlugin(Plugin):
     A plugin that implements a catalog generator.
     '''

+    resources_path = None
+
     #: Output file type for which this plugin should be run
     #: For example: 'epub' or 'xml'
     file_types = set([])
@@ -249,14 +252,25 @@ class CatalogPlugin(Plugin):
     cli_options = []

     def search_sort_db(self, db, opts):
+        '''
+        # Don't add Catalogs to the generated Catalogs
+        cat = _('Catalog')
+        if opts.search_text:
+            db.search(opts.search_text)
+            opts.search_text += ' not tag:'+cat
+        else:
+            opts.search_text = 'not tag:'+cat
+        '''
         db.search(opts.search_text)

         if opts.sort_by:
             # 2nd arg = ascending
             db.sort(opts.sort_by, True)
-        return db.get_data_as_dict()
+        return db.get_data_as_dict(ids=opts.ids)

     def get_output_fields(self, opts):
         # Return a list of requested fields, with opts.sort_by first
@@ -272,11 +286,40 @@ class CatalogPlugin(Plugin):
             fields = list(all_fields & requested_fields)
         else:
             fields = list(all_fields)
+
         fields.sort()
-        fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
+        if opts.sort_by and opts.sort_by in fields:
+            fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
         return fields

-    def run(self, path_to_output, opts, db):
+    def initialize(self):
+        '''
+        If plugin is not a built-in, copy the plugin's .ui and .py files from
+        the zip file to $TMPDIR.
+        Tab will be dynamically generated and added to the Catalog Options dialog in
+        calibre.gui2.dialogs.catalog.py:Catalog
+        '''
+        from calibre.customize.builtins import plugins as builtin_plugins
+        from calibre.customize.ui import config
+        from calibre.ptempfile import PersistentTemporaryDirectory
+
+        if not type(self) in builtin_plugins and \
+           not self.name in config['disabled_plugins']:
+            files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
+            resources = zipfile.ZipFile(self.plugin_path,'r')
+
+            if self.resources_path is None:
+                self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')
+
+            for file in files_to_copy:
+                try:
+                    resources.extract(file, self.resources_path)
+                except:
+                    print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path))
+                    continue
+            resources.close()
+
+    def run(self, path_to_output, opts, db, ids, notification=None):
         '''
         Run the plugin. Must be implemented in subclasses.
         It should generate the catalog in the format specified
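For context, a minimal sketch of a plugin implementing the new run() signature; the class name, file type and output format are hypothetical:

    class ExampleCatalog(CatalogPlugin):

        name = 'Example catalog generator'
        file_types = set(['csv'])

        def run(self, path_to_output, opts, db, ids, notification=None):
            # ids holds the database ids selected in the GUI; search_sort_db()
            # forwards them via get_data_as_dict(ids=opts.ids).
            opts.ids = ids
            data = self.search_sort_db(db, opts)
            with open(path_to_output, 'wb') as f:
                for record in data:
                    f.write('%s\n' % record['title'].encode('utf-8'))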

View File

@@ -404,7 +404,7 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
 from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybook.driver import CYBOOK
 from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
-                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK
+                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000
 from calibre.devices.jetbook.driver import JETBOOK
@@ -421,8 +421,8 @@ from calibre.devices.binatone.driver import README
 from calibre.devices.hanvon.driver import N516
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
-from calibre.library.catalog import CSV_XML
-plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML]
+from calibre.library.catalog import CSV_XML, EPUB_MOBI
+plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
 plugins += [
     ComicInput,
     EPUBInput,
@@ -485,6 +485,7 @@ plugins += [
     ITALICA,
     ECLICTO,
     DBOOK,
+    INVESBOOK,
     BOOX,
     EB600,
     README,

View File

@@ -85,6 +85,9 @@ class OptionRecommendation(object):
 class DummyReporter(object):
     def __init__(self):
         self.cancel_requested = False
+
+    def __call__(self, percent, msg=''):
+        pass
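DummyReporter gives callers a no-op progress callback with the same shape as a real one. A hypothetical call site might look like this (the percent scale is whatever the caller uses):

    reporter = notification if notification is not None else DummyReporter()
    if not reporter.cancel_requested:
        reporter(50, 'generating catalog...')  # silently discarded by DummyReporter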

View File

@@ -9,23 +9,22 @@ from calibre.devices.usbms.driver import USBMS
 class BLACKBERRY(USBMS):

     name = 'Blackberry Device Interface'
     gui_name = 'Blackberry'
     description = _('Communicate with the Blackberry smart phone.')
     author = _('Kovid Goyal')
-    supported_platforms = ['windows', 'linux']
+    supported_platforms = ['windows', 'linux', 'osx']

     # Ordered list of supported formats
     FORMATS = ['mobi', 'prc']

     VENDOR_ID = [0x0fca]
     PRODUCT_ID = [0x8004, 0x0004]
-    BCD = [0x0200, 0x0107]
+    BCD = [0x0200, 0x0107, 0x0210, 0x0201]

     VENDOR_NAME = 'RIM'
     WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

-    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
     MAIN_MEMORY_VOLUME_LABEL = 'Blackberry SD Card'

-    EBOOK_DIR_MAIN = 'ebooks'
+    EBOOK_DIR_MAIN = 'eBooks'
     SUPPORTS_SUB_DIRS = True

View File

@@ -154,7 +154,7 @@ class ECLICTO(EB600):
     name = 'eClicto Device Interface'
     gui_name = 'eClicto'

-    FORMATS = ['epub', 'pdf', 'txt']
+    FORMATS = ['epub', 'pdf', 'htm', 'html', 'txt']

     VENDOR_NAME = 'ECLICTO'
     WINDOWS_MAIN_MEM = 'EBOOK'
@@ -173,3 +173,14 @@ class DBOOK(EB600):
     VENDOR_NAME = 'INFINITY'
     WINDOWS_MAIN_MEM = 'AIRIS_DBOOK'
     WINDOWS_CARD_A_MEM = 'AIRIS_DBOOK'
+
+class INVESBOOK(EB600):
+
+    name = 'Inves Book Device Interface'
+    gui_name = 'Inves Book 600'
+
+    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'html', 'pdf', 'rtf', 'txt']
+
+    VENDOR_NAME = 'INVES_E6'
+    WINDOWS_MAIN_MEM = '00INVES_E600'
+    WINDOWS_CARD_A_MEM = '00INVES_E600'

View File

@@ -23,7 +23,7 @@ class N516(USBMS):
     VENDOR_ID = [0x0525]
     PRODUCT_ID = [0xa4a5]
-    BCD = [0x323]
+    BCD = [0x323, 0x326]

     VENDOR_NAME = 'INGENIC'
     WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE'

View File

@@ -71,7 +71,7 @@ int do_mount(const char *dev, const char *mp) {
 #ifdef __NetBSD__
     execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
 #else
-    execlp("mount", "mount", "-t", "vfat", "-o", options, dev, mp, NULL);
+    execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL);
 #endif
     errsv = errno;
     fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
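The shell-level equivalent of this C change, sketched in Python for illustration (the device, mount point and option string are hypothetical):

    import subprocess

    dev, mp = '/dev/sdb1', '/media/reader'     # hypothetical
    options = 'rw,noexec,nosuid,sync,nodev'    # hypothetical

    # '-t auto' lets mount(8) probe the filesystem type instead of
    # assuming vfat, so readers with non-FAT filesystems mount too.
    subprocess.check_call(['mount', '-t', 'auto', '-o', options, dev, mp])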

Some files were not shown because too many files have changed in this diff.