diff --git a/Changelog.yaml b/Changelog.yaml
index e7071bfd07..1ec61fa0b5 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -4,6 +4,305 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
+- version: 0.6.37
+ date: 2010-02-01
+
+ new features:
+ - title: "E-book viewer: Add support for viewing SVG images"
+ type: major
+
+ - title: "Add category of Recently added books when generating catalog in e-book format"
+
+ - title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
+
+ - title: "Add support for masthead images when downloading news for the Kindle"
+
+ - title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
+
+ bug fixes:
+ - title: Changing the date in Dutch
+ tickets: [4732]
+
+ - title: "Fix regression that broke sending files to unupdated PRS 500s"
+
+ - title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
+ tickets: [4726]
+
+ - title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
+ tickets: [4692]
+
+ - title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
+ tickets: [4683]
+
+ - title: "Allow rating to be cleared via the Bulk metadata edit dialog"
+ tickets: [4693]
+
+ - title: "Add workaround for broken linux systems with multiply encoded file names"
+ tickets: [4721]
+
+ - title: Fix bug preventing the use of indices when setting save to disk templates
+ tickets: [4710]
+
+ - title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
+ tickets: [4707]
+
+ - title: "Catalog generation: Make sorting of numbers in title as text optional"
+
+ - title: "Fix error while sending book with non-ascii character in title/author to device on linux"
+ tickets: [4690]
+
+ - title: "Fix reset cover in edit meta information dialog does not actually remove cover"
+ tickets: [4731]
+
+ new recipes:
+ - title: Kamera Bild
+ author: Darko Miletic
+
+ - title: The Online Photographer
+ author: Darko Miletic
+
+ - title: The Luminous Landscape
+ author: Darko Miletic
+
+ - title: Slovo
+ author: Abelturd
+
+ - title: Various Danish newspapers
+ author: Darko Miletic
+
+ - title: Heraldo de Aragon
+ author: Lorenzo Vigentini
+
+ - title: Orange County Register
+ author: Lorenzo Vigentini
+
+ - title: Open Left
+ author: Xanthan Gum
+
+ - title: Michelle Malkin
+ author: Walt Anthony
+
+ - title: The Metro Montreal
+ author: Jerry Clapperton
+
+ - title: The Gazette
+ author: Jerry Clapperton
+
+ - title: Macleans Magazine
+ author: Nick Redding
+
+ - title: NY Times Sunday Book Review
+ author: Krittika Goyal
+
+ - title: Various Italian newspapers
+ author: Lorenzo Vigentini
+
+
+ improved recipes:
+ - The Irish Times
+ - Washington Post
+ - NIN
+ - The Discover Magazine
+ - Pagina 12
+
+- version: 0.6.36
+ date: 2010-01-25
+
+ new features:
+ - title: Catalog generation in MOBI format
+
+ - title: "Driver for Inves Book 600"
+
+ - title: "Show notifications on OS X even when systray icon is disabled. "
+
+ bug fixes:
+ - title: Fix memory leak in catalog generation
+
+ - title: Fix regression that broke PML output
+
+ - title: Fix bug in MOBI Input
+ tickets: [4643]
+
+ - title: "Replace commas with semi-colons in download tags"
+ tickets: [4650]
+
+ - title: Fix catalog output format dropdown empty in linux
+ tickets: [4656]
+
+ - title: "Fix display of non-English characters in OS X notifications"
+ tickets: [4654]
+
+ - title: Add .cbc to list of book formats
+ tickets: [4662]
+
+ - title: "Content server: Mobile page breaks if library contains empty books. Now fixed."
+
+ - title: "Support old 212 byte header PDB files"
+ tickets: [4646]
+
+ - title: "Fix regression that caused wrong error message to be displayed when device is out of space"
+
+
+ new recipes:
+ - title: Harvard Business Review Blogs
+ author: Brian_G
+
+ - title: Neowin
+ author: Darko Miletic
+
+ - title: Greensboro News and Record
+ author: Walt Anthony
+
+ - title: Hot Air
+ author: Walt Anthony
+
+ - title: ionline
+ author: Darko Miletic
+
+ - title: The National Review Online
+ author: Walt Anthony
+
+ improved recipes:
+ - Ars Technica
+ - Sports Illustrated
+ - Common Dreams
+ - Wired Magazine
+
+
+- version: 0.6.35
+ date: 2010-01-22
+
+ new features:
+ - title: Catalog generation
+ type: major
+ description: >
+ "You can now easily generate a catalog of all books in your calibre library by clicking the arrow next to the convert button. The catalog can be in one of several formats: XML, CSV, EPUB and MOBI, with scope for future formats via plugins. If you generate the catalog in an e-book format, it will be automatically sent to your e-book reader the next time you connect it, allowing you to easily browse your collection on the reader itself. This feature is in Beta (may have bugs) so feedback is appreciated."
+
+ - title: "RTF Input: Support for unicode characters."
+ type: major
+ tickets: [4501]
+
+ - title: "Add Quick Start Guide by John Schember to calibre library on first run of calibre"
+ type: major
+
+ - title: "Improve handling of justification"
+ description: >
+ "Now calibre will explicitly change the justification of all left aligned paragraphs to justified or vice versa depending on the justification setting. This should make it possible to robustly convert all content to either justified or not. calibre will not touch centered or right aligned content."
+
+ - title: "E-book viewer: Fit images to viewer window (can be turned off via Preferences)"
+
+ - title: "Add section on E-book viewer to User Manual"
+
+ - title: "Development environment: First look for resources in the location pointed to by CALIBRE_DEVELOP_FROM. If not found, use the normal resource location"
+
+ - title: "When reading metadata from filenames, with the Swap author names option checked, improve the logic used to detect author last name."
+ tickets: [4620]
+
+ - title: "News downloads: When getting an article URL from a RSS feed, look first for an original article link. This speeds up the download of news services that use a syndication service like feedburner or pheedo to publish their RSS feeds."
+
+ bug fixes:
+ - title: "Windows device detection: Don't do expensive polling while waiting for device disconnect. This should fix the problems people have with their floppy drive being activated while an e-book reader is connected"
+
+ - title: "PML Input: Fix creation of metadata Table of Contents"
+ tickets: [5633]
+
+ - title: "Fix Tag browser not updating after using delete specific format actions"
+ tickets: [4632]
+
+ - title: "MOBI Output: Don't die when converting EPUB files with SVG covers"
+
+ - title: "Nook driver: Remove the # character from filenames when sending to device"
+ tickets: [4629]
+
+ - title: "Workaround for bug in QtWebKit on windows that could cause crashes when using the next page button in the e-book viewer for certain files"
+ tickets: [4606]
+
+ - title: "MOBI Input: Rescale img width and height attributes that were specified in em units"
+ tickets: [4608]
+
+ - title: "ebook-meta: Fix setting of series metadata"
+
+ - title: "RTF metadata: Fix reading metadata from very small files"
+
+ - title: "Conversion pipeline: Don't error out if the user sets an invalid chapter detection XPath"
+
+ - title: "Fix main mem and card being swapped in pocketbook detection on OS X"
+
+ - title: "Welcome wizard: Set the language to english if the user doesn't explicitly change the language. This ensures that the language will be english on windows by default"
+
+ - title: "Fix bug in OEBWriter that could cause writing out of resources in subdirectories with URL unsafe names to fail"
+
+ - title: "E-book viewer: Change highlight color to yellow on all platforms."
+ tickets: [4641]
+
+ new recipes:
+ - title: Frankfurter Rundschau
+ author: Justus Bisser
+
+ - title: The Columbia Journalism Review
+ author: XanthanGum
+
+ - title: Various CanWest Canadian news sources
+ author: Nick Redding
+
+ - title: digitaljournal.com
+ author: Darko Miletic
+
+ - title: Pajamas Media
+ author: Krittika Goyal
+
+ - title: Algemeen Dagblad
+ author: kwetal
+
+ - title: "The Reader's Digest"
+ author: BrianG
+
+ - title: The Yemen Times
+ author: kwetal
+
+ - title: The Kitsap Sun
+ author: Darko Miletic
+
+ - title: drivelry.com
+ author: Krittika Goyal
+
+ - title: New recipe for Google Reader that downloads unread articles instead of just starred ones
+ author: rollercoaster
+
+ - title: Le Devoir
+ author: Lorenzo Vigentini
+
+ - title: Joop
+ author: kwetal
+
+ - title: Various computer magazines
+ author: Lorenzo Vigentini
+
+ - title: The Wall Street Journal (free parts)
+ author: Nick Redding
+
+ - title: Journal of Nephrology
+ author: Krittika Goyal
+
+ - title: stuff.co.nz
+ author: Krittika Goyal
+
+ - title: Editor and Publisher
+ author: XanthanGum
+
+ - title: The Week (free)
+ author: Darko Miletic
+
+ improved recipes:
+ - Physics Today
+ - Wall Street Journal
+ - American Spectator
+ - FTD
+ - The National Post
+ - Blic
+ - Ars Technica
+
+
- version: 0.6.34
date: 2010-01-15
diff --git a/icons/book.icns b/icons/book.icns
new file mode 100644
index 0000000000..ee305dc9dd
Binary files /dev/null and b/icons/book.icns differ
diff --git a/resources/catalog/DefaultCover.jpg b/resources/catalog/DefaultCover.jpg
new file mode 100644
index 0000000000..b3cc507bb7
Binary files /dev/null and b/resources/catalog/DefaultCover.jpg differ
diff --git a/resources/catalog/mastheadImage.gif b/resources/catalog/mastheadImage.gif
new file mode 100644
index 0000000000..06340a09a9
Binary files /dev/null and b/resources/catalog/mastheadImage.gif differ
diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
new file mode 100644
index 0000000000..b5770599e6
--- /dev/null
+++ b/resources/catalog/stylesheet.css
@@ -0,0 +1,65 @@
+/* Paragraph styles for the files under resources/catalog. */
+
+body {
+    background-color: white;
+}
+
+p.title {
+    margin-top: 0em;
+    margin-bottom: 1em;
+    text-align: center;
+    font-style: italic;
+    font-size: xx-large;
+    border-bottom: solid black 4px;
+}
+
+p.author {
+    margin-top: 0em;
+    margin-bottom: 0em;
+    text-align: left;
+    text-indent: 1em;
+    font-size: large;
+}
+
+p.tags {
+    margin-top: 0em;
+    margin-bottom: 0em;
+    text-align: left;
+    text-indent: 1em;
+    font-size: small;
+}
+
+p.description {
+    text-align: left;
+    font-style: italic;
+    margin-top: 0em;
+}
+
+/* p.date_index and p.letter_index are styled identically. */
+p.date_index,
+p.letter_index {
+    font-size: x-large;
+    text-align: center;
+    font-weight: bold;
+    margin-top: 1em;
+    margin-bottom: 0px;
+}
+
+p.author_index {
+    font-size: large;
+    text-align: left;
+    margin-top: 0px;
+    margin-bottom: 0px;
+    text-indent: 0em;
+}
+
+/* Hanging indent: the first line starts 2em to the left of wrapped lines. */
+p.read_book,
+p.unread_book {
+    text-align: left;
+    margin-top: 0px;
+    margin-bottom: 0px;
+    margin-left: 2em;
+    text-indent: -2em;
+}
+
diff --git a/resources/images/news/ad.png b/resources/images/news/ad.png
new file mode 100644
index 0000000000..8b017910df
Binary files /dev/null and b/resources/images/news/ad.png differ
diff --git a/resources/images/news/digitaljournal.png b/resources/images/news/digitaljournal.png
new file mode 100644
index 0000000000..ea4637b8ad
Binary files /dev/null and b/resources/images/news/digitaljournal.png differ
diff --git a/resources/images/news/greensboro_news_and_record.png b/resources/images/news/greensboro_news_and_record.png
new file mode 100644
index 0000000000..91097cd15b
Binary files /dev/null and b/resources/images/news/greensboro_news_and_record.png differ
diff --git a/resources/images/news/hotair.png b/resources/images/news/hotair.png
new file mode 100644
index 0000000000..f6b391ce3f
Binary files /dev/null and b/resources/images/news/hotair.png differ
diff --git a/resources/images/news/information_dk.png b/resources/images/news/information_dk.png
new file mode 100644
index 0000000000..301e2992c7
Binary files /dev/null and b/resources/images/news/information_dk.png differ
diff --git a/resources/images/news/ionline_pt.png b/resources/images/news/ionline_pt.png
new file mode 100644
index 0000000000..f66d4aa797
Binary files /dev/null and b/resources/images/news/ionline_pt.png differ
diff --git a/resources/images/news/jp_dk.png b/resources/images/news/jp_dk.png
new file mode 100644
index 0000000000..c9553659aa
Binary files /dev/null and b/resources/images/news/jp_dk.png differ
diff --git a/resources/images/news/kamerabild.png b/resources/images/news/kamerabild.png
new file mode 100644
index 0000000000..401d5a33a3
Binary files /dev/null and b/resources/images/news/kamerabild.png differ
diff --git a/resources/images/news/ledevoir.png b/resources/images/news/ledevoir.png
new file mode 100644
index 0000000000..eabcf97004
Binary files /dev/null and b/resources/images/news/ledevoir.png differ
diff --git a/resources/images/news/michellemalkin_icon.png b/resources/images/news/michellemalkin_icon.png
new file mode 100644
index 0000000000..76842ec642
Binary files /dev/null and b/resources/images/news/michellemalkin_icon.png differ
diff --git a/resources/images/news/nationalreviewonline.png b/resources/images/news/nationalreviewonline.png
new file mode 100644
index 0000000000..9070b3c71d
Binary files /dev/null and b/resources/images/news/nationalreviewonline.png differ
diff --git a/resources/images/news/neowin.png b/resources/images/news/neowin.png
new file mode 100644
index 0000000000..5aee949c0b
Binary files /dev/null and b/resources/images/news/neowin.png differ
diff --git a/resources/images/news/nursingtimes.png b/resources/images/news/nursingtimes.png
new file mode 100644
index 0000000000..2806d3376f
Binary files /dev/null and b/resources/images/news/nursingtimes.png differ
diff --git a/resources/images/news/observer.png b/resources/images/news/observer.png
new file mode 100644
index 0000000000..5fbb7a6ccc
Binary files /dev/null and b/resources/images/news/observer.png differ
diff --git a/resources/images/news/politiken_dk.png b/resources/images/news/politiken_dk.png
new file mode 100644
index 0000000000..66f324a8c7
Binary files /dev/null and b/resources/images/news/politiken_dk.png differ
diff --git a/resources/images/news/the_week_magazine_free.png b/resources/images/news/the_week_magazine_free.png
new file mode 100644
index 0000000000..a7058ce2a2
Binary files /dev/null and b/resources/images/news/the_week_magazine_free.png differ
diff --git a/resources/images/news/theluminouslandscape.png b/resources/images/news/theluminouslandscape.png
new file mode 100644
index 0000000000..e6d452f5d2
Binary files /dev/null and b/resources/images/news/theluminouslandscape.png differ
diff --git a/resources/quick_start.epub b/resources/quick_start.epub
index d340d40996..7dd8d7e91c 100644
Binary files a/resources/quick_start.epub and b/resources/quick_start.epub differ
diff --git a/resources/recipes/ad.recipe b/resources/recipes/ad.recipe
new file mode 100644
index 0000000000..bc3fe40dad
--- /dev/null
+++ b/resources/recipes/ad.recipe
@@ -0,0 +1,109 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ADRecipe(BasicNewsRecipe):
+    '''
+    Recipe for the Dutch news site ad.nl, built from its RSS feeds.
+    '''
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'nl'
+    country = 'NL'
+    version = 1
+
+    title = u'AD'
+    publisher = u'de Persgroep Publishing Nederland NV'
+    category = u'News, Sports, the Netherlands'
+    description = u'News and Sports from the Netherlands'
+
+    oldest_article = 1.2
+    max_articles_per_feed = 100
+    use_embedded_content = False
+
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+
+    keep_only_tags = [
+        dict(name = 'div', attrs = {'id': 'art_box2'}),
+        dict(name = 'p', attrs = {'class': 'gen_footnote3'}),
+    ]
+
+    remove_tags = [
+        dict(name = 'div', attrs = {'class': 'gen_clear'}),
+        dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')}),
+    ]
+
+    remove_attributes = ['style']
+
+    # feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
+    feeds = [
+        (u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'),
+        (u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'),
+        (u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'),
+        (u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'),
+        (u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'),
+        (u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'),
+        (u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'),
+        (u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'),
+        (u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'),
+        (u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'),
+        (u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'),
+        (u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'),
+        (u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'),
+        (u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'),
+        (u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'),
+        (u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'),
+        (u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'),
+        (u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'),
+        (u'Consument', u'http://www.ad.nl/consument/rss.xml'),
+        (u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'),
+        (u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'),
+        (u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'),
+        (u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'),
+        (u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'),
+        (u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'),
+    ]
+
+    extra_css = '''
+        body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
+        div.captionEmbeddedMasterObject {font-size: x-small; font-style: italic; color: #696969;}
+        .gen_footnote3 {font-size: small; color: #666666; margin-top: 0.6em;}
+        '''
+
+    # Pass the recipe's own language ('nl') instead of a hard-coded 'en'.
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    def print_version(self, url):
+        '''
+        Return the print URL for the article at url.
+
+        The '/'-separated pieces of url are re-ordered and 'print' is
+        inserted; a url with fewer than 14 pieces raises IndexError.
+        '''
+        parts = url.split('/')
+        ordered = [parts[2], parts[3], parts[4], parts[5], parts[10],
+                   parts[7], 'print', parts[8], parts[9], parts[13]]
+        return 'http://' + '/'.join(ordered)
+
+    def preprocess_html(self, soup):
+        '''
+        Remove each <br> whose closest tag siblings on both sides are <br>.
+
+        Siblings are looked up again after every removal, so longer runs
+        of sibling <br> tags shrink. Returns soup.
+        '''
+        for br in soup.findAll('br'):
+            before = br.findPreviousSibling(True)
+            if getattr(before, 'name', None) != 'br':
+                continue
+            after = br.findNextSibling(True)
+            if getattr(after, 'name', None) == 'br':
+                br.extract()
+
+        return soup
diff --git a/resources/recipes/amspec.recipe b/resources/recipes/amspec.recipe
index 62bec5ae18..e5a76a4f86 100644
--- a/resources/recipes/amspec.recipe
+++ b/resources/recipes/amspec.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
+__copyright__ = '2009-2010, Darko Miletic '
'''
spectator.org
'''
@@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheAmericanSpectator(BasicNewsRecipe):
title = 'The American Spectator'
__author__ = 'Darko Miletic'
- language = 'en'
-
description = 'News from USA'
+ category = 'news, politics, USA, world'
+ publisher = 'The American Spectator'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
+ language = 'en'
INDEX = 'http://spectator.org'
- html2lrf_options = [
- '--comment' , description
- , '--category' , 'news, politics, USA'
- , '--publisher' , title
- ]
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
keep_only_tags = [
dict(name='div', attrs={'class':'post inner'})
@@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
remove_tags = [
dict(name='object')
- ,dict(name='div', attrs={'class':'col3' })
- ,dict(name='div', attrs={'class':'post-options' })
- ,dict(name='p' , attrs={'class':'letter-editor'})
- ,dict(name='div', attrs={'class':'social' })
+ ,dict(name='div', attrs={'class':['col3','post-options','social']})
+ ,dict(name='p' , attrs={'class':['letter-editor','meta']})
]
- feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
+ feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
def get_cover_url(self):
cover_url = None
@@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
def print_version(self, url):
return url + '/print'
+
+ def get_article_url(self, article):
+ return article.get('guid', None)
+
diff --git a/resources/recipes/ars_technica.recipe b/resources/recipes/ars_technica.recipe
index e5b54edc03..0bf5a9a3b0 100644
--- a/resources/recipes/ars_technica.recipe
+++ b/resources/recipes/ars_technica.recipe
@@ -1,12 +1,12 @@
-#!/usr/bin/env python
__license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic '
+__copyright__ = '2008-2010, Darko Miletic '
'''
arstechnica.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class ArsTechnica2(BasicNewsRecipe):
title = u'Ars Technica'
@@ -18,24 +18,24 @@ class ArsTechnica2(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
- encoding = 'utf8'
- remove_javascript = True
+ encoding = 'utf-8'
use_embedded_content = False
+ extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
- extra_css = '''
- .news-item-title{font-size: medium ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
- .news-item-teaser{font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
- .news-item-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
- .news-item-text{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
- .news-item-figure-caption-text{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:bold;}
- .news-item-figure-caption-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
- '''
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
- keep_only_tags = [dict(name='div', attrs={'id':['news-item-info','news-item']})]
+
+
+ keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
remove_tags = [
dict(name=['object','link','embed'])
- ,dict(name='div', attrs={'class':'related-stories'})
+ ,dict(name='div', attrs={'class':'read-more-link'})
]
@@ -52,14 +52,19 @@ class ArsTechnica2(BasicNewsRecipe):
]
def append_page(self, soup, appendtag, position):
- pager = soup.find('div',attrs={'id':'pager'})
+ pager = soup.find('div',attrs={'class':'pager'})
if pager:
for atag in pager.findAll('a',href=True):
str = self.tag_to_string(atag)
if str.startswith('Next'):
- soup2 = self.index_to_soup(atag['href'])
+ nurl = 'http://arstechnica.com' + atag['href']
+ rawc = self.index_to_soup(nurl,True)
+ soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
- texttag = soup2.find('div', attrs={'class':'news-item-text'})
+ readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
+ if readmoretag:
+ readmoretag.extract()
+ texttag = soup2.find('div', attrs={'class':'body'})
for it in texttag.findAll(style=True):
del it['style']
@@ -71,10 +76,12 @@ class ArsTechnica2(BasicNewsRecipe):
def preprocess_html(self, soup):
-
- ftag = soup.find('div', attrs={'class':'news-item-byline'})
+ ftag = soup.find('div', attrs={'class':'byline'})
if ftag:
- ftag.insert(4,'
')
+ brtag = Tag(soup,'br')
+ brtag2 = Tag(soup,'br')
+ ftag.insert(4,brtag)
+ ftag.insert(5,brtag2)
for item in soup.findAll(style=True):
del item['style']
@@ -83,5 +90,3 @@ class ArsTechnica2(BasicNewsRecipe):
return soup
-
-
diff --git a/resources/recipes/barrons.recipe b/resources/recipes/barrons.recipe
index 8106243cc0..9d79aed728 100644
--- a/resources/recipes/barrons.recipe
+++ b/resources/recipes/barrons.recipe
@@ -98,6 +98,9 @@ class Barrons(BasicNewsRecipe):
('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
]
+ def get_article_url(self, article):
+ return article.get('link', None)
+
def get_cover_url(self):
cover_url = None
diff --git a/resources/recipes/bbc_fast.recipe b/resources/recipes/bbc_fast.recipe
new file mode 100644
index 0000000000..12ae9ce1eb
--- /dev/null
+++ b/resources/recipes/bbc_fast.recipe
@@ -0,0 +1,67 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+news.bbc.co.uk
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BBC(BasicNewsRecipe):
+    '''
+    BBC News recipe that fetches each article through the print-page URL.
+    '''
+    title = 'BBC News (fast)'
+    __author__ = 'Darko Miletic'
+    description = 'News from UK. A much faster version that does not download pictures'
+    publisher = 'BBC'
+    category = 'news, UK, world'
+    language = 'en'
+    encoding = 'utf8'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    #delay = 1
+
+    extra_css = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } '
+
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    remove_tags_before = dict(name='div', attrs={'class': 'headline'})
+    remove_tags_after = dict(name='div', attrs={'class': 'footer'})
+    remove_tags = [
+        dict(name=['object', 'link', 'script', 'iframe']),
+        dict(name='div', attrs={'class': 'footer'}),
+    ]
+
+    feeds = [
+        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
+        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
+        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
+        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
+        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
+        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
+        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
+        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
+        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
+        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
+        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
+        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
+        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
+    ]
+
+    def print_version(self, url):
+        # Whatever follows the first 'http://' is appended to the print prefix
+        remainder = url.partition('http://')[2]
+        return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + remainder
+
+    def get_article_url(self, article):
+        # The feed entry's guid is used as the article URL
+        return article.get('guid')
+
diff --git a/resources/recipes/calgary_herald.recipe b/resources/recipes/calgary_herald.recipe
new file mode 100644
index 0000000000..884a951d96
--- /dev/null
+++ b/resources/recipes/calgary_herald.recipe
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+    '''
+    Recipe for the "today's paper" index of one CanWest newspaper site.
+
+    Activate exactly one title/url_prefix/description group below to pick
+    the paper; the Calgary Herald group is the active one.
+    '''
+
+    # un-comment the following three lines for the Calgary Herald
+    title = u'Calgary Herald'
+    url_prefix = 'http://www.calgaryherald.com'
+    description = u'News from Calgary, AB'
+
+    # un-comment the following three lines for the Regina Leader-Post
+    #title = u'Regina Leader-Post'
+    #url_prefix = 'http://www.leaderpost.com'
+    #description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+        .timestamp { font-size:xx-small; display: block; }
+        #storyheader { font-size: medium; }
+        #storyheader h1 { font-size: x-large; }
+        #storyheader h2 { font-size: large; font-style: italic; }
+        .byline { font-size:xx-small; }
+        #photocaption { font-size: small; font-style: italic }
+        #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'storyheader'}),
+        dict(name='div', attrs={'id':'storycontent'}),
+    ]
+    remove_tags = [
+        {'class':'comments'},
+        dict(name='div', attrs={'class':'navbar'}),
+        dict(name='div', attrs={'class':'morelinks'}),
+        dict(name='div', attrs={'class':'viewmore'}),
+        dict(name='li', attrs={'class':'email'}),
+        dict(name='div', attrs={'class':'story_tool_hr'}),
+        dict(name='div', attrs={'class':'clear'}),
+        dict(name='div', attrs={'class':'story_tool'}),
+        dict(name='div', attrs={'class':'copyright'}),
+        dict(name='div', attrs={'class':'rule_grey_solid'}),
+        dict(name='li', attrs={'class':'print'}),
+        dict(name='li', attrs={'class':'share'}),
+        dict(name='ul', attrs={'class':'bullet'}),
+    ]
+
+    def preprocess_html(self,soup):
+        '''
+        Delete the id attribute of every div whose id is empty, then return soup.
+        '''
+        # empty id attributes screw up the TOC for unknown reasons
+        for div in soup.findAll('div',attrs={'id':''}):
+            del div['id']
+        return soup
+
+
+    def parse_index(self):
+        '''
+        Build the section/article index from the today's-paper page.
+
+        Returns a list of (section name, list of article dicts) tuples, one
+        per section that has articles, in the order the section names were
+        first seen. Articles listed before any section title go to 'News'.
+        '''
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Visit every class="section_title02" and class="featurecontent" div
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+            if divtag['class'].startswith('section_title'):
+                # div contains section title
+                if not divtag.h3:
+                    continue
+                key = self.tag_to_string(divtag.h3,False)
+                # a repeated section name must not be listed (and emitted) twice
+                if key not in ans:
+                    ans.append(key)
+                self.log("Section name %s" % key)
+                continue
+            # div contains article data
+            h1tag = divtag.find('h1')
+            if not h1tag:
+                continue
+            atag = h1tag.find('a',href=True)
+            if not atag:
+                continue
+            url = self.url_prefix+'/news/todays-paper/'+atag['href']
+            title = self.tag_to_string(atag,False)
+            pubdate = ''
+            description = ''
+            ptag = divtag.find('p')
+            if ptag:
+                description = self.tag_to_string(ptag,False)
+            author = ''
+            autag = divtag.find('h4')
+            if autag:
+                author = self.tag_to_string(autag,False)
+            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        return [(section, articles[section]) for section in ans if section in articles]
diff --git a/resources/recipes/cjr.recipe b/resources/recipes/cjr.recipe
new file mode 100644
index 0000000000..d581184c4e
--- /dev/null
+++ b/resources/recipes/cjr.recipe
@@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CJR(BasicNewsRecipe):  # recipe for the Columbia Journalism Review news feed
+ title = u'Columbia Journalism Review'
+ __author__ = u'Xanthan Gum'
+ description = 'News about journalism.'
+ language = 'en'
+ # Fetch limits: article age (presumably in days -- confirm against BasicNewsRecipe) and articles per feed
+ oldest_article = 7
+ max_articles_per_feed = 100
+ # Feeds as (title, URL) pairs
+ feeds = [(u'News Stories', u'http://www.cjr.org/index.xml')]
+ # Append the page=all / print=true query string; NOTE(review): assumes url has no query string of its own
+ def print_version(self, url):
+ return url + '?page=all&print=true'
diff --git a/resources/recipes/common_dreams.recipe b/resources/recipes/common_dreams.recipe
index b662cc3ee0..5443b5890b 100644
--- a/resources/recipes/common_dreams.recipe
+++ b/resources/recipes/common_dreams.recipe
@@ -2,17 +2,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CommonDreams(BasicNewsRecipe):
+ # Identify the recipe
+
title = u'Common Dreams'
description = u'Progressive news and views'
__author__ = u'XanthanGum'
language = 'en'
+
+ # Format the text: body font stack and heading sizes
+
+ extra_css = '''
+ body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+ h1{font-size: xx-large;}
+ h2{font-size: large;}
+ '''
+ # Pick no article older than seven days and limit the number of articles per feed to 100
+
oldest_article = 7
max_articles_per_feed = 100
- feeds = [
- (u'Common Dreams Headlines',
- u'http://www.commondreams.org/feed/headlines_rss'),
- (u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'),
- (u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
- ]
+ # Remove everything before the article (the div with id "node-header")
+
+ remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
+
+ # Remove everything after the article (the div with class "copyright-info")
+
+ remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
+
+ # Identify the news feeds, as (title, URL) pairs
+
+ feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
+ (u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'),
+ (u'Views', u'http://www.commondreams.org/feed/views_rss'),
+ (u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]
diff --git a/resources/recipes/digitaljournal.recipe b/resources/recipes/digitaljournal.recipe
new file mode 100644
index 0000000000..c49caf9580
--- /dev/null
+++ b/resources/recipes/digitaljournal.recipe
@@ -0,0 +1,52 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+digitaljournal.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DigitalJournal(BasicNewsRecipe):  # recipe for the digitaljournal.com RSS feeds
+ title = 'Digital Journal'
+ __author__ = 'Darko Miletic'
+ description = 'A Global Citizen Journalism News Network'
+ category = 'news, politics, USA, world'
+ publisher = 'Digital Journal'
+ oldest_article = 2
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ encoding = 'utf8'
+ language = 'en'
+ # conversion_options reuses the metadata attributes defined above
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+ # Keep only div tags whose class is "article" or "body"
+ keep_only_tags = [dict(name='div', attrs={'class':['article','body']})]
+ # Strip object and table tags
+ remove_tags = [dict(name=['object','table'])]
+ # Feeds as (title, URL) pairs; all but the first select one department of the top_news feed
+ feeds = [
+ (u'Latest News' , u'http://digitaljournal.com/rss/?feed=latest_news' )
+ ,(u'Business' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Business' )
+ ,(u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment')
+ ,(u'Environment' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment' )
+ ,(u'Food' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Food' )
+ ,(u'Health' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Health' )
+ ,(u'Internet' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet' )
+ ,(u'Politics' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics' )
+ ,(u'Religion' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion' )
+ ,(u'Science' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Science' )
+ ,(u'Sports' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports' )
+ ,(u'Technology' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology' )
+ ,(u'World' , u'http://digitaljournal.com/rss/?feed=top_news&depname=World' )
+ ,(u'Arts' , u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts' )
+ ]
+ # Insert /print/ after the host; NOTE(review): str.replace rewrites every 'digitaljournal.com/' occurrence in url
+ def print_version(self, url):
+ return url.replace('digitaljournal.com/','digitaljournal.com/print/')
+
diff --git a/resources/recipes/discover_magazine.recipe b/resources/recipes/discover_magazine.recipe
index a3562bbbd1..cd4a078231 100644
--- a/resources/recipes/discover_magazine.recipe
+++ b/resources/recipes/discover_magazine.recipe
@@ -4,19 +4,31 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
-doscovermagazine.com
+discovermagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DiscoverMagazine(BasicNewsRecipe):
+
title = u'Discover Magazine'
description = u'Science, Technology and the Future'
__author__ = 'Mike Diaz'
- oldest_article = 33
language = 'en'
+
+ oldest_article = 33
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ remove_javascript = True
+ use_embedded_content = False
+ encoding = 'utf-8'
+ extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
+
+ remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
+ dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
+
+ remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
- max_articles_per_feed = 20
feeds = [
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
(u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe
index 217b033b81..0a98c7da28 100644
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@@ -53,6 +53,8 @@ class Economist(BasicNewsRecipe):
self.feed_dict.items()])
def eco_sort_sections(self, feeds):
+ if not feeds:
+ raise ValueError('No new articles found')
order = {
'The World This Week': 1,
'Leaders': 2,
diff --git a/resources/recipes/editor_and_publisher.recipe b/resources/recipes/editor_and_publisher.recipe
new file mode 100644
index 0000000000..c8f287a0c7
--- /dev/null
+++ b/resources/recipes/editor_and_publisher.recipe
@@ -0,0 +1,34 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+class EandP(BasicNewsRecipe):
+ title = u'Editor and Publisher'
+ __author__ = u'Xanthan Gum'
+ description = 'News about newspapers and journalism.'
+ language = 'en'
+ no_stylesheets = True
+
+ oldest_article = 7
+ max_articles_per_feed = 100
+
+ # Font formatting code borrowed from kwetal
+
+ extra_css = '''
+ body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+ h1{font-size: xx-large;}
+ h2{font-size: large;}
+ '''
+
+ # Delete everything before the article
+
+ remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
+
+ # Delete everything after the article
+
+ preprocess_regexps = [(re.compile(r'.*