diff --git a/Changelog.yaml b/Changelog.yaml
index b297823841..c5eadc5e65 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,9 +19,93 @@
# new recipes:
# - title:
-# - title: "Launch of a new website that catalogues DRM free books. http://drmfree.calibre-ebook.com"
-# description: "A growing catalogue of DRM free books. Books that you actually own after buying instead of renting."
-# type: major
+- version: 0.7.48
+ date: 2011-03-04
+
+ new features:
+ - title: "Changes to the internal database structure used by calibre"
+ description: >
+ "These changes will allow calibre, in the future, to support book language, arbitrary book identifiers and keep track of when the metadata for a book was last modified. WARNING: Because of these changes, if you downgrade calibre versions after upgrading to 0.7.48, you will lose any changes you make to the ISBN of book entries in your calibre database, so do not downgrade unless you really have to. Also note that the first time you start calibre after this update, the startup will be slow as the database structure is being changed."
+
+ - title: "Launch of a new website that catalogues DRM free ebooks. http://drmfree.calibre-ebook.com"
+ description: "A growing catalogue of DRM free ebooks. Ebooks that you actually own after paying, instead of just renting."
+ type: major
+
+ - title: "News download: Add an option to keep at most x issues of a particular periodical in the calibre library. Use the Advanced tab in the Fetch news dialog for your news source to set this option."
+ tickets: [9168]
+
+ - title: "You can now right click on the cover in the book details panel to copy/paste a new cover."
+ tickets: [9255]
+
+ - title: "Add an entry to the add books drop down menu to easily add formats to an existing book record"
+
+ - title: "Tag browser: Clicking on a nested category now searches for the category alone. Clicking twice searches for the category and all its descendants and so on."
+ tickets: [9166, 9169]
+
+ - title: "Add a button to the Manage authors dialog to copy author sort values to author"
+
+ - title: "Decrease startup times on large libraries by using a faster algorithm to parse stored dates"
+
+ - title: "Add quick create links to easily create custom columns of commonly used types to the add custom column dialog"
+
+ - title: "Allow drag drop of images to change cover in book details window."
+ tickets: [9226]
+
 - title: "Device subsystem: Create a drive info file named driveinfo.calibre in the root of each device drive for USB connected devices. This file contains various useful data. API Change: The open method of the device plugins now accepts an extra parameter library_uuid which is the id of the calibre library connected to the device"
+
+ bug fixes:
+ - title: "Conversion pipeline: Fix regression in 0.7.46 that caused loss of some CSS information when converting HTML produced by Microsoft Word. Also remove empty tags from microsoft namespaces when parsing HTML"
+
+ - title: "Try harder to ensure that the worker log temporary files are deleted in windows"
+
 - title: "CHM Input: Handle CHM files that don't specify a topics file."
+ tickets: [9253]
+
+ - title: "Fix regression that caused memory leak in Tag Browser. This would show up as the memory usage of calibre increasing when switching libraries."
+ tickets: [9246]
+
+ - title: "Fix bug that caused preferences->behavior to not show the output format set by the welcome wizard, and instead default to showing EPUB"
+
+ - title: "Fix bug that caused wrong books to be deleted from library if you choose 'delete from library and device' while the library is sorted by the On device column"
+
+ - title: "MOBI Input: Ignore all ASCII control codes except CR, NL and Tab."
+ tickets: [9219]
+
+ improved recipes:
+ - Credit Slips
+ - Seattle Times
+ - MacWorld
+ - Austin Statesman
+ - EPL Talk
+ - Gawker
+ - Deadspin
+
+ new recipes:
+ - title: "Thai Post Today and Daily Post"
+ author: "Chotechai P."
+
+ - title: "RBC.ru"
+ author: Chewi
+
+ - title: Helsingin Sanomat
+ author: oneillpt
+
+ - title: "LWN Weekly"
+ author: David Cavalca
+
+ - title: "New York Times Sports and Technology Blogs"
+ author: rylsfan
+
+ - title: "Historia and Buctaras"
+ author: Silviu Cotoara
+
+ - title: "Buffalo News"
+ author: ChappyOnIce
+
+ - title: "Dotpod"
+ author: Federico Escalada
+
+
- version: 0.7.47
date: 2011-02-25
@@ -90,7 +174,7 @@
author: Ricardo Jurado
- title: "Various Romanian news sources"
- author: Silviu Coatara
+ author: Silviu Cotoara
- title: "Osnews.pl and SwiatCzytnikow"
author: Tomasz Dlugosz
diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index 2303c6c108..38c1685b7c 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -349,3 +349,9 @@ public_smtp_relay_delay = 301
# after a restart of calibre.
draw_hidden_section_indicators = True
+#: The maximum width and height for covers saved in the calibre library
+# All covers in the calibre library will be resized, preserving aspect ratio,
+# to fit within this size. This is to prevent slowdowns caused by extremely
+# large covers
+maximum_cover_size = (1200, 1600)
+
diff --git a/resources/images/id_card.png b/resources/images/id_card.png
new file mode 100644
index 0000000000..80ac5fda11
Binary files /dev/null and b/resources/images/id_card.png differ
diff --git a/resources/images/news/cotidianul.png b/resources/images/news/cotidianul.png
new file mode 100644
index 0000000000..2e57dbde54
Binary files /dev/null and b/resources/images/news/cotidianul.png differ
diff --git a/resources/images/news/credit_slips.png b/resources/images/news/credit_slips.png
new file mode 100644
index 0000000000..50ac1dc02e
Binary files /dev/null and b/resources/images/news/credit_slips.png differ
diff --git a/resources/images/news/ele.png b/resources/images/news/ele.png
new file mode 100644
index 0000000000..82f66b5caa
Binary files /dev/null and b/resources/images/news/ele.png differ
diff --git a/resources/images/news/felicia.png b/resources/images/news/felicia.png
new file mode 100644
index 0000000000..4bc1fd35d8
Binary files /dev/null and b/resources/images/news/felicia.png differ
diff --git a/resources/images/news/financiarul.png b/resources/images/news/financiarul.png
new file mode 100644
index 0000000000..1d91a72a34
Binary files /dev/null and b/resources/images/news/financiarul.png differ
diff --git a/resources/images/news/hitro.png b/resources/images/news/hitro.png
new file mode 100644
index 0000000000..75c08a1c25
Binary files /dev/null and b/resources/images/news/hitro.png differ
diff --git a/resources/images/news/imperatortravel.png b/resources/images/news/imperatortravel.png
new file mode 100644
index 0000000000..c459759ed0
Binary files /dev/null and b/resources/images/news/imperatortravel.png differ
diff --git a/resources/images/news/kamikaze.png b/resources/images/news/kamikaze.png
new file mode 100644
index 0000000000..49ef2f50a1
Binary files /dev/null and b/resources/images/news/kamikaze.png differ
diff --git a/resources/images/news/kompiutierra.png b/resources/images/news/kompiutierra.png
new file mode 100644
index 0000000000..272e3d905f
Binary files /dev/null and b/resources/images/news/kompiutierra.png differ
diff --git a/resources/images/news/lwn_weekly.png b/resources/images/news/lwn_weekly.png
new file mode 100644
index 0000000000..0fc654add9
Binary files /dev/null and b/resources/images/news/lwn_weekly.png differ
diff --git a/resources/images/news/monden.png b/resources/images/news/monden.png
new file mode 100644
index 0000000000..fcf8ad42ae
Binary files /dev/null and b/resources/images/news/monden.png differ
diff --git a/resources/images/news/nytimes_sports.png b/resources/images/news/nytimes_sports.png
new file mode 100644
index 0000000000..b587be8de0
Binary files /dev/null and b/resources/images/news/nytimes_sports.png differ
diff --git a/resources/images/news/nytimes_tech.png b/resources/images/news/nytimes_tech.png
new file mode 100644
index 0000000000..64ff8b5eb2
Binary files /dev/null and b/resources/images/news/nytimes_tech.png differ
diff --git a/resources/images/news/promotor.png b/resources/images/news/promotor.png
new file mode 100644
index 0000000000..a479cf135b
Binary files /dev/null and b/resources/images/news/promotor.png differ
diff --git a/resources/images/news/rbc_ru.png b/resources/images/news/rbc_ru.png
new file mode 100644
index 0000000000..46c5d3fdce
Binary files /dev/null and b/resources/images/news/rbc_ru.png differ
diff --git a/resources/images/news/timesnewroman.png b/resources/images/news/timesnewroman.png
new file mode 100644
index 0000000000..6ba02939b4
Binary files /dev/null and b/resources/images/news/timesnewroman.png differ
diff --git a/resources/images/news/trombon.png b/resources/images/news/trombon.png
new file mode 100644
index 0000000000..641b04f1b7
Binary files /dev/null and b/resources/images/news/trombon.png differ
diff --git a/resources/images/news/wallstreetro.png b/resources/images/news/wallstreetro.png
new file mode 100644
index 0000000000..d72bc70ca0
Binary files /dev/null and b/resources/images/news/wallstreetro.png differ
diff --git a/resources/recipes/buffalo_news.recipe b/resources/recipes/buffalo_news.recipe
index 92c96757ae..51985a3c51 100644
--- a/resources/recipes/buffalo_news.recipe
+++ b/resources/recipes/buffalo_news.recipe
@@ -1,8 +1,8 @@
__license__ = 'GPL v3'
__author__ = 'Todd Chapman'
__copyright__ = 'Todd Chapman'
-__version__ = 'v0.1'
-__date__ = '26 February 2011'
+__version__ = 'v0.2'
+__date__ = '2 March 2011'
'''
http://www.buffalonews.com/RSS/
@@ -12,12 +12,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1298680852(BasicNewsRecipe):
title = u'Buffalo News'
- __author__ = 'ChappyOnIce'
- language = 'en'
oldest_article = 2
+ language = 'en'
+ __author__ = 'ChappyOnIce'
max_articles_per_feed = 20
encoding = 'utf-8'
+ masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
remove_javascript = True
+ extra_css = 'body {text-align: justify;}\n \
+ p {text-indent: 20px;}'
+
keep_only_tags = [
dict(name='div', attrs={'class':['main-content-left']})
]
@@ -28,9 +32,7 @@ class AdvancedUserRecipe1298680852(BasicNewsRecipe):
]
remove_tags_after = dict(name='div', attrs={'class':['body storyContent']})
- conversion_options = {
- 'base_font_size' : 14,
- }
+
feeds = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
(u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
(u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
diff --git a/resources/recipes/cotidianul.recipe b/resources/recipes/cotidianul.recipe
new file mode 100644
index 0000000000..f00196532c
--- /dev/null
+++ b/resources/recipes/cotidianul.recipe
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+cotidianul.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Cotidianul(BasicNewsRecipe):
+ title = u'Cotidianul'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u''
+ publisher = u'Cotidianul'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Stiri'
+ encoding = 'utf-8'
+ cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'titlu'})
+ , dict(name='div', attrs={'class':'gallery clearfix'})
+ , dict(name='div', attrs={'align':'justify'})
+ ]
+
+ remove_tags = [
+ dict(name='div', attrs={'class':['space']})
+ , dict(name='div', attrs={'id':['title_desc']})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'class':['space']})
+ , dict(name='span', attrs={'class':['date']})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml')
+ ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/resources/recipes/credit_slips.recipe b/resources/recipes/credit_slips.recipe
index 19e19ca2fb..d4fb3a94c0 100644
--- a/resources/recipes/credit_slips.recipe
+++ b/resources/recipes/credit_slips.recipe
@@ -1,35 +1,44 @@
#!/usr/bin/env python
__license__ = 'GPL 3'
-__copyright__ = 'zotzot'
+__copyright__ = 'zotzo'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class CreditSlips(BasicNewsRecipe):
- __license__ = 'GPL v3'
- __author__ = 'zotzot'
language = 'en'
- version = 1
+ __author__ = 'zotzot'
+ version = 2
title = u'Credit Slips.org'
publisher = u'Bankr-L'
category = u'Economic blog'
- description = u'All things about credit.'
- cover_url = 'http://bit.ly/hyZSTr'
- oldest_article = 50
+ description = u'A discussion on credit and bankruptcy'
+ cover_url = 'http://bit.ly/eAKNCB'
+ oldest_article = 15
max_articles_per_feed = 100
use_embedded_content = True
+ no_stylesheets = True
+ remove_javascript = True
+
+ conversion_options = {
+ 'comments': description,
+ 'tags': category,
+ 'language': 'en',
+ 'publisher': publisher,
+ }
feeds = [
-(u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml')
-]
- conversion_options = {
-'comments': description,
-'tags': category,
-'language': 'en',
-'publisher': publisher
-}
- extra_css = '''
- body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
- img {float: left; margin-right: 0.5em;}
- '''
+ (u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml')
+ ]
+
+ extra_css = '''
+ .author {font-family:Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ p {font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ body {font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ def populate_article_metadata(self, article, soup, first):
+ h2 = soup.find('h2')
+ h2.replaceWith(h2.prettify() + '
', re.DOTALL|re.IGNORECASE), # for entertainment page
- lambda match: '')
+ lambda match: ''),
+ # skip after title in life.mingpao.com fetched article
+ (re.compile(r"
", re.DOTALL|re.IGNORECASE),
+ lambda match: "
"),
+ (re.compile(r"
", re.DOTALL|re.IGNORECASE),
+ lambda match: "")
]
- def image_url_processor(cls, baseurl, url):
- # trick: break the url at the first occurance of digit, add an additional
- # '_' at the front
- # not working, may need to move this to preprocess_html() method
+ def image_url_processor(cls, baseurl, url):
+ # trick: break the url at the first occurrence of digit, add an additional
+ # '_' at the front
+ # not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
@@ -99,253 +115,314 @@ class MPHKRecipe(BasicNewsRecipe):
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
- return url
+ return url
- def get_dtlocal(self):
- dt_utc = datetime.datetime.utcnow()
- # convert UTC to local hk time - at around HKT 6.00am, all news are available
- dt_local = dt_utc - datetime.timedelta(-2.0/24)
- return dt_local
+ def get_dtlocal(self):
+ dt_utc = datetime.datetime.utcnow()
+ # convert UTC to local hk time - at around HKT 6.00am, all news are available
+ dt_local = dt_utc - datetime.timedelta(-2.0/24)
+ return dt_local
- def get_fetchdate(self):
- return self.get_dtlocal().strftime("%Y%m%d")
+ def get_fetchdate(self):
+ return self.get_dtlocal().strftime("%Y%m%d")
- def get_fetchformatteddate(self):
- return self.get_dtlocal().strftime("%Y-%m-%d")
+ def get_fetchformatteddate(self):
+ return self.get_dtlocal().strftime("%Y-%m-%d")
- def get_fetchday(self):
- # convert UTC to local hk time - at around HKT 6.00am, all news are available
- return self.get_dtlocal().strftime("%d")
+ def get_fetchday(self):
+ # convert UTC to local hk time - at around HKT 6.00am, all news are available
+ return self.get_dtlocal().strftime("%d")
- def get_cover_url(self):
- cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
- br = BasicNewsRecipe.get_browser()
- try:
- br.open(cover)
- except:
- cover = None
- return cover
+ def get_cover_url(self):
+ cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
+ br = BasicNewsRecipe.get_browser()
+ try:
+ br.open(cover)
+ except:
+ cover = None
+ return cover
- def parse_index(self):
- feeds = []
- dateStr = self.get_fetchdate()
- for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
- (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
- (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
- (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
+ def parse_index(self):
+ feeds = []
+ dateStr = self.get_fetchdate()
+
+ for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
+ (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
+ (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+
+ # special- editorial
+ ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+ if ed_articles:
+ feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+
+ for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
- (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
- ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
- (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
- (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
- (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+ (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+
+ # special - finance
+ #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
+ fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+ if fin_articles:
+ feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+
+ for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+ (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+
+ # special - entertainment
+ ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+ if ent_articles:
+ feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+
+ for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
- articles = self.parse_section(url)
- if articles:
- feeds.append((title, articles))
- # special - finance
- fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
- if fin_articles:
- feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
- # special - entertainment
- ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
- if ent_articles:
- feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
- return feeds
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
- def parse_section(self, url):
- dateStr = self.get_fetchdate()
- soup = self.index_to_soup(url)
- divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
- current_articles = []
- included_urls = []
- divs.reverse()
- for i in divs:
- a = i.find('a', href = True)
- title = self.tag_to_string(a)
- url = a.get('href', False)
- url = 'http://news.mingpao.com/' + dateStr + '/' +url
- if url not in included_urls and url.rfind('Redirect') == -1:
- current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
- def parse_fin_section(self, url):
- dateStr = self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href= True)
- current_articles = []
- included_urls = []
- for i in a:
- url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
- if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
- title = self.tag_to_string(i)
- current_articles.append({'title': title, 'url': url, 'description':''})
- included_urls.append(url)
- return current_articles
+ # special- columns
+ col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
+ if col_articles:
+ feeds.append((u'\u5c08\u6b04 Columns', col_articles))
- def parse_ent_section(self, url):
- self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href=True)
- a.reverse()
- current_articles = []
- included_urls = []
- for i in a:
- title = self.tag_to_string(i)
- url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
- if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
- current_articles.append({'title': title, 'url': url, 'description': ''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
+ return feeds
- def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- for item in soup.findAll(style=True):
- del item['width']
- for item in soup.findAll(stype=True):
- del item['absmiddle']
- return soup
+ def parse_section(self, url):
+ dateStr = self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
+ current_articles = []
+ included_urls = []
+ divs.reverse()
+ for i in divs:
+ a = i.find('a', href = True)
+ title = self.tag_to_string(a)
+ url = a.get('href', False)
+ url = 'http://news.mingpao.com/' + dateStr + '/' +url
+ if url not in included_urls and url.rfind('Redirect') == -1:
+ current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- def create_opf(self, feeds, dir=None):
- if dir is None:
- dir = self.output_dir
- if self.IsCJKWellSupported == True:
- # use Chinese title
- title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
- else:
- # use English title
- title = self.short_title() + ' ' + self.get_fetchformatteddate()
- if True: # force date in title
- # title += strftime(self.timefmt)
- mi = MetaInformation(title, [self.publisher])
- mi.publisher = self.publisher
- mi.author_sort = self.publisher
- if self.IsCJKWellSupported == True:
- mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
- else:
- mi.publication_type = self.publication_type+':'+self.short_title()
- #mi.timestamp = nowf()
- mi.timestamp = self.get_dtlocal()
- mi.comments = self.description
- if not isinstance(mi.comments, unicode):
- mi.comments = mi.comments.decode('utf-8', 'replace')
- #mi.pubdate = nowf()
- mi.pubdate = self.get_dtlocal()
- opf_path = os.path.join(dir, 'index.opf')
- ncx_path = os.path.join(dir, 'index.ncx')
- opf = OPFCreator(dir, mi)
- # Add mastheadImage entry to section
- mp = getattr(self, 'masthead_path', None)
- if mp is not None and os.access(mp, os.R_OK):
- from calibre.ebooks.metadata.opf2 import Guide
- ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
- ref.type = 'masthead'
- ref.title = 'Masthead Image'
- opf.guide.append(ref)
+ def parse_ed_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
- manifest.append(os.path.join(dir, 'index.html'))
- manifest.append(os.path.join(dir, 'index.ncx'))
+ def parse_fin_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href= True)
+ current_articles = []
+ included_urls = []
+ for i in a:
+ #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
+ if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
+ title = self.tag_to_string(i)
+ current_articles.append({'title': title, 'url': url, 'description':''})
+ included_urls.append(url)
+ return current_articles
- # Get cover
- cpath = getattr(self, 'cover_path', None)
- if cpath is None:
- pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
- if self.default_cover(pf):
- cpath = pf.name
- if cpath is not None and os.access(cpath, os.R_OK):
- opf.cover = cpath
- manifest.append(cpath)
+ def parse_ent_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- # Get masthead
- mpath = getattr(self, 'masthead_path', None)
- if mpath is not None and os.access(mpath, os.R_OK):
- manifest.append(mpath)
+ def parse_col_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- opf.create_manifest_from_files_in(manifest)
- for mani in opf.manifest:
- if mani.path.endswith('.ncx'):
- mani.id = 'ncx'
- if mani.path.endswith('mastheadImage.jpg'):
- mani.id = 'masthead-image'
- entries = ['index.html']
- toc = TOC(base_path=dir)
- self.play_order_counter = 0
- self.play_order_map = {}
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ for item in soup.findAll(style=True):
+ del item['width']
+ for item in soup.findAll(stype=True):
+ del item['absmiddle']
+ return soup
- def feed_index(num, parent):
- f = feeds[num]
- for j, a in enumerate(f):
- if getattr(a, 'downloaded', False):
- adir = 'feed_%d/article_%d/'%(num, j)
- auth = a.author
- if not auth:
- auth = None
- desc = a.text_summary
- if not desc:
- desc = None
- else:
- desc = self.description_limiter(desc)
- entries.append('%sindex.html'%adir)
- po = self.play_order_map.get(entries[-1], None)
- if po is None:
- self.play_order_counter += 1
- po = self.play_order_counter
- parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+ def create_opf(self, feeds, dir=None):
+ if dir is None:
+ dir = self.output_dir
+ if __UseChineseTitle__ == True:
+ title = u'\u660e\u5831 (\u9999\u6e2f)'
+ else:
+ title = self.short_title()
+ # if not generating a periodical, force date to apply in title
+ if __MakePeriodical__ == False:
+ title = title + ' ' + self.get_fetchformatteddate()
+ if True:
+ mi = MetaInformation(title, [self.publisher])
+ mi.publisher = self.publisher
+ mi.author_sort = self.publisher
+ if __MakePeriodical__ == True:
+ mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+ else:
+ mi.publication_type = self.publication_type+':'+self.short_title()
+ #mi.timestamp = nowf()
+ mi.timestamp = self.get_dtlocal()
+ mi.comments = self.description
+ if not isinstance(mi.comments, unicode):
+ mi.comments = mi.comments.decode('utf-8', 'replace')
+ #mi.pubdate = nowf()
+ mi.pubdate = self.get_dtlocal()
+ opf_path = os.path.join(dir, 'index.opf')
+ ncx_path = os.path.join(dir, 'index.ncx')
+ opf = OPFCreator(dir, mi)
+ # Add mastheadImage entry to section
+ mp = getattr(self, 'masthead_path', None)
+ if mp is not None and os.access(mp, os.R_OK):
+ from calibre.ebooks.metadata.opf2 import Guide
+ ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+ ref.type = 'masthead'
+ ref.title = 'Masthead Image'
+ opf.guide.append(ref)
+
+ manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+ manifest.append(os.path.join(dir, 'index.html'))
+ manifest.append(os.path.join(dir, 'index.ncx'))
+
+ # Get cover
+ cpath = getattr(self, 'cover_path', None)
+ if cpath is None:
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+ if self.default_cover(pf):
+ cpath = pf.name
+ if cpath is not None and os.access(cpath, os.R_OK):
+ opf.cover = cpath
+ manifest.append(cpath)
+
+ # Get masthead
+ mpath = getattr(self, 'masthead_path', None)
+ if mpath is not None and os.access(mpath, os.R_OK):
+ manifest.append(mpath)
+
+ opf.create_manifest_from_files_in(manifest)
+ for mani in opf.manifest:
+ if mani.path.endswith('.ncx'):
+ mani.id = 'ncx'
+ if mani.path.endswith('mastheadImage.jpg'):
+ mani.id = 'masthead-image'
+ entries = ['index.html']
+ toc = TOC(base_path=dir)
+ self.play_order_counter = 0
+ self.play_order_map = {}
+
+ def feed_index(num, parent):
+ f = feeds[num]
+ for j, a in enumerate(f):
+ if getattr(a, 'downloaded', False):
+ adir = 'feed_%d/article_%d/'%(num, j)
+ auth = a.author
+ if not auth:
+ auth = None
+ desc = a.text_summary
+ if not desc:
+ desc = None
+ else:
+ desc = self.description_limiter(desc)
+ entries.append('%sindex.html'%adir)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
- last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
- for sp in a.sub_pages:
- prefix = os.path.commonprefix([opf_path, sp])
- relp = sp[len(prefix):]
- entries.append(relp.replace(os.sep, '/'))
- last = sp
+ last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+ for sp in a.sub_pages:
+ prefix = os.path.commonprefix([opf_path, sp])
+ relp = sp[len(prefix):]
+ entries.append(relp.replace(os.sep, '/'))
+ last = sp
- if os.path.exists(last):
- with open(last, 'rb') as fi:
- src = fi.read().decode('utf-8')
- soup = BeautifulSoup(src)
- body = soup.find('body')
- if body is not None:
- prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
- templ = self.navbar.generate(True, num, j, len(f),
+ if os.path.exists(last):
+ with open(last, 'rb') as fi:
+ src = fi.read().decode('utf-8')
+ soup = BeautifulSoup(src)
+ body = soup.find('body')
+ if body is not None:
+ prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+ templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
- elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
- body.insert(len(body.contents), elem)
- with open(last, 'wb') as fi:
- fi.write(unicode(soup).encode('utf-8'))
- if len(feeds) == 0:
- raise Exception('All feeds are empty, aborting.')
+ elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+ body.insert(len(body.contents), elem)
+ with open(last, 'wb') as fi:
+ fi.write(unicode(soup).encode('utf-8'))
+ if len(feeds) == 0:
+ raise Exception('All feeds are empty, aborting.')
- if len(feeds) > 1:
- for i, f in enumerate(feeds):
- entries.append('feed_%d/index.html'%i)
- po = self.play_order_map.get(entries[-1], None)
- if po is None:
- self.play_order_counter += 1
- po = self.play_order_counter
- auth = getattr(f, 'author', None)
- if not auth:
- auth = None
- desc = getattr(f, 'description', None)
- if not desc:
- desc = None
- feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+ if len(feeds) > 1:
+ for i, f in enumerate(feeds):
+ entries.append('feed_%d/index.html'%i)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ auth = getattr(f, 'author', None)
+ if not auth:
+ auth = None
+ desc = getattr(f, 'description', None)
+ if not desc:
+ desc = None
+ feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
- else:
- entries.append('feed_%d/index.html'%0)
- feed_index(0, toc)
+ else:
+ entries.append('feed_%d/index.html'%0)
+ feed_index(0, toc)
- for i, p in enumerate(entries):
- entries[i] = os.path.join(dir, p.replace('/', os.sep))
- opf.create_spine(entries)
- opf.set_toc(toc)
+ for i, p in enumerate(entries):
+ entries[i] = os.path.join(dir, p.replace('/', os.sep))
+ opf.create_spine(entries)
+ opf.set_toc(toc)
- with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
- opf.render(opf_file, ncx_file)
+ with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+ opf.render(opf_file, ncx_file)
diff --git a/resources/recipes/monden.recipe b/resources/recipes/monden.recipe
new file mode 100644
index 0000000000..22764ffe47
--- /dev/null
+++ b/resources/recipes/monden.recipe
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+monden.info
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Monden(BasicNewsRecipe):
+ title = u'Monden'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
+ publisher = u'Monden'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Stiri,Muzica'
+ encoding = 'utf-8'
+ cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ keep_only_tags = [
+ dict(name='div', attrs={'id':'content'})
+ ]
+
+ remove_tags = [
+ dict(name='div', attrs={'class':['postAuthor']})
+ , dict(name='div', attrs={'class':['postLike']})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'class':['postLike']})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.monden.info/feed/')
+ ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/resources/recipes/nationalgeoro.recipe b/resources/recipes/nationalgeoro.recipe
index a3c5727d38..8f989be74d 100644
--- a/resources/recipes/nationalgeoro.recipe
+++ b/resources/recipes/nationalgeoro.recipe
@@ -14,7 +14,7 @@ class NationalGeoRo(BasicNewsRecipe):
__author__ = u'Silviu Cotoar\u0103'
description = u'S\u0103 avem grij\u0103 de planet\u0103'
publisher = 'National Geographic'
- oldest_article = 5
+ oldest_article = 35
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
diff --git a/resources/recipes/nrc-nl-epub.recipe b/resources/recipes/nrc-nl-epub.recipe
index da9b9195ce..2d190e4d0a 100644
--- a/resources/recipes/nrc-nl-epub.recipe
+++ b/resources/recipes/nrc-nl-epub.recipe
@@ -1,14 +1,14 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
# -*- coding: utf-8 -*-
-#Based on Lars Jacob's Taz Digiabo recipe
+#Based on veezh's original recipe and Kovid Goyal's New York Times recipe
__license__ = 'GPL v3'
-__copyright__ = '2010, veezh'
+__copyright__ = '2011, Snaab'
'''
www.nrc.nl
'''
-import os, urllib2, zipfile
+import os, zipfile
import time
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
class NRCHandelsblad(BasicNewsRecipe):
title = u'NRC Handelsblad'
- description = u'De EPUB-versie van NRC'
+ description = u'De ePaper-versie van NRC'
language = 'nl'
lang = 'nl-NL'
+ needs_subscription = True
- __author__ = 'veezh'
+ __author__ = 'Snaab'
conversion_options = {
'no_default_epub_cover' : True
}
+ def get_browser(self):
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('http://login.nrc.nl/login')
+ br.select_form(nr=0)
+ br['username'] = self.username
+ br['password'] = self.password
+ br.submit()
+ return br
+
def build_index(self):
+
today = time.strftime("%Y%m%d")
+
domain = "http://digitaleeditie.nrc.nl"
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
-# print url
+ #print url
try:
- f = urllib2.urlopen(url)
- except urllib2.HTTPError:
+ br = self.get_browser()
+ f = br.open(url)
+ except:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
raise ValueError('Krant van vandaag nog niet beschikbaar')
+
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
- tmp.close()
-
- zfile = zipfile.ZipFile(tmp.name, 'r')
- self.report_progress(0,_('extracting epub'))
-
- zfile.extractall(self.output_dir)
+ f.close()
+ br.close()
+ if zipfile.is_zipfile(tmp):
+ try:
+ zfile = zipfile.ZipFile(tmp.name, 'r')
+ zfile.extractall(self.output_dir)
+ self.report_progress(0,_('extracting epub'))
+ except zipfile.BadZipfile:
+ self.report_progress(0,_('BadZip error, continuing'))
tmp.close()
- index = os.path.join(self.output_dir, 'content.opf')
+ index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted'))
diff --git a/resources/recipes/nytimes_sports.recipe b/resources/recipes/nytimes_sports.recipe
new file mode 100644
index 0000000000..f394fc3232
--- /dev/null
+++ b/resources/recipes/nytimes_sports.recipe
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = 'zotzo'
+__docformat__ = 'restructuredtext en'
+"""
+http://fifthdown.blogs.nytimes.com/
+http://offthedribble.blogs.nytimes.com/
+http://thequad.blogs.nytimes.com/
+http://slapshot.blogs.nytimes.com/
+http://goal.blogs.nytimes.com/
+http://bats.blogs.nytimes.com/
+http://straightsets.blogs.nytimes.com/
+http://formulaone.blogs.nytimes.com/
+http://onpar.blogs.nytimes.com/
+"""
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NYTimesSports(BasicNewsRecipe):
+ title = 'New York Times Sports Beat'
+ language = 'en'
+ __author__ = 'rylsfan'
+ description = 'Indepth sports from the New York Times'
+ publisher = 'The New York Times'
+ category = 'Sports'
+ oldest_article = 3
+ max_articles_per_feed = 25
+ no_stylesheets = True
+ language = 'en'
+ #cover_url ='http://bit.ly/h8F4DO'
+ feeds = [
+ (u'The Fifth Down', u'http://fifthdown.blogs.nytimes.com/feed/'),
+ (u'Off The Dribble', u'http://offthedribble.blogs.nytimes.com/feed/'),
+ (u'The Quad', u'http://thequad.blogs.nytimes.com/feed/'),
+ (u'Slap Shot', u'http://slapshot.blogs.nytimes.com/feed/'),
+ (u'Goal', u'http://goal.blogs.nytimes.com/feed/'),
+ (u'Bats', u'http://bats.blogs.nytimes.com/feed/'),
+ (u'Straight Sets', u'http://straightsets.blogs.nytimes.com/feed/'),
+ (u'Formula One', u'http://formulaone.blogs.nytimes.com/feed/'),
+ (u'On Par', u'http://onpar.blogs.nytimes.com/feed/'),
+ ]
+ keep_only_tags = [dict(name='div', attrs={'id':'header'}),
+ dict(name='h1'),
+ dict(name='h2'),
+ dict(name='div', attrs={'class':'entry-content'})]
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
diff --git a/resources/recipes/nytimes_tech.recipe b/resources/recipes/nytimes_tech.recipe
new file mode 100644
index 0000000000..7db2db9781
--- /dev/null
+++ b/resources/recipes/nytimes_tech.recipe
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = 'zotzo'
+__docformat__ = 'restructuredtext en'
+"""
+http://pogue.blogs.nytimes.com/
+"""
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NYTimesTechnology(BasicNewsRecipe):
+ title = 'New York Times Technology Beat'
+ language = 'en'
+ __author__ = 'David Pogue'
+ description = 'The latest in technology from David Pogue'
+ publisher = 'The New York Times'
+ category = 'Technology'
+ oldest_article = 14
+ max_articles_per_feed = 25
+ no_stylesheets = True
+ language = 'en'
+ cover_url ='http://bit.ly/g0SKJT'
+ feeds = [
+ (u'Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
+ (u'Bits', u'http://bits.blogs.nytimes.com/feed/'),
+ (u'Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
+ (u'Open', u'http://open.blogs.nytimes.com/feed/')
+ ]
+ keep_only_tags = [dict(name='div', attrs={'id':'header'}),
+ dict(name='h1'),
+ dict(name='h2'),
+ dict(name='div', attrs={'class':'entry-content'})]
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif;
+ font-weight:bold;font-size:large;}
+
+ h2{font-family:Arial,Helvetica,sans-serif;
+ font-weight:normal;font-size:small;}
+
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
diff --git a/resources/recipes/post_today.recipe b/resources/recipes/post_today.recipe
new file mode 100644
index 0000000000..a86e154b84
--- /dev/null
+++ b/resources/recipes/post_today.recipe
@@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1299061355(BasicNewsRecipe):
+ title = u'Post Today'
+ language = 'th'
+ __author__ = "Chotechai P."
+ oldest_article = 7
+ max_articles_per_feed = 100
+ cover_url = 'http://upload.wikimedia.org/wikipedia/th/2/2e/Posttoday_Logo.png'
+ feeds = [(u'Breaking News', u'http://www.posttoday.com/rss/src/breakingnews.xml'), (u'\u0e02\u0e48\u0e32\u0e27', u'http://www.posttoday.com/rss/src/news.xml'), (u'\u0e27\u0e34\u0e40\u0e04\u0e23\u0e32\u0e30\u0e2b\u0e4c', u'http://www.posttoday.com/rss/src/analyse.xml'), (u'\u0e40\u0e21\u0e32\u0e17\u0e4c\u0e01\u0e31\u0e19\u0e43\u0e2b\u0e49 z', u'http://www.posttoday.com/rss/src/mouth.xml'), (u'\u0e44\u0e17\u0e22\u0e42\u0e0b\u0e44\u0e0b\u0e15\u0e35\u0e49', u'http://www.posttoday.com/rss/src/thaisociety.xml'), (u'\u0e44\u0e25\u0e1f\u0e4c\u0e2a\u0e44\u0e15\u0e25\u0e4c', u'http://www.posttoday.com/rss/src/lifestyle.xml'), (u'\u0e0a\u0e35\u0e49\u0e0a\u0e48\u0e2d\u0e07\u0e23\u0e27\u0e22', u'http://www.posttoday.com/rss/src/moneyguide.xml'), (u'\u0e1a\u0e49\u0e32\u0e19-\u0e04\u0e2d\u0e19\u0e42\u0e14', u'http://www.posttoday.com/rss/src/homecondo.xml'), (u'\u0e22\u0e32\u0e19\u0e22\u0e19\u0e15\u0e4c', u'http://www.posttoday.com/rss/src/motor.xml'), (u'\u0e14\u0e34\u0e08\u0e34\u0e15\u0e2d\u0e25\u0e44\u0e25\u0e1f\u0e4c', u'http://www.posttoday.com/rss/src/digitallife.xml'), (u'\u0e01\u0e35\u0e2c\u0e32', u'http://www.posttoday.com/rss/src/sport.xml'), (u'\u0e23\u0e2d\u0e1a\u0e42\u0e25\u0e01', u'http://www.posttoday.com/rss/src/world.xml'), (u'\u0e01\u0e34\u0e19-\u0e40\u0e17\u0e35\u0e48\u0e22\u0e27', u'http://www.posttoday.com/rss/src/eattravel.xml'), (u'Mind & Soul', u'http://www.posttoday.com/rss/src/mindsoul.xml'), (u'\u0e1a\u0e25\u0e47\u0e2d\u0e01 \u0e1a\u0e01.', u'http://www.posttoday.com/rss/src/blogs.xml')]
+ keep_only_tags = []
+ keep_only_tags.append(dict(name = 'div', attrs = {'class' :
+'articleContents'}))
+
+ remove_tags = []
+ remove_tags.append(dict(name = 'label'))
+ remove_tags.append(dict(name = 'span'))
+ remove_tags.append(dict(name = 'div', attrs = {'class' :
+'socialBookmark'}))
+ remove_tags.append(dict(name = 'div', attrs = {'class' :
+'misc'}))
diff --git a/resources/recipes/promotor.recipe b/resources/recipes/promotor.recipe
new file mode 100644
index 0000000000..11a8499d7b
--- /dev/null
+++ b/resources/recipes/promotor.recipe
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+promotor.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Promotor(BasicNewsRecipe):
+ title = u'Promotor'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Auto-moto'
+ publisher = u'Promotor'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Reviste,TV,Auto'
+ encoding = 'utf-8'
+ cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+ .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
+ .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'casetatitluarticol'})
+ , dict(name='div', attrs={'style':'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'})
+ , dict(name='div', attrs={'class':'textb'})
+ , dict(name='div', attrs={'class':'contentarticol'})
+ ]
+
+ remove_tags = [
+ dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']})
+ , dict(name='div', attrs={'class':['etichetagry']})
+ , dict(name='span', attrs={'class':['textb']})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'class':['etichetagry']})
+ , dict(name='span', attrs={'class':['textb']})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.promotor.ro/rss')
+ ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/resources/recipes/rbc_ru.recipe b/resources/recipes/rbc_ru.recipe
new file mode 100644
index 0000000000..4c377a334b
--- /dev/null
+++ b/resources/recipes/rbc_ru.recipe
@@ -0,0 +1,49 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1286819935(BasicNewsRecipe):
+ title = u'RBC.ru'
+ __author__ = 'A. Chewi'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ conversion_options = {'linearize_tables' : True}
+ remove_attributes = ['style']
+ language = 'ru'
+ timefmt = ' [%a, %d %b, %Y]'
+
+ keep_only_tags = [dict(name='h2', attrs={}),
+ dict(name='div', attrs={'class': 'box _ga1_on_'}),
+ dict(name='h1', attrs={'class': 'news_section'}),
+ dict(name='div', attrs={'class': 'news_body dotted_border_bottom'}),
+ dict(name='table', attrs={'class': 'newsBody'}),
+ dict(name='h2', attrs={'class': 'black'})]
+
+ feeds = [(u'Главные новости', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/mainnews.rss'),
+ (u'Политика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/politics.rss'),
+ (u'Экономика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/economics.rss'),
+ (u'Общество', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/society.rss'),
+ (u'Происшествия', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/incidents.rss'),
+ (u'Финансовые новости Quote.rbc.ru', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/quote.ru/mainnews.rss')]
+
+
+ remove_tags = [dict(name='div', attrs={'class': "video-frame"}),
+ dict(name='div', attrs={'class': "photo-container videoContainer videoSWFLinks videoPreviewSlideContainer notes"}),
+ dict(name='div', attrs={'class': "notes"}),
+ dict(name='div', attrs={'class': "publinks"}),
+ dict(name='a', attrs={'class': "print"}),
+ dict(name='div', attrs={'class': "photo-report_new notes newslider"}),
+ dict(name='div', attrs={'class': "videoContainer"}),
+ dict(name='div', attrs={'class': "videoPreviewSlideContainer"}),
+ dict(name='a', attrs={'class': "videoPreviewContainer"}),
+ dict(name='a', attrs={'class': "red"}),]
+
+ def preprocess_html(self, soup):
+ for alink in soup.findAll('a'):
+ if alink.string is not None:
+ tstr = alink.string
+ alink.replaceWith(tstr)
+ return soup
+
+ def print_version(self, url):
+ return url + '?print=true'
diff --git a/resources/recipes/seattle_times.recipe b/resources/recipes/seattle_times.recipe
index cd7f96fc8b..1d72df2991 100644
--- a/resources/recipes/seattle_times.recipe
+++ b/resources/recipes/seattle_times.recipe
@@ -69,12 +69,16 @@ class SeattleTimes(BasicNewsRecipe):
u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
]
+ keep_only_tags = [dict(id='content')]
remove_tags = [
- dict(name=['object','link','script'])
- ,dict(name='p', attrs={'class':'permission'})
+ dict(name=['object','link','script']),
+ {'class':['permission', 'note', 'bottomtools',
+ 'homedelivery']},
+ dict(id=["rightcolumn", 'footer', 'adbottom']),
]
def print_version(self, url):
+ return url
start_url, sep, rest_url = url.rpartition('_')
rurl, rsep, article_id = start_url.rpartition('/')
return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
diff --git a/resources/recipes/statesman.recipe b/resources/recipes/statesman.recipe
index 1bbf94fa5b..727df2ae61 100644
--- a/resources/recipes/statesman.recipe
+++ b/resources/recipes/statesman.recipe
@@ -10,12 +10,14 @@ class AdvancedUserRecipe1278049615(BasicNewsRecipe):
max_articles_per_feed = 100
- feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
- (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
- (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
- (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
- (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
- ]
+ feeds = [(u'News',
+ u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
+ (u'Local', u'http://www.statesman.com/section-rss.do?source=local&includeSubSections=true'),
+ (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
+ (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
+ (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
+ (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
+ ]
masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
#temp_files = []
#articles_are_obfuscated = True
@@ -28,8 +30,11 @@ class AdvancedUserRecipe1278049615(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
remove_tags = [
dict(name='div', attrs={'id':'cxArticleOptions'}),
+ {'class':['perma', 'comments', 'trail', 'share-buttons',
+ 'toggle_show_on']},
]
keep_only_tags = [
- dict(name='div', attrs={'class':'cxArticleHeader'}),
- dict(name='div', attrs={'id':'cxArticleBodyText'}),
+ dict(name='div', attrs={'class':'cxArticleHeader'}),
+ dict(name='div', attrs={'id':['cxArticleBodyText',
+ 'content']}),
]
diff --git a/resources/recipes/swiatkindle.recipe b/resources/recipes/swiatkindle.recipe
index a6bf225294..9847d1359e 100644
--- a/resources/recipes/swiatkindle.recipe
+++ b/resources/recipes/swiatkindle.recipe
@@ -7,6 +7,7 @@ swiatczytnikow.pl
'''
import re
+from calibre.web.feeds.news import BasicNewsRecipe
class swiatczytnikow(BasicNewsRecipe):
title = u'Swiat Czytnikow'
diff --git a/resources/recipes/thai_post_daily.recipe b/resources/recipes/thai_post_daily.recipe
new file mode 100644
index 0000000000..2be17cc37f
--- /dev/null
+++ b/resources/recipes/thai_post_daily.recipe
@@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1299054026(BasicNewsRecipe):
+ title = u'Thai Post Daily'
+ __author__ = 'Chotechai P.'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'
+    feeds = [(u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'), (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'), (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'), (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'), (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'), (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'), (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'), (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'), (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'), (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'), (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'), (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'), (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'), (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'), (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'), (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'), (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'), (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'), (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'), (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'), (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed')]
+
+ def print_version(self, url):
+ return url.replace(url, 'http://www.thaipost.net/print/' + url [32:])
+
+ remove_tags = []
+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-logo'}))
+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-site_name'}))
+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-breadcrumb'}))
diff --git a/resources/recipes/timesnewroman.recipe b/resources/recipes/timesnewroman.recipe
new file mode 100644
index 0000000000..12672aa888
--- /dev/null
+++ b/resources/recipes/timesnewroman.recipe
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+timesnewroman.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TimesNewRoman(BasicNewsRecipe):
+ title = u'Times New Roman'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Cotidian independent de umor voluntar'
+ publisher = u'Times New Roman'
+ oldest_article = 25
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Reviste,Fun'
+ encoding = 'utf-8'
+ cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [
+ dict(name='div', attrs={'id':'page'})
+ ]
+
+ remove_tags = [
+ dict(name='p', attrs={'class':['articleinfo']})
+ , dict(name='div',attrs={'class':['vergefacebooklike']})
+ , dict(name='div', attrs={'class':'cleared'})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'class':'cleared'})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss')
+ ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/resources/recipes/trombon.recipe b/resources/recipes/trombon.recipe
new file mode 100644
index 0000000000..1a4e488a43
--- /dev/null
+++ b/resources/recipes/trombon.recipe
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+trombon.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Trombon(BasicNewsRecipe):
+ title = u'Trombon'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Parodii si Pamflete'
+ publisher = u'Trombon'
+ oldest_article = 5
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare,Reviste,Fun'
+ encoding = 'utf-8'
+ cover_url = 'http://www.trombon.ro/i/trombon.gif'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'articol'})
+ ]
+
+ remove_tags = [
+ dict(name='div', attrs={'class':['info_2']})
+ , dict(name='iframe', attrs={'scrolling':['no']})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'id':'article_vote'})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://feeds.feedburner.com/trombon/ABWb?format=xml')
+ ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/resources/recipes/wallstreetro.recipe b/resources/recipes/wallstreetro.recipe
new file mode 100644
index 0000000000..8a66aa3673
--- /dev/null
+++ b/resources/recipes/wallstreetro.recipe
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+wall-street.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class WallStreetRo(BasicNewsRecipe):
+ title = u'Wall Street'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = ''
+ publisher = 'Wall Street'
+ oldest_article = 5
+ language = 'ro'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ category = 'Ziare'
+ encoding = 'utf-8'
+ cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'article_header'})
+ , dict(name='div', attrs={'class':'article_text'})
+ ]
+
+ remove_tags = [
+ dict(name='p', attrs={'class':['page_breadcrumbs']})
+ , dict(name='div', attrs={'id':['article_user_toolbox']})
+ , dict(name='p', attrs={'class':['comments_count_container']})
+ , dict(name='div', attrs={'class':['article_left_column']})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'class':'clearfloat'})
+ ]
+
+ feeds = [
+ (u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml')
+ ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/resources/template-functions.json b/resources/template-functions.json
index 5d9b6a11a3..fe4379d701 100644
--- a/resources/template-functions.json
+++ b/resources/template-functions.json
@@ -5,8 +5,9 @@
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
+ "select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
- "capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
+ "subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
"shorten": "def evaluate(self, formatter, kwargs, mi, locals,\n val, leading, center_string, trailing):\n l = max(0, int(leading))\n t = max(0, int(trailing))\n if len(val) > l + len(center_string) + t:\n return val[0:l] + center_string + ('' if t == 0 else val[-t:])\n else:\n return val\n",
"re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val)\n",
@@ -19,11 +20,13 @@
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",
- "subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
+ "format_date": "def evaluate(self, formatter, kwargs, mi, locals, val, format_string):\n if not val:\n return ''\n try:\n dt = parse_date(val)\n s = format_date(dt, format_string)\n except:\n s = 'BAD DATE'\n return s\n",
+ "capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
"count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n return unicode(len(val.split(sep)))\n",
"lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.lower()\n",
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n locals[target] = value\n return value\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val):\n return args[i+1]\n i += 2\n",
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
+ "raw_field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return unicode(getattr(mi, name, None))\n",
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x else 0)\n y = float(y if y else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
}
\ No newline at end of file
diff --git a/resources/templates/fb2.xsl b/resources/templates/fb2.xsl
index 77c03cdc74..273edd71ae 100644
--- a/resources/templates/fb2.xsl
+++ b/resources/templates/fb2.xsl
@@ -4,6 +4,7 @@
# #
# #
# copyright 2002 Paul Henry Tremblay #
+# Copyright 2011 Kovid Goyal
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
@@ -19,21 +20,21 @@
#########################################################################
-->
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-