diff --git a/.bzrignore b/.bzrignore
index 4dc2b4b9d4..88fc9188fc 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -2,6 +2,7 @@
.check-cache.pickle
src/calibre/plugins
resources/images.qrc
+src/calibre/ebooks/oeb/display/test/*.js
src/calibre/manual/.build/
src/calibre/manual/cli/
src/calibre/manual/template_ref.rst
@@ -15,6 +16,7 @@ resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
resources/template-functions.json
+resources/display/*.js
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/
diff --git a/Changelog.yaml b/Changelog.yaml
index f5f9895d6c..c2b9843036 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,6 +19,65 @@
# new recipes:
# - title:
+- version: 0.8.31
+ date: 2011-12-16
+
+ new features:
+ - title: "Conversion engine: When parsing invalid XHTML use the HTML 5 algorithm, for greater robustness."
+ tickets: [901466]
+
+ - title: "Driver for PocketBook 611 and Lenovo IdeaPad"
+
+ - title: "Allow customization of the order in which custom column editing is performed in the edit metadata dialog. Setting is available via Preferences->Tweaks."
+ tickets: [902731]
+
+ - title: "MOBI news download: Allow recipes to set a thumbnail for entries in the periodical table of contents. Currently used by the NYTimes, WSJ, Independent, GUardian and Globe and Mail recipes"
+ tickets: [900130]
+
+ - title: "E-book viewer: Add an option to the right click menu to search for the currently selected word"
+
+ - title: "Automatically hide the no internet connection available error message if the connection is restored before the user clicks OK"
+
+ bug fixes:
+ - title: "Fix comments not hidden in Book details panel when they are turned off via Preferences->Look & Feel->Book Details"
+
+ - title: "E-book viewer: Do not popup an error message if the user tries to use the mouse wheel to scroll before a document is loaded."
+ tickets: [903449]
+
+ - title: "Add docx to the list of ebook extensions."
+ tickets: [903452]
+
+ - title: "When downloading metadata from non-English Amazon websites, do not correct the case of book titles."
+
+ - title: "Fix regression in 0.8.30 that broke bulk conversion of a single book."
+ tickets: [902506]
+
+ - title: "When minimized to system tray do not display the no internet connection error as a dialog box, instead use a system tray notification"
+
+ - title: "Catalog generation: Include the series_index field for custom series columns as well"
+
+ - title: "Comic Input: Do not rescale images when using the Tablet output profile (or any output profile with a screen size larger than 3000x3000)"
+
+ - title: "HTML Input: Ignore unparseable URLs instead of crashing on them."
+ tickets: [902372]
+
+
+ improved recipes:
+ - La Republica
+ - CND
+ - Berliner Zeitung
+ - Zaman Gazetesi
+
+ new recipes:
+ - title: CND Weekly
+ author: Derek Liang
+
+ - title: descopera.org
+ author: Marius Ignatescu
+
+ - title: Rynek Zdrowia
+ author: spi630
+
- version: 0.8.30
date: 2011-12-09
diff --git a/recipes/berliner_zeitung.recipe b/recipes/berliner_zeitung.recipe
index 6df88835eb..c4190439c7 100644
--- a/recipes/berliner_zeitung.recipe
+++ b/recipes/berliner_zeitung.recipe
@@ -1,61 +1,44 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
-import re
+
+'''Calibre recipe to convert the RSS feeds of the Berliner Zeitung to an ebook.'''
class SportsIllustratedRecipe(BasicNewsRecipe) :
- __author__ = 'ape'
- __copyright__ = 'ape'
+ __author__ = 'a.peter'
+ __copyright__ = 'a.peter'
__license__ = 'GPL v3'
language = 'de'
- description = 'Berliner Zeitung'
- version = 2
+ description = 'Berliner Zeitung RSS'
+ version = 4
title = u'Berliner Zeitung'
timefmt = ' [%d.%m.%Y]'
+ #oldest_article = 7.0
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
publication_type = 'newspaper'
- keep_only_tags = [dict(name='div', attrs={'class':'teaser t_split t_artikel'})]
+ remove_tags_before = dict(name='div', attrs={'class':'newstype'})
+ remove_tags_after = [dict(id='article_text')]
- INDEX = 'http://www.berlinonline.de/berliner-zeitung/'
-
- def parse_index(self):
- base = 'http://www.berlinonline.de'
- answer = []
- articles = {}
- more = 1
-
- soup = self.index_to_soup(self.INDEX)
-
- # Get list of links to ressorts from index page
- ressort_list = soup.findAll('ul', attrs={'class': re.compile('ressortlist')})
- for ressort in ressort_list[0].findAll('a'):
- feed_title = ressort.string
- print 'Analyzing', feed_title
- if not articles.has_key(feed_title):
- articles[feed_title] = []
- answer.append(feed_title)
- # Load ressort page.
- feed = self.index_to_soup('http://www.berlinonline.de' + ressort['href'])
- # find mainbar div which contains the list of all articles
- for article_container in feed.findAll('div', attrs={'class': re.compile('mainbar')}):
- # iterate over all articles
- for article_teaser in article_container.findAll('div', attrs={'class': re.compile('teaser')}):
- # extract title of article
- if article_teaser.h3 != None:
- article = {'title' : article_teaser.h3.a.string, 'date' : u'', 'url' : base + article_teaser.h3.a['href'], 'description' : u''}
- articles[feed_title].append(article)
- else:
- # Skip teasers for missing photos
- if article_teaser.div.p.contents[0].find('Foto:') > -1:
- continue
- article = {'title': 'Weitere Artikel ' + str(more), 'date': u'', 'url': base + article_teaser.div.p.a['href'], 'description': u''}
- articles[feed_title].append(article)
- more += 1
- answer = [[key, articles[key]] for key in answer if articles.has_key(key)]
- return answer
+ feeds = [(u'Startseite', u'http://www.berliner-zeitung.de/home/10808950,10808950,view,asFeed.xml'),
+ (u'Politik', u'http://www.berliner-zeitung.de/home/10808018,10808018,view,asFeed.xml'),
+ (u'Wirtschaft', u'http://www.berliner-zeitung.de/home/10808230,10808230,view,asFeed.xml'),
+ (u'Berlin', u'http://www.berliner-zeitung.de/home/10809148,10809148,view,asFeed.xml'),
+ (u'Brandenburg', u'http://www.berliner-zeitung.de/home/10809312,10809312,view,asFeed.xml'),
+ (u'Wissenschaft', u'http://www.berliner-zeitung.de/home/10808894,10808894,view,asFeed.xml'),
+ (u'Digital', u'http://www.berliner-zeitung.de/home/10808718,10808718,view,asFeed.xml'),
+ (u'Kultur', u'http://www.berliner-zeitung.de/home/10809150,10809150,view,asFeed.xml'),
+ (u'Panorama', u'http://www.berliner-zeitung.de/home/10808334,10808334,view,asFeed.xml'),
+ (u'Sport', u'http://www.berliner-zeitung.de/home/10808794,10808794,view,asFeed.xml'),
+ (u'Hertha', u'http://www.berliner-zeitung.de/home/10808800,10808800,view,asFeed.xml'),
+ (u'Union', u'http://www.berliner-zeitung.de/home/10808802,10808802,view,asFeed.xml'),
+ (u'Verkehr', u'http://www.berliner-zeitung.de/home/10809298,10809298,view,asFeed.xml'),
+ (u'Polizei', u'http://www.berliner-zeitung.de/home/10809296,10809296,view,asFeed.xml'),
+ (u'Meinung', u'http://www.berliner-zeitung.de/home/10808020,10808020,view,asFeed.xml')]
def get_masthead_url(self):
- return 'http://www.berlinonline.de/.img/berliner-zeitung/blz_logo.gif'
+ return 'http://www.berliner-zeitung.de/image/view/10810244,7040611,data,logo.png'
+ def print_version(self, url):
+ return url.replace('.html', ',view,printVersion.html')
diff --git a/recipes/cnd.recipe b/recipes/cnd.recipe
index 0e8206d07a..c99166730c 100644
--- a/recipes/cnd.recipe
+++ b/recipes/cnd.recipe
@@ -23,7 +23,9 @@ class TheCND(BasicNewsRecipe):
remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
no_stylesheets = True
- preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')]
+ preprocess_regexps = [ (re.compile(r'', re.DOTALL), lambda m: ''),
+ (re.compile('', re.DOTALL), lambda m: ''),
+ ]
def print_version(self, url):
if url.find('news/article.php') >= 0:
@@ -46,16 +48,18 @@ class TheCND(BasicNewsRecipe):
title = self.tag_to_string(a)
self.log('\tFound article: ', title, 'at', url)
date = a.nextSibling
+ if re.search('cm', date):
+ continue
if (date is not None) and len(date)>2:
if not articles.has_key(date):
articles[date] = []
articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
self.log('\t\tAppend to : ', date)
- self.log('log articles', articles)
+ #self.log('log articles', articles)
mostCurrent = sorted(articles).pop()
- self.title = 'CND ' + mostCurrent
-
+ self.title = 'CND ' + mostCurrent
+
feeds.append((self.title, articles[mostCurrent]))
return feeds
diff --git a/recipes/cnd_weekly.recipe b/recipes/cnd_weekly.recipe
new file mode 100644
index 0000000000..21839ae110
--- /dev/null
+++ b/recipes/cnd_weekly.recipe
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Derek Liang '
+'''
+cnd.org
+'''
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheCND(BasicNewsRecipe):
+
+ title = 'CND Weekly'
+ __author__ = 'Derek Liang'
+ description = ''
+ INDEX = 'http://cnd.org'
+ language = 'zh'
+ conversion_options = {'linearize_tables':True}
+
+ remove_tags_before = dict(name='div', id='articleHead')
+ remove_tags_after = dict(id='copyright')
+ remove_tags = [dict(name='table', attrs={'align':'right'}), dict(name='img', attrs={'src':'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
+ no_stylesheets = True
+
+ preprocess_regexps = [ (re.compile(r'', re.DOTALL), lambda m: ''),
+ (re.compile('', re.DOTALL), lambda m: ''),
+ ]
+
+ def print_version(self, url):
+ if url.find('news/article.php') >= 0:
+ return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
+ else:
+ return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)
+
+ def parse_index(self):
+ soup = self.index_to_soup(self.INDEX)
+
+ feeds = []
+ articles = {}
+
+ for a in soup.findAll('a', attrs={'target':'_cnd'}):
+ url = a['href']
+ if url.find('article.php') < 0 :
+ continue
+ if url.startswith('/'):
+ url = 'http://cnd.org'+url
+ title = self.tag_to_string(a)
+ date = a.nextSibling
+ if not re.search('cm', date):
+ continue
+ self.log('\tFound article: ', title, 'at', url, '@', date)
+ if (date is not None) and len(date)>2:
+ if not articles.has_key(date):
+ articles[date] = []
+ articles[date].append({'title':title, 'url':url, 'description': '', 'date':''})
+ self.log('\t\tAppend to : ', date)
+
+
+ sorted_articles = sorted(articles)
+ while sorted_articles:
+ mostCurrent = sorted_articles.pop()
+ self.title = 'CND ' + mostCurrent
+ feeds.append((self.title, articles[mostCurrent]))
+
+ return feeds
+
+ def populate_article_metadata(self, article, soup, first):
+ header = soup.find('h3')
+ self.log('header: ' + self.tag_to_string(header))
+ pass
+
diff --git a/recipes/datasport.recipe b/recipes/datasport.recipe
new file mode 100644
index 0000000000..235b262d48
--- /dev/null
+++ b/recipes/datasport.recipe
@@ -0,0 +1,15 @@
+__license__ = 'GPL v3'
+__author__ = 'faber1971'
+description = 'Italian soccer news website - v1.00 (17, December 2011)'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1324114272(BasicNewsRecipe):
+ title = u'Datasport'
+ language = 'it'
+ __author__ = 'faber1971'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ auto_cleanup = True
+
+ feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]
diff --git a/recipes/descopera_org.recipe b/recipes/descopera_org.recipe
index c76e88a588..c24046da12 100644
--- a/recipes/descopera_org.recipe
+++ b/recipes/descopera_org.recipe
@@ -1,27 +1,27 @@
-# -*- coding: utf-8 -*-
-'''
-descopera.org
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class Descopera(BasicNewsRecipe):
- title = u'Descoperă.org'
- __author__ = 'Marius Ignătescu'
- description = 'Descoperă. Placerea de a cunoaște'
- publisher = 'descopera.org'
- category = 'science, technology, culture, history, earth'
- language = 'ro'
- oldest_article = 14
- max_articles_per_feed = 100
- encoding = 'utf8'
- no_stylesheets = True
- extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
- keep_only_tags = [dict(name='div', attrs={'class':['post']})]
- remove_tags = [dict(name='div', attrs={'class':['topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]
- remove_attributes = ['width','height']
- cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
- feeds = [(u'Articles', u'http://www.descopera.org/feed/')]
-
- def preprocess_html(self, soup):
- return self.adeify_images(soup)
+# -*- coding: utf-8 -*-
+'''
+descopera.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Descopera(BasicNewsRecipe):
+ title = u'Descoperă.org'
+ __author__ = 'Marius Ignătescu'
+ description = 'Descoperă. Placerea de a cunoaște'
+ publisher = 'descopera.org'
+ category = 'science, technology, culture, history, earth'
+ language = 'ro'
+ oldest_article = 14
+ max_articles_per_feed = 100
+ encoding = 'utf8'
+ no_stylesheets = True
+ extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
+ keep_only_tags = [dict(name='div', attrs={'class':['post']})]
+ remove_tags = [dict(name='div', attrs={'class':['topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]
+ remove_attributes = ['width','height']
+ cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
+ feeds = [(u'Articles', u'http://www.descopera.org/feed/')]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/recipes/fisco_oggi.recipe b/recipes/fisco_oggi.recipe
new file mode 100644
index 0000000000..2b99441a89
--- /dev/null
+++ b/recipes/fisco_oggi.recipe
@@ -0,0 +1,18 @@
+__license__ = 'GPL v3'
+__author__ = 'faber1971'
+description = 'Website of the Italian Government Income Agency (about revenue, taxation, taxes) - v1.00 (17, December 2011)'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1324112023(BasicNewsRecipe):
+ title = u'Fisco Oggi'
+ language = 'it'
+ __author__ = 'faber1971'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ auto_cleanup = True
+ remove_javascript = True
+ no_stylesheets = True
+
+ feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]
+
diff --git a/recipes/globe_and_mail.recipe b/recipes/globe_and_mail.recipe
index 03061a2329..a7c78887c5 100644
--- a/recipes/globe_and_mail.recipe
+++ b/recipes/globe_and_mail.recipe
@@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
{'class':['articleTools', 'pagination', 'Ads', 'topad',
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
+ def populate_article_metadata(self, article, soup, first):
+ if first and hasattr(self, 'add_toc_thumbnail'):
+ picdiv = soup.find('img')
+ if picdiv is not None:
+ self.add_toc_thumbnail(article,picdiv['src'])
+
+
#Use the mobile version rather than the web version
def print_version(self, url):
return url.rpartition('?')[0] + '?service=mobile'
diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe
index 840e8302af..8bff4f9be8 100644
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -79,6 +79,12 @@ class Guardian(BasicNewsRecipe):
url = None
return url
+ def populate_article_metadata(self, article, soup, first):
+ if first and hasattr(self, 'add_toc_thumbnail'):
+ picdiv = soup.find('img')
+ if picdiv is not None:
+ self.add_toc_thumbnail(article,picdiv['src'])
+
def preprocess_html(self, soup):
# multiple html sections in soup, useful stuff in the first
diff --git a/recipes/hackernews.recipe b/recipes/hackernews.recipe
index 8d80008467..dca984eae5 100644
--- a/recipes/hackernews.recipe
+++ b/recipes/hackernews.recipe
@@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
import re
-class HackerNews(BasicNewsRecipe):
- title = 'Hacker News'
- __author__ = 'Tom Scholl'
+class HNWithCommentsLink(BasicNewsRecipe):
+ title = 'HN With Comments Link'
+ __author__ = 'Tom Scholl & David Kerschner'
description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
publisher = 'Y Combinator'
category = 'news, programming, it, technology'
@@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
body = body + comments
return u'' + title + u'' + body + ''
+ def parse_feeds(self):
+ a = super(HNWithCommentsLink, self).parse_feeds()
+ self.hn_articles = a[0].articles
+ return a
+
def get_obfuscated_article(self, url):
if url.startswith('http://news.ycombinator.com'):
content = self.get_hn_content(url)
@@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
else:
content = self.get_readable_content(url)
+ article = 0
+ for a in self.hn_articles:
+ if a.url == url:
+ article = a
+
+ content = re.sub(r'