From aedb2cf959cb153b289e6c4b1ca5cda8d0b8fd54 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Mar 2009 18:40:34 -0700 Subject: [PATCH 1/7] Fix #2025 (Many LIT file covers detected sideways) --- src/calibre/ebooks/metadata/lit.py | 16 ++++++++++++---- src/calibre/gui2/dialogs/config.py | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index c38450c64c..2129af76dd 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -19,15 +19,23 @@ def get_metadata(stream): for item in opf.iterguide(): if 'cover' not in item.get('type', '').lower(): continue + ctype = item.get('type') href = item.get('href', '') candidates = [href, href.replace('&', '%26')] for item in litfile.manifest.values(): if item.path in candidates: - covers.append(item.internal) + try: + covers.append((litfile.get_file('/data/'+item.internal), + ctype)) + except: + pass break - covers = [litfile.get_file('/data/' + i) for i in covers] - covers.sort(cmp=lambda x, y:cmp(len(x), len(y))) - mi.cover_data = ('jpg', covers[-1]) + covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True) + idx = 0 + if len(covers) > 1: + if covers[1][1] == covers[1][0]+'-standard': + idx = 1 + mi.cover_data = ('jpg', covers[idx][0]) return mi def main(args=sys.argv): diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py index 5353f24544..9958ce53fa 100644 --- a/src/calibre/gui2/dialogs/config.py +++ b/src/calibre/gui2/dialogs/config.py @@ -196,7 +196,7 @@ class ConfigDialog(QDialog, Ui_Dialog): self.language.addItem(language_codes[lang], QVariant(lang)) else: lang = 'en' - self.language.addItem('English', 'en') + self.language.addItem('English', QVariant('en')) items = [(l, language_codes[l]) for l in translations.keys() \ if l != lang] if lang != 'en': From 8ea72440c8813920f9d077d90c11e0abc37b858d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 
Mar 2009 10:40:56 -0700 Subject: [PATCH 2/7] New recipe for The Daily Mail UK by RufusA. Fix #998 (HTML2LRF and empty headings) --- src/calibre/ebooks/lrf/html/convert_from.py | 4 +++ src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_daily_mail.py | 33 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_daily_mail.py diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 2bd63d1d8f..c72bcfbfe5 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -99,6 +99,10 @@ class HTMLConverter(object, LoggingInterface): # Replace common line break patterns with line breaks (re.compile(r'

( |\s)*

', re.IGNORECASE), lambda m: '
'), + # Replace empty headers with line breaks + (re.compile(r'( |\s)*', + re.IGNORECASE), lambda m: '
'), + # Replace entities (re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt', 'amp'])), diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index b2c18b26a8..793d5cf45d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in ( 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'lamujerdemivida', 'soldiers', 'theonion', 'news_times', - 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', + 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py new file mode 100644 index 0000000000..c64e328bf2 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py @@ -0,0 +1,33 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class TheDailyMail(BasicNewsRecipe): + title = u'The Daily Mail' + oldest_article = 2 + language = _('English') + author = 'RufusA' + simultaneous_downloads= 1 + max_articles_per_feed = 50 + + extra_css = 'h1 {text-align: left;}' + + remove_tags = [ dict(name='ul', attrs={'class':'article-icons-links'}) ] + remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'}) + remove_tags_before = dict(name='div', attrs={'id':'content'}) + no_stylesheets = True + + feeds = [ + (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), + (u'News', u'http://www.dailymail.co.uk/news/index.rss'), + (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), + (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), + (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), + (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), + (u'Science and Technology', 
u'http://www.dailymail.co.uk/sciencetech/index.rss'), + (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), + (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), + (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), + (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')] + + def print_version(self, url): + main = url.partition('?')[0] + return main + '?printingPage=true' From 72581c6e32f301db75ede4e9f62fb97fd17902ce Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 12:56:47 -0700 Subject: [PATCH 3/7] MOBI Input: Strip <street> and <address>
tags as ADE refuses to handle them gracefully when converted to EPUB --- src/calibre/ebooks/mobi/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 3ca1fd6c18..967a68aea8 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -300,7 +300,7 @@ class MobiReader(object): mobi_version = self.book_header.mobi_version for tag in root.iter(etree.Element): if tag.tag in ('country-region', 'place', 'placetype', 'placename', - 'state', 'city'): + 'state', 'city', 'street', 'address'): tag.tag = 'span' for key in tag.attrib.keys(): tag.attrib.pop(key) From 06e5659d7968234f78d11e4c7c6ee98dd73fab77 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 13:01:18 -0700 Subject: [PATCH 4/7] IGN:Better error handling when library is on a removable device that no longer exists --- src/calibre/gui2/main.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 163a9d8bd0..4ecfc08f58 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow): dir = os.path.expanduser('~/Library') self.library_path = os.path.abspath(dir) if not os.path.exists(self.library_path): - os.makedirs(self.library_path) + try: + os.makedirs(self.library_path) + except: + self.library_path = os.path.expanduser('~/Library') + error_dialog(self, _('Invalid library location'), + _('Could not access %s. 
Using %s as the library.')% + (repr(self.library_path), repr(self.library_path)) + ).exec_() + os.makedirs(self.library_path) def read_settings(self): From a52286c594bb4b67d07db232b258e9a9fbb9f800 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 13:02:11 -0700 Subject: [PATCH 5/7] IGN:Fix handling of input files that specify an encoding that python doesn't support --- src/calibre/ebooks/chardet/__init__.py | 3 ++- src/calibre/trac/donations/server.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index af6d724883..971ac9bc9a 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, try: raw = raw.decode(encoding, 'replace') except LookupError: - raw = raw.decode('utf-8', 'replace') + encoding = 'utf-8' + raw = raw.decode(encoding, 'replace') if strip_encoding_pats: raw = strip_encoding_declarations(raw) diff --git a/src/calibre/trac/donations/server.py b/src/calibre/trac/donations/server.py index 8e7a096353..24174db801 100644 --- a/src/calibre/trac/donations/server.py +++ b/src/calibre/trac/donations/server.py @@ -196,7 +196,7 @@ class Server(object): def calculate_month_trend(self, days=31): stats = self.get_slice(date.today()-timedelta(days=days-1), date.today()) - fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) + fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) fig.clear() ax = fig.add_subplot(111) x = list(range(days-1, -1, -1)) @@ -216,7 +216,7 @@ Donors per day: %(dpd).2f ad=stats.average_deviation, dpd=len(stats.totals)/float(stats.period.days), ) - text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction') + text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction') fig.savefig(self.MONTH_TRENDS) def calculate_trend(self): From 
74486fc40df983a35ca4230a3d26904b0b4cf12b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 10 Mar 2009 19:23:43 -0700 Subject: [PATCH 6/7] EPUB Output: Strip <form>
tags since ADE runs screaming when it sees one. Fixes #2029 (IHT resetting P505) --- src/calibre/ebooks/epub/from_html.py | 3 +++ src/calibre/web/feeds/__init__.py | 3 +-- src/calibre/web/feeds/news.py | 3 ++- src/calibre/web/feeds/recipes/recipe_iht.py | 8 +++++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index ffe402538f..47d278a2b6 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) + for tag in self.root.xpath('//form'): + tag.getparent().remove(tag) + if self.opts.linearize_tables: for tag in self.root.xpath('//table | //tr | //th | //td'): tag.tag = 'div' diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 3f0ec414a2..4a0f6b47f7 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -98,7 +98,7 @@ class Feed(object): if len(self.articles) >= max_articles_per_feed: break self.parse_article(item) - + def populate_from_preparsed_feed(self, title, articles, oldest_article=7, max_articles_per_feed=100): @@ -156,7 +156,6 @@ class Feed(object): content = None if not link and not content: return - article = Article(id, title, link, description, published, content) delta = datetime.utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4773d551c3..13a79201e2 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1012,7 +1012,8 @@ class BasicNewsRecipe(object, LoggingInterface): feed.description = unicode(err) parsed_feeds.append(feed) self.log_exception(msg) - + + return parsed_feeds @classmethod diff --git a/src/calibre/web/feeds/recipes/recipe_iht.py 
b/src/calibre/web/feeds/recipes/recipe_iht.py index c30be70dea..1bee27d061 100644 --- a/src/calibre/web/feeds/recipes/recipe_iht.py +++ b/src/calibre/web/feeds/recipes/recipe_iht.py @@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald' ''' iht.com ''' +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe): max_articles_per_feed = 10 no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'})] + remove_tags = [dict(name='div', attrs={'class':'footer'}), + dict(name=['form'])] + preprocess_regexps = [ + (re.compile(r'