From e48a4c305ee978ca198612fb96316a675b5e7905 Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 26 Jan 2011 09:34:58 -0500 Subject: [PATCH 1/3] PML Input: Retain soft scene breaks. --- src/calibre/ebooks/pml/pmlconverter.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index a0814ee0dd..3fdd627d7e 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -499,14 +499,15 @@ class PML_HTMLizer(object): indent_state = {'t': False, 'T': False} adv_indent_val = '' + # Keep track of the number of empty lines + # between paragraphs. When we reach a set number + # we assume it's a soft scene break. + empty_count = 0 for s in self.STATES: self.state[s] = [False, '']; for line in pml.splitlines(): - if not line: - continue - parsed = [] empty = True basic_indent = indent_state['t'] @@ -592,7 +593,12 @@ class PML_HTMLizer(object): parsed.append(text) c = line.read(1) - if not empty: + if empty: + empty_count += 1 + if empty_count == 3: + output.append('
<p>&nbsp;</p>
') + else: + empty_count = 0 text = self.end_line() parsed.append(text) From 59d91a44c75275a5533bfd229bacef93592f871a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jan 2011 08:42:37 -0700 Subject: [PATCH 2/3] Don't crash if the prefs stored in the db are corrupted --- src/calibre/library/prefs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/prefs.py b/src/calibre/library/prefs.py index b125fe9067..2921e1c936 100644 --- a/src/calibre/library/prefs.py +++ b/src/calibre/library/prefs.py @@ -9,6 +9,7 @@ import json from calibre.constants import preferred_encoding from calibre.utils.config import to_json, from_json +from calibre import prints class DBPrefs(dict): @@ -17,7 +18,11 @@ class DBPrefs(dict): self.db = db self.defaults = {} for key, val in self.db.conn.get('SELECT key,val FROM preferences'): - val = self.raw_to_object(val) + try: + val = self.raw_to_object(val) + except: + prints('Failed to read value for:', key, 'from db') + continue dict.__setitem__(self, key, val) def raw_to_object(self, raw): From e4de217f79d0991788fac1df21c8be3d586d43f1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jan 2011 08:47:47 -0700 Subject: [PATCH 3/3] Remove tweets link from economist download --- resources/recipes/economist.recipe | 7 +++++-- resources/recipes/economist_free.recipe | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/resources/recipes/economist.recipe b/resources/recipes/economist.recipe index 95b4a2ae05..17bf4c8c20 100644 --- a/resources/recipes/economist.recipe +++ b/resources/recipes/economist.recipe @@ -22,8 +22,11 @@ class Economist(BasicNewsRecipe): oldest_article = 7.0 cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg' - remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), - dict(attrs={'class':['dblClkTrk', 'ec-article-info']})] + remove_tags = [ + dict(name=['script', 'noscript', 'title', 'iframe', 
'cf_floatingcontent']), + dict(attrs={'class':['dblClkTrk', 'ec-article-info']}), + {'class': lambda x: x and 'share-links-header' in x}, + ] keep_only_tags = [dict(id='ec-article-body')] needs_subscription = False no_stylesheets = True diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe index 321c7d29ce..f4a4efd932 100644 --- a/resources/recipes/economist_free.recipe +++ b/resources/recipes/economist_free.recipe @@ -16,8 +16,11 @@ class Economist(BasicNewsRecipe): oldest_article = 7.0 cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg' - remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), - dict(attrs={'class':['dblClkTrk', 'ec-article-info']})] + remove_tags = [ + dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), + dict(attrs={'class':['dblClkTrk', 'ec-article-info']}), + {'class': lambda x: x and 'share-links-header' in x}, + ] keep_only_tags = [dict(id='ec-article-body')] no_stylesheets = True preprocess_regexps = [(re.compile('.*', re.DOTALL),