From efb222c534e7b68f6cf562517d79e56cdf90f20e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 19:59:18 -0700 Subject: [PATCH 1/3] ... --- resources/recipes/dallas.recipe | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/resources/recipes/dallas.recipe b/resources/recipes/dallas.recipe index 8666fbef30..d46427caa9 100644 --- a/resources/recipes/dallas.recipe +++ b/resources/recipes/dallas.recipe @@ -7,22 +7,29 @@ class DallasNews(BasicNewsRecipe): max_articles_per_feed = 25 no_stylesheets = True - remove_tags_before = dict(name='h2', attrs={'class':'vitstoryheadline'}) - remove_tags_after = dict(name='div', attrs={'style':'width: 100%; clear: right'}) - remove_tags_after = dict(name='div', attrs={'id':'article_tools_bottom'}) + use_embedded_content = False + remove_tags_before = dict(name='h1') + keep_only_tags = {'class':lambda x: x and 'article' in x} remove_tags = [ - dict(name='iframe'), - dict(name='div', attrs={'class':'biblockmore'}), - dict(name='div', attrs={'style':'width: 100%; clear: right'}), - dict(name='div', attrs={'id':'article_tools_bottom'}), - #dict(name='ul', attrs={'class':'articleTools'}), + {'class':['DMNSocialTools', 'article ', 'article first ', 'article premium']}, ] feeds = [ - ('Latest News', 'http://www.dallasnews.com/newskiosk/rss/dallasnewslatestnews.xml'), - ('Local News', 'http://www.dallasnews.com/newskiosk/rss/dallasnewslocalnews.xml'), - ('Nation and World', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsnationworld.xml'), - ('Politics', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsnationalpolitics.xml'), - ('Science', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsscience.xml'), + ('Local News', + 'http://www.dallasnews.com/news/politics/local-politics/?rss'), + ('National Politics', + 'http://www.dallasnews.com/news/politics/national-politic/?rss'), + ('State Politics', + 'http://www.dallasnews.com/news/politics/state-politics/?rss'), + ('Religion', + 'http://www.dallasnews.com/news/religion/?rss'), + ('Crime', + 'http://www.dallasnews.com/news/crime/headlines/?rss'), + ('Celebrity News', + 'http://www.dallasnews.com/entertainment/celebrity-news/?rss&listname=TopStories'), + ('Nation', + 'http://www.dallasnews.com/news/nation-world/nation/?rss'), + ('World', + 'http://www.dallasnews.com/news/nation-world/world/?rss'), ] From d6b446de729abc94147b0b49ffaa08d00a32a878 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 20:25:51 -0700 Subject: [PATCH 2/3] ... --- src/calibre/ebooks/conversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 087d8ed486..f728bec52b 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -459,7 +459,7 @@ class HTMLPreProcessor(object): try: search_re = re.compile(search_pattern) replace_txt = getattr(self.extra_opts, replace, '') - if replace_txt == None: + if not replace_txt: replace_txt = '' rules.insert(0, (search_re, replace_txt)) except Exception as e: From 9a8f0398be60d46cf97972d373adaf0310b48e64 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 20:36:34 -0700 Subject: [PATCH 3/3] ... --- src/calibre/gui2/convert/heuristics.ui | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/convert/heuristics.ui b/src/calibre/gui2/convert/heuristics.ui index 4358512996..6863fcf8e6 100644 --- a/src/calibre/gui2/convert/heuristics.ui +++ b/src/calibre/gui2/convert/heuristics.ui @@ -17,11 +17,14 @@ - <b>Heuristic processing</b> means that calibre will scan your book for common patterns and fix them. As the name implies, this involves guesswork, which means that it could end up worsening the result of a conversion, if calibre guesses wrong. Therefore, it is disabled by default. Often, if a conversion does not turn out as you expect, turning on heuristics can improve matters. + <b>Heuristic processing</b> means that calibre will scan your book for common patterns and fix them. As the name implies, this involves guesswork, which means that it could end up worsening the result of a conversion, if calibre guesses wrong. Therefore, it is disabled by default. Often, if a conversion does not turn out as you expect, turning on heuristics can improve matters. Read more about the various heuristic processing options in the <a href="http://calibre-ebook.com/user_manual/conversion.html#heuristic-processing">User Manual</a>. true + + true +