From 2c37b1c36bcca29da702244ce592a7f472669c0a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 8 Apr 2010 17:19:47 +0530 Subject: [PATCH] PC Quest and Living Digital. News download: Implement is_link_wanted a method for context sensitive link filtering --- resources/recipes/living_digital.recipe | 14 ++++++++++++++ resources/recipes/pc_quest_india.recipe | 14 ++++++++++++++ src/calibre/ebooks/mobi/reader.py | 5 ++++- src/calibre/web/feeds/news.py | 20 +++++++++++++++++--- src/calibre/web/fetch/simple.py | 15 +++++++++++++-- 5 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 resources/recipes/living_digital.recipe create mode 100644 resources/recipes/pc_quest_india.recipe diff --git a/resources/recipes/living_digital.recipe b/resources/recipes/living_digital.recipe new file mode 100644 index 0000000000..d8bba7b9f8 --- /dev/null +++ b/resources/recipes/living_digital.recipe @@ -0,0 +1,14 @@ +from calibre.web.feeds.news import CalibrePeriodical + +class LivingDigital(CalibrePeriodical): + + title = 'Living Digital' + calibre_periodicals_slug = 'living-digital' + + description = ''' + Catch the latest buzz in the digital world with Living Digital. Enjoy + reviews, news, features and recommendations on a wide range of consumer + technology products - from smartphones to flat panel TVs, netbooks to + cameras, and many more consumer lifestyle gadgets. + ''' + language = 'en_IN' diff --git a/resources/recipes/pc_quest_india.recipe b/resources/recipes/pc_quest_india.recipe new file mode 100644 index 0000000000..3528731d47 --- /dev/null +++ b/resources/recipes/pc_quest_india.recipe @@ -0,0 +1,14 @@ +from calibre.web.feeds.news import CalibrePeriodical + +class PCQ(CalibrePeriodical): + + title = 'PCQuest' + calibre_periodicals_slug = 'pc-quest-india' + + description = ''' + Buying a tech product? Seeking a tech solution? Consult PCQuest, India's + market-leading selection and implementation guide for the latest + technologies: servers, business apps, security, open source, gadgets and + more. + ''' + language = 'en_IN' diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index e84ae2547c..84e6208086 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -296,6 +296,10 @@ class MobiReader(object): self.add_anchors() self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') + self.processed_html = self.processed_html.replace('<', + self.processed_html) + for pat in ENCODING_PATS: self.processed_html = pat.sub('', self.processed_html) e2u = functools.partial(entity_to_unicode, @@ -320,7 +324,6 @@ class MobiReader(object): from lxml.html import soupparser self.log.warning('Malformed markup, parsing using BeautifulSoup') try: - self.processed_html = self.processed_html.replace('