diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index ffe402538f..47d278a2b6 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) + for tag in self.root.xpath('//form'): + tag.getparent().remove(tag) + if self.opts.linearize_tables: for tag in self.root.xpath('//table | //tr | //th | //td'): tag.tag = 'div' diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 3f0ec414a2..4a0f6b47f7 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -98,7 +98,7 @@ class Feed(object): if len(self.articles) >= max_articles_per_feed: break self.parse_article(item) - + def populate_from_preparsed_feed(self, title, articles, oldest_article=7, max_articles_per_feed=100): @@ -156,7 +156,6 @@ class Feed(object): content = None if not link and not content: return - article = Article(id, title, link, description, published, content) delta = datetime.utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4773d551c3..13a79201e2 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1012,7 +1012,8 @@ class BasicNewsRecipe(object, LoggingInterface): feed.description = unicode(err) parsed_feeds.append(feed) self.log_exception(msg) - + + return parsed_feeds @classmethod diff --git a/src/calibre/web/feeds/recipes/recipe_iht.py b/src/calibre/web/feeds/recipes/recipe_iht.py index c30be70dea..1bee27d061 100644 --- a/src/calibre/web/feeds/recipes/recipe_iht.py +++ b/src/calibre/web/feeds/recipes/recipe_iht.py @@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald' ''' iht.com ''' +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe): max_articles_per_feed = 10 no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'})] + remove_tags = [dict(name='div', attrs={'class':'footer'}), + dict(name=['form'])] + preprocess_regexps = [ + (re.compile(r'