EPUB Output: Strip <form> tags since ADE runs screaming when it sees one. Fixes #2029 (IHT resetting P505)

This commit is contained in:
Kovid Goyal 2009-03-10 19:23:43 -07:00
parent a52286c594
commit 74486fc40d
4 changed files with 13 additions and 4 deletions

View File

@@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer):
if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag)
for tag in self.root.xpath('//form'):
tag.getparent().remove(tag)
if self.opts.linearize_tables:
for tag in self.root.xpath('//table | //tr | //th | //td'):
tag.tag = 'div'

View File

@@ -156,7 +156,6 @@ class Feed(object):
content = None
if not link and not content:
return
article = Article(id, title, link, description, published, content)
delta = datetime.utcnow() - article.utctime
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:

View File

@@ -1013,6 +1013,7 @@ class BasicNewsRecipe(object, LoggingInterface):
parsed_feeds.append(feed)
self.log_exception(msg)
return parsed_feeds
@classmethod

View File

@@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald'
'''
iht.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe):
max_articles_per_feed = 10
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'footer'})]
remove_tags = [dict(name='div', attrs={'class':'footer'}),
dict(name=['form'])]
preprocess_regexps = [
(re.compile(r'<!-- webtrends.*', re.DOTALL),
lambda m:'</body></html>')
]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
feeds = [