diff --git a/resources/recipes/foxnews.recipe b/resources/recipes/foxnews.recipe index e7e76390b5..916bd28ad2 100644 --- a/resources/recipes/foxnews.recipe +++ b/resources/recipes/foxnews.recipe @@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic ' foxnews.com ''' -import re from calibre.web.feeds.news import BasicNewsRecipe class FoxNews(BasicNewsRecipe): @@ -21,11 +20,10 @@ class FoxNews(BasicNewsRecipe): language = 'en' publication_type = 'newsportal' remove_empty_feeds = True - extra_css = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} .caption{font-size: x-small} ' - - preprocess_regexps = [ - (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') - ] + extra_css = """ + body{font-family: Arial,sans-serif } + .caption{font-size: x-small} + """ conversion_options = { 'comment' : description @@ -34,27 +32,15 @@ class FoxNews(BasicNewsRecipe): , 'language' : language } - remove_attributes = ['xmlns'] - - keep_only_tags = [ - dict(name='div', attrs={'id' :['story','browse-story-content']}) - ,dict(name='div', attrs={'class':['posts articles','slideshow']}) - ,dict(name='h4' , attrs={'class':'storyDate'}) - ,dict(name='h1' , attrs={'xmlns:functx':'http://www.functx.com'}) - ,dict(name='div', attrs={'class':'authInfo'}) - ,dict(name='div', attrs={'id':'articleCont'}) - ] + remove_attributes = ['xmlns','lang'] remove_tags = [ - dict(name='div', attrs={'class':['share-links','quigo quigo2','share-text','storyControls','socShare','btm-links']}) - ,dict(name='div', attrs={'id' :['otherMedia','loomia_display','img-all-path','story-vcmId','story-url','pane-browse-story-comments','story_related']}) - ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2','tabs']}) - ,dict(name='a' , attrs={'class':'join-discussion'}) - ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2']}) - ,dict(name='p' , attrs={'class':'see_fullarchive'}) - ,dict(name=['object','embed','link','script']) + dict(name=['object','embed','link','script','iframe','meta','base']) + ,dict(attrs={'class':['user-control','url-description','ad-context']}) ] + remove_tags_before=dict(name='h1') + remove_tags_after =dict(attrs={'class':'url-description'}) feeds = [ (u'Latest Headlines', u'http://feeds.foxnews.com/foxnews/latest' ) @@ -67,8 +53,5 @@ class FoxNews(BasicNewsRecipe): ,(u'Entertainment' , u'http://feeds.foxnews.com/foxnews/entertainment' ) ] - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) - + def print_version(self, url): + return url + 'print'