calibre/recipes/starwars.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

54 lines
1.6 KiB
Plaintext

# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class TheForce(BasicNewsRecipe):
    """Recipe that builds an e-book from the theforce.net news feed.

    The class attributes configure the download (feed URL, article limits,
    page encoding, which tags to keep or drop); ``preprocess_html`` then
    performs the page-specific cleanup that cannot be expressed with simple
    tag specifications.
    """

    title = u'The Force'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    encoding = 'cp1252'
    remove_stylesheets = True
    conversion_options = {'linearize_tables': True}

    # Tag specifications: drop what follows the KonaBody div, keep only the
    # table cell that carries the story background image, drop iframes.
    remove_tags_after = dict(name='div', attrs={'class': 'KonaBody'})
    keep_only_tags = dict(
        name='td',
        attrs={'background': '/images/span/tile_story_bgtile.gif'},
    )
    remove_tags = [
        dict(name='iframe'),
    ]

    feeds = [
        ('The Force', 'http://www.theforce.net/outnews/tfnrdf.xml'),
    ]

    def preprocess_html(self, soup):
        """Strip trailing boilerplate and replace linked images in *soup*.

        Three passes are made over the parsed page:

        1. The first ``<i>`` tag whose text contains the Star Wars Insider
           Facebook reminder is removed together with every tag after it.
        2. If an element with class ``articleoption`` sits inside a
           ``<table>``, that table and every tag after it are removed.
        3. Every ``<img>`` wrapped in an ``<a href=...>`` whose href yields a
           non-empty value after the first ``=`` of its last ``?``-separated
           part is replaced: the anchor itself is turned into an ``<img>``
           pointing at that value.

        :param soup: parsed page (BeautifulSoup tree); modified in place.
        :return: the same ``soup`` object.
        """

        def cut_from(node):
            # Remove every tag located after ``node`` in document order,
            # then ``node`` itself.
            for follower in node.findAllNext():
                follower.extract()
            node.extract()

        # Pass 1: only the first matching <i> is handled.
        for italic in soup.findAll(name='i'):
            text = self.tag_to_string(italic)
            if 'Remember to join the Star Wars Insider Facebook' in text:
                cut_from(italic)
                break

        # Pass 2: cut at the table enclosing the article options, if any.
        option = soup.find(attrs={'class': 'articleoption'})
        if option is not None:
            table = option.findParent('table')
            if table is not None:
                cut_from(table)

        # Pass 3: swap anchor-wrapped images for the anchor's target.
        for image in soup.findAll('img', src=True):
            link = image.findParent('a', href=True)
            if link is None:
                continue
            last_part = link.get('href').split('?')[-1]
            target = last_part.partition('=')[-1]
            if not target:
                continue
            image.extract()
            # Reuse the anchor element as the new image.
            link.name = 'img'
            link['src'] = target
            del link['href']
            # The extracted image is updated as well, as in the original code.
            image['src'] = target
        return soup