calibre/recipes/thenews.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

89 lines
3.3 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class TheNewsRecipe(BasicNewsRecipe):
    """Recipe for 'The News', an English newspaper from Pakistan (Jang Group).

    Articles are discovered through the site's RSS feeds. Each article URL
    is rewritten to the matching print page by ``print_version`` and the
    downloaded markup is tidied by ``preprocess_html``.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1

    title = u'The News'
    publisher = u'Jang Group'
    category = u'News, Pakistan'
    description = u'English Newspaper from Pakistan'

    use_embedded_content = False
    remove_empty_feeds = True
    # NOTE(review): presumably measured in days -- confirm against the
    # BasicNewsRecipe documentation.
    oldest_article = 2
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'

    # Images matched by their exact ``src`` attribute are dropped.
    remove_tags = [
        dict(name='img', attrs={'src': 'images/thenews.gif'}),
        dict(name='img', attrs={'src': 'images/shim.gif'}),
    ]

    # Feeds from http://thenews.com.pk/rss.asp
    feeds = [
        (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'),
        (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'),
        (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'),
        (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'),
        (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'),
        (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'),
        (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'),
        (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'),
        (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'),
        (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'),
        (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'),
        (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'),
        (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'),
    ]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
        'publisher': publisher,
        'linearize_tables': True,
    }

    # The CSS text is kept flush-left on purpose so that the string value
    # stays exactly as it was. ``.dateline`` styles the cell that
    # ``preprocess_html`` re-classes.
    extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
.heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
.small_txt {text-align: left;}
.dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
'''

    def print_version(self, url):
        """Return the print-page URL for an article URL, or None.

        The last path segment of ``url`` decides which print page applies.
        When that segment starts with a known article page name, every
        occurrence of the page name in ``url`` is replaced by the print
        page name. Unrecognised URLs yield ``None``.
        NOTE(review): ``None`` presumably makes the framework skip the
        article -- confirm against the BasicNewsRecipe documentation.
        """
        # (article page, print page) pairs; no page name is a prefix of
        # another, so the lookup order does not affect the result.
        print_pages = (
            ('updates.asp', 'print.asp'),
            ('top_story_detail.asp', 'print3.asp'),
            ('daily_detail.asp', 'print1.asp'),
        )
        last_segment = url.rpartition('/')[2]
        for article_page, print_page in print_pages:
            if last_segment.startswith(article_page):
                return url.replace(article_page, print_page)
        return None

    def preprocess_html(self, soup):
        """Tidy the parsed article markup and return the same ``soup``.

        Removes the ``bgcolor`` attribute from every ``<tr>`` that has one,
        then turns the first ``<td class="small_txt" height="20">`` (if any)
        into a ``dateline`` cell by dropping its ``height`` attribute and
        replacing its class.
        """
        for row in soup.findAll('tr', attrs={'bgcolor': True}):
            del row['bgcolor']

        dateline_cell = soup.find(
            'td', attrs={'class': 'small_txt', 'height': '20'})
        if dateline_cell:
            del dateline_cell['height']
            dateline_cell['class'] = 'dateline'

        return soup