mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
89 lines
3.3 KiB
Plaintext
89 lines
3.3 KiB
Plaintext
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class TheNewsRecipe(BasicNewsRecipe):
    """Recipe for "The News", an English newspaper from Pakistan (Jang Group).

    Articles are gathered from the RSS feeds listed in ``feeds``; each article
    URL is mapped to its print page by ``print_version`` and the fetched HTML
    is tidied by ``preprocess_html``.
    """

    # Recipe metadata.
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1

    # Publication metadata.
    title = u'The News'
    publisher = u'Jang Group'
    category = u'News, Pakistan'
    description = u'English Newspaper from Pakistan'

    # Feed handling.
    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100

    # Page handling.
    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'

    # Images stripped from every article page.
    remove_tags = [
        dict(name='img', attrs={'src': 'images/thenews.gif'}),
        dict(name='img', attrs={'src': 'images/shim.gif'}),
    ]

    # Feeds from http://thenews.com.pk/rss.asp
    feeds = [
        (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'),
        (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'),
        (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'),
        (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'),
        (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'),
        (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'),
        (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'),
        (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'),
        (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'),
        (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'),
        (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'),
        (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'),
        (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'),
    ]

    # NOTE(review): 'language' is 'en' here while the recipe-level ``language``
    # is 'en_PK' -- confirm the difference is intentional.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
        'publisher': publisher,
        'linearize_tables': True,
    }

    extra_css = '''
        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
        .heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
        .small_txt {text-align: left;}
        .dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
        '''

    def print_version(self, url):
        """Return the print-page URL for an article URL.

        The text after the last '/' in ``url`` is matched against the known
        article page names; on a match, that page name is replaced by its
        print counterpart throughout ``url``. Returns ``None`` when the page
        is not one of the known article pages.
        """
        # (article page, print page) pairs, checked in this order.
        print_pages = (
            ('updates.asp', 'print.asp'),
            ('top_story_detail.asp', 'print3.asp'),
            ('daily_detail.asp', 'print1.asp'),
        )

        page = url.rpartition('/')[-1]
        for article_page, print_page in print_pages:
            if page.startswith(article_page):
                return url.replace(article_page, print_page)
        return None

    def preprocess_html(self, soup):
        """Strip table-row background colors and restyle the dateline cell.

        Every ``<tr>`` carrying a ``bgcolor`` attribute loses it. The first
        ``<td class="small_txt" height="20">``, if any, loses its ``height``
        and is re-classed as ``dateline`` so the ``.dateline`` rule in
        ``extra_css`` applies to it. The same ``soup`` object is returned.
        """
        colored_rows = soup.findAll('tr', attrs={'bgcolor': True})
        for row in colored_rows:
            del row['bgcolor']

        dateline_attrs = {'class': 'small_txt', 'height': '20'}
        dateline_cell = soup.find('td', attrs=dateline_attrs)
        if dateline_cell:
            del dateline_cell['height']
            dateline_cell['class'] = 'dateline'

        return soup
|