calibre/recipes/birmingham_post.recipe
2024-03-30 13:03:29 +05:30

57 lines
1.9 KiB
Python

from __future__ import print_function
import re
import mechanize
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'Author D.Asbury. News for Birmingham UK'
__author__ = 'Dave Asbury'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
oldest_article = 2
max_articles_per_feed = 20
linearize_tables = True
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
language = 'en_GB'
compress_news_images = True
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghampost.net')
# look for the block containing the sun button and url
cov = soup.find(attrs={'height': re.compile(
'3'), 'alt': re.compile('Post')})
print()
print('%%%%%%%%%%%%%%%', cov)
print()
cov2 = str(cov['src'])
print('88888888 ', cov2, ' 888888888888')
# cover_url=cov2
# return cover_url
br = mechanize.Browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = cov2
except:
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
return cover_url
feeds = [
(u'West Mids. News',
u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
(u'Sports', u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
(u'Bloggs & Comments', u'http://www.birminghampost.net/comment/rss.xml')
]