mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Update Metro UK
This commit is contained in:
parent
6ed7a71a4d
commit
c9004c1e98
@ -1,81 +1,56 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre import strftime
|
||||
import re
|
||||
import datetime
|
||||
import time
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
class AdvancedUserRecipe1390146870(BasicNewsRecipe):
|
||||
title = u'Metro UK'
|
||||
description = 'News from The Metro, UK'
|
||||
|
||||
cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
max_articles_per_feed = 12
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
#encoding = 'UTF-8'
|
||||
|
||||
language = 'en_GB'
|
||||
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
||||
__author__ = 'D Asbury'
|
||||
# 20.6.2015 rss using feed 43 - D Asbury
|
||||
# item repeatable string = <h3 class="title"><a href="{%}" {*}<span class="colour">{%}</span></a></h3>
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 10
|
||||
scale_news_images_to_device = True
|
||||
compress_news_images = True
|
||||
compress_news_images_max_size = 30
|
||||
remove_attributes = ['style', 'font']
|
||||
preprocess_regexps = [
|
||||
compress_news_images_max_size = 16
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png'
|
||||
auto_cleanup = True
|
||||
no_stylesheets = True
|
||||
cover_url = 'https://lh4.ggpht.com/aaKY88SbQyB-vK-pgjo22-QVi6dUnOt7aVsRueTU8Fg-zPwOdsDdOVElGM8O0BBDSIsuB9rciyc=w300'
|
||||
masthead_url = 'http://s1.wp.com/wp-content/themes/vip/metrouk/img/branding/metro_logo_300x95.png?m=1363331170g'
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),
|
||||
|
||||
(re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
articles = {}
|
||||
key = None
|
||||
ans = []
|
||||
feeds = [('UK', 'http://metro.co.uk/news/uk/'),
|
||||
('World', 'http://metro.co.uk/news/world/'),
|
||||
('Weird', 'http://metro.co.uk/news/weird/'),
|
||||
('Money', 'http://metro.co.uk/news/money/'),
|
||||
('Sport', 'http://metro.co.uk/sport/'),
|
||||
('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
|
||||
feeds = [
|
||||
(u'UK','http://feed43.com/5636207287684703.xml'), # 'http://metro.co.uk/news/uk/rss'),
|
||||
(u'World News','http://feed43.com/4555301018714738.xml'),
|
||||
(u'Weird', 'http://feed43.com/0483673464615441.xml'),
|
||||
(u'Sport', 'http://feed43.com/4655536887165433.xml'),
|
||||
(u'Entertainment', 'http://feed43.com/6342124113153248.xml'),
|
||||
(u'Lifestyle','http://feed43.com/7603240345000555.xml'),
|
||||
]
|
||||
for key, feed in feeds:
|
||||
soup = self.index_to_soup(feed)
|
||||
articles[key] = []
|
||||
ans.append(key)
|
||||
|
||||
today = datetime.date.today()
|
||||
today = time.mktime(today.timetuple())-60*60*24
|
||||
# starsons code
|
||||
def parse_feeds(self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
print 'article.title is: ', article.title
|
||||
if 'VIDEO:' in article.title.upper():
|
||||
feed.articles.remove(article)
|
||||
|
||||
for a in soup.findAll('a'):
|
||||
for name, value in a.attrs:
|
||||
if name == "class" and value=="post":
|
||||
url = a['href']
|
||||
title = a['title']
|
||||
print title
|
||||
description = ''
|
||||
m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
|
||||
skip = 1
|
||||
if len(m.groups()) == 3:
|
||||
g = m.groups()
|
||||
dt = datetime.datetime.strptime(''+g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
|
||||
pubdate = time.strftime('%a, %d %b', dt.timetuple())
|
||||
return feeds
|
||||
|
||||
dt = time.mktime(dt.timetuple())
|
||||
if dt >= today:
|
||||
print pubdate
|
||||
skip = 0
|
||||
else:
|
||||
pubdate = strftime('%a, %d %b')
|
||||
|
||||
summary = a.find(True, attrs={'class':'excerpt'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
|
||||
if skip == 0:
|
||||
articles[key].append(
|
||||
dict(title=title, url=url, date=pubdate,
|
||||
description=description,
|
||||
content=''))
|
||||
#ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
|
||||
ans = [(key, articles[key]) for key in ans if key in articles]
|
||||
return ans
|
||||
extra_css = '''
|
||||
h1{font-weight:bold;font-size:175%;}
|
||||
h2{display: block;margin-left: auto;margin-right: auto;width:125%;font-weight:bold;font-size:150%;}
|
||||
#p{font-size:14px;}
|
||||
#body{font-size:14px;}
|
||||
.figcaption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
|
||||
.alignnone{display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
|
||||
.publish-info {font-size:50%;}
|
||||
.aligncenter{display: block;margin-left: auto;margin-right: auto;width:100%;}
|
||||
'''
|
||||
|
Loading…
x
Reference in New Issue
Block a user