Update Metro UK

This commit is contained in:
Kovid Goyal 2013-08-19 08:40:15 +05:30
parent 3690241ab1
commit 6c7ff4e4e6

View File

@ -7,71 +7,75 @@ import time
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    """Recipe that builds a Metro UK issue by scraping the site's section pages.

    The article index is produced by :meth:`parse_index`, which reads each
    section listing page, picks out the ``<a class="post">`` links and keeps
    those whose URL carries a publication date no older than yesterday.
    """

    title = u'Metro UK'
    description = 'News from The Metro, UK'
    # timefmt = ''
    __author__ = 'Dave Asbury'
    # last update 4/4/13
    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True
    max_articles_per_feed = 12
    ignore_duplicate_articles = {'title', 'url'}
    # The explicit encoding override is deliberately left disabled.
    # encoding = 'UTF-8'
    language = 'en_GB'
    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
    compress_news_images = True
    compress_news_images_max_size = 30
    remove_attributes = ['style', 'font']
    # Strip the "| Metro News" suffix wherever it appears in the fetched HTML.
    preprocess_regexps = [
        (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),
    ]

    # Section title -> listing page that is scraped for article links.
    _SECTIONS = [
        ('UK', 'http://metro.co.uk/news/uk/'),
        ('World', 'http://metro.co.uk/news/world/'),
        ('Weird', 'http://metro.co.uk/news/weird/'),
        ('Money', 'http://metro.co.uk/news/money/'),
        ('Sport', 'http://metro.co.uk/sport/'),
        ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/'),
    ]

    # Article URLs embed the publication date as ".../uk/YYYY/MM/DD/...".
    # Requiring digits keeps non-article links (three arbitrary path
    # segments) from reaching the date parser.
    _DATE_IN_URL = re.compile(r'^.*uk/(\d{4})/(\d{1,2})/(\d{1,2})/')

    @classmethod
    def _date_from_url(cls, url):
        """Return the ``datetime.date`` embedded in *url*, or ``None``.

        ``None`` is returned both when the URL has no ``YYYY/MM/DD`` path
        component and when the digits do not form a real calendar date.
        """
        match = cls._DATE_IN_URL.search(url)
        if match is None:
            return None
        year, month, day = (int(part) for part in match.groups())
        try:
            return datetime.date(year, month, day)
        except ValueError:
            # e.g. month 13 or day 32: digits that are not a valid date.
            return None

    def parse_index(self):
        """Scrape every section page and build the article index.

        Returns a list of ``(section_title, articles)`` tuples in the order
        of ``_SECTIONS``. Each article is a dict with the keys ``title``,
        ``url``, ``date``, ``description`` and ``content``. Only links whose
        URL date is today or yesterday are included; links without a usable
        date, ``href`` or ``title`` are skipped.
        """
        # Oldest publication date still accepted (yesterday). Computed once
        # and compared as dates so DST changes cannot shift the boundary.
        cutoff = datetime.date.today() - datetime.timedelta(days=1)

        index = []
        for section, page_url in self._SECTIONS:
            soup = self.index_to_soup(page_url)
            found = []
            for link in soup.findAll('a', attrs={'class': 'post'}):
                url = link.get('href')
                title = link.get('title')
                if not url or not title:
                    continue

                published = self._date_from_url(url)
                if published is None or published < cutoff:
                    continue

                pubdate = published.strftime('%a, %d %b')
                summary = link.find(True, attrs={'class': 'excerpt'})
                if summary is not None:
                    description = self.tag_to_string(summary, use_alt=False)
                else:
                    description = ''

                self.log('Found article:', title, pubdate)
                found.append(dict(title=title, url=url, date=pubdate,
                                  description=description,
                                  content=''))
            index.append((section, found))
        return index