From c9004c1e98b68dc93aea50e7b5fd14ffae417097 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Jun 2015 13:42:48 +0530 Subject: [PATCH] Update Metro UK --- recipes/metro_uk.recipe | 113 ++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 69 deletions(-) diff --git a/recipes/metro_uk.recipe b/recipes/metro_uk.recipe index 7990ef5676..8863cd1752 100644 --- a/recipes/metro_uk.recipe +++ b/recipes/metro_uk.recipe @@ -1,81 +1,56 @@ -from calibre.web.feeds.news import BasicNewsRecipe -from calibre import strftime import re -import datetime -import time +from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1306097511(BasicNewsRecipe): +class AdvancedUserRecipe1390146870(BasicNewsRecipe): title = u'Metro UK' - description = 'News from The Metro, UK' - - cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg' - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - auto_cleanup = True - max_articles_per_feed = 12 - ignore_duplicate_articles = {'title', 'url'} - #encoding = 'UTF-8' - language = 'en_GB' - masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif' + __author__ = 'D Asbury' + # 20.6.2015 rss using feed 43 - D Asbury + # item repeatable string =

{%}

+ oldest_article = 1 + max_articles_per_feed = 10 + scale_news_images_to_device = True compress_news_images = True - compress_news_images_max_size = 30 - remove_attributes = ['style', 'font'] - preprocess_regexps = [ + compress_news_images_max_size = 16 + ignore_duplicate_articles = {'title', 'url'} + masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png' + auto_cleanup = True + no_stylesheets = True + cover_url = 'https://lh4.ggpht.com/aaKY88SbQyB-vK-pgjo22-QVi6dUnOt7aVsRueTU8Fg-zPwOdsDdOVElGM8O0BBDSIsuB9rciyc=w300' + masthead_url = 'http://s1.wp.com/wp-content/themes/vip/metrouk/img/branding/metro_logo_300x95.png?m=1363331170g' + + preprocess_regexps = [ + (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''), - (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''), ] - def parse_index(self): - articles = {} - key = None - ans = [] - feeds = [('UK', 'http://metro.co.uk/news/uk/'), - ('World', 'http://metro.co.uk/news/world/'), - ('Weird', 'http://metro.co.uk/news/weird/'), - ('Money', 'http://metro.co.uk/news/money/'), - ('Sport', 'http://metro.co.uk/sport/'), - ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/') + feeds = [ + (u'UK','http://feed43.com/5636207287684703.xml'), # 'http://metro.co.uk/news/uk/rss'), + (u'World News','http://feed43.com/4555301018714738.xml'), + (u'Weird', 'http://feed43.com/0483673464615441.xml'), + (u'Sport', 'http://feed43.com/4655536887165433.xml'), + (u'Entertainment', 'http://feed43.com/6342124113153248.xml'), + (u'Lifestyle','http://feed43.com/7603240345000555.xml'), ] - for key, feed in feeds: - soup = self.index_to_soup(feed) - articles[key] = [] - ans.append(key) - today = datetime.date.today() - today = time.mktime(today.timetuple())-60*60*24 +# starsons code + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for feed in feeds: + for article in feed.articles[:]: + print 'article.title is: ', article.title + if 'VIDEO:' in article.title.upper(): + feed.articles.remove(article) - for a in soup.findAll('a'): - for name, value in a.attrs: - if name == "class" and value=="post": - url = a['href'] - title = a['title'] - print title - description = '' - m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url) - skip = 1 - if len(m.groups()) == 3: - g = m.groups() - dt = datetime.datetime.strptime(''+g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d') - pubdate = time.strftime('%a, %d %b', dt.timetuple()) + return feeds - dt = time.mktime(dt.timetuple()) - if dt >= today: - print pubdate - skip = 0 - else: - pubdate = strftime('%a, %d %b') - - summary = a.find(True, attrs={'class':'excerpt'}) - if summary: - description = self.tag_to_string(summary, use_alt=False) - - if skip == 0: - articles[key].append( - dict(title=title, url=url, date=pubdate, - description=description, - content='')) - #ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2}) - ans = [(key, articles[key]) for key in ans if key in articles] - return ans + extra_css = ''' + h1{font-weight:bold;font-size:175%;} + h2{display: block;margin-left: auto;margin-right: auto;width:125%;font-weight:bold;font-size:150%;} + #p{font-size:14px;} + #body{font-size:14px;} + .figcaption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;} + .alignnone{display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;} + .publish-info {font-size:50%;} + .aligncenter{display: block;margin-left: auto;margin-right: auto;width:100%;} + '''