Update Metro UK

2025-07-07 10:14:46 -04:00 · 2015-06-20 13:42:48 +05:30 · 2015-06-20 13:42:48 +05:30 · c9004c1e98
commit c9004c1e98
parent 6ed7a71a4d
1 changed files with 44 additions and 69 deletions
--- a/recipes/metro_uk.recipe
+++ b/recipes/metro_uk.recipe
@ -1,81 +1,56 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre import strftime
 import re
-import datetime
-import time
+from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1306097511(BasicNewsRecipe):
+class AdvancedUserRecipe1390146870(BasicNewsRecipe):
    title          = u'Metro UK'
-    description = 'News from The Metro, UK'
-
-    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
-    remove_empty_feeds = True
-    remove_javascript     = True
-    no_stylesheets        = True
-    auto_cleanup = True
-    max_articles_per_feed = 12
-    ignore_duplicate_articles = {'title', 'url'}
-    #encoding = 'UTF-8'
-
    language = 'en_GB'
-    masthead_url        = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
+    __author__ = 'D Asbury'
+    # 20.6.2015: RSS feeds are now generated via feed43.com - D Asbury
+    # item repeatable string = <h3 class="title"><a href="{%}" {*}<span class="colour">{%}</span></a></h3>
+    oldest_article = 1
+    max_articles_per_feed = 10
+    scale_news_images_to_device = True
    compress_news_images = True
-    compress_news_images_max_size = 30
-    remove_attributes = ['style', 'font']
-    preprocess_regexps = [
+    compress_news_images_max_size = 16
+    ignore_duplicate_articles = {'title', 'url'}
+    masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png'
+    auto_cleanup = True
+    no_stylesheets        = True
+    cover_url = 'https://lh4.ggpht.com/aaKY88SbQyB-vK-pgjo22-QVi6dUnOt7aVsRueTU8Fg-zPwOdsDdOVElGM8O0BBDSIsuB9rciyc=w300'
+    masthead_url        = 'http://s1.wp.com/wp-content/themes/vip/metrouk/img/branding/metro_logo_300x95.png?m=1363331170g'
+
+    preprocess_regexps = [
+                 (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),

-                (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),
                        ]

-    def parse_index(self):
-        articles = {}
-        key = None
-        ans = []
-        feeds = [('UK', 'http://metro.co.uk/news/uk/'),
-                ('World', 'http://metro.co.uk/news/world/'),
-                ('Weird', 'http://metro.co.uk/news/weird/'),
-                ('Money', 'http://metro.co.uk/news/money/'),
-                ('Sport', 'http://metro.co.uk/sport/'),
-                ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
+    feeds          = [
+                (u'UK','http://feed43.com/5636207287684703.xml'),  # 'http://metro.co.uk/news/uk/rss'),
+                (u'World News','http://feed43.com/4555301018714738.xml'),
+                (u'Weird', 'http://feed43.com/0483673464615441.xml'),
+                (u'Sport', 'http://feed43.com/4655536887165433.xml'),
+                (u'Entertainment', 'http://feed43.com/6342124113153248.xml'),
+                (u'Lifestyle','http://feed43.com/7603240345000555.xml'),
                ]
-        for key, feed in feeds:
-            soup = self.index_to_soup(feed)
-            articles[key] = []
-            ans.append(key)

-            today = datetime.date.today()
-            today = time.mktime(today.timetuple())-60*60*24
+# starsons code: removes articles whose title contains 'VIDEO:' (case-insensitive)
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                print 'article.title is: ', article.title
+                if 'VIDEO:' in article.title.upper():
+                    feed.articles.remove(article)

-            for a in soup.findAll('a'):
-                for name, value in a.attrs:
-                    if name == "class" and value=="post":
-                        url = a['href']
-                        title = a['title']
-                        print title
-                        description = ''
-                        m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
-                        skip = 1
-                        if len(m.groups()) == 3:
-                            g = m.groups()
-                            dt = datetime.datetime.strptime(''+g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
-                            pubdate = time.strftime('%a, %d %b', dt.timetuple())
+        return feeds

-                            dt = time.mktime(dt.timetuple())
-                            if dt >= today:
-                                print pubdate
-                                skip = 0
-                        else:
-                            pubdate = strftime('%a, %d %b')
-
-                        summary = a.find(True, attrs={'class':'excerpt'})
-                        if summary:
-                            description = self.tag_to_string(summary, use_alt=False)
-
-                        if skip == 0:
-                            articles[key].append(
-                                                    dict(title=title, url=url, date=pubdate,
-                                                                    description=description,
-                                                                    content=''))
-        #ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
-        ans = [(key, articles[key]) for key in ans if key in articles]
-        return ans
+    extra_css = '''
+                    h1{font-weight:bold;font-size:175%;}
+                    h2{display: block;margin-left: auto;margin-right: auto;width:125%;font-weight:bold;font-size:150%;}
+                    #p{font-size:14px;}
+                    #body{font-size:14px;}
+                    .figcaption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
+                    .alignnone{display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
+                    .publish-info {font-size:50%;}
+                    .aligncenter{display: block;margin-left: auto;margin-right: auto;width:100%;}
+      '''