Sunday Times UK & Sunday Times Style

Supplementary to The Times' daily and Sunday recipe.
2025-07-09 03:04:10 -04:00 · 2017-01-19 16:06:10 +00:00 · 2017-01-19 16:06:10 +00:00 · 003707f187
commit 003707f187
parent d85f985e9c
1 changed file with 90 additions and 0 deletions
--- a/recipes/sunday_times_magazine
+++ b/recipes/sunday_times_magazine
@@ -0,0 +1,90 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.thetimes.co.uk
+'''
+import urllib
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class TimesOnline(BasicNewsRecipe):
+    title = 'The Sunday Times Magazine UK'
+    __author__ = 'Bobby Steel & Darko Miletic'
+    description = 'newsmagazine from United Kingdom and World'
+    language = 'en_GB'
+    publisher = 'Times Newspapers Ltd'
+    category = 'news, politics, UK'
+    oldest_article = 3
+    max_articles_per_feed = 500
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf-8'
+    delay = 1
+    needs_subscription = True
+    publication_type = 'newspaper'
+    INDEX = 'http://www.thetimes.co.uk/'
+    PREFIX = u'http://www.thetimes.co.uk/'
+    extra_css             = """
+                                .author-name,.authorName{font-style: italic}
+                                .published-date,.multi-position-photo-text{font-family: Arial,Helvetica,sans-serif;
+                                                                           font-size: small; color: gray;
+                                                                           display:block; margin-bottom: 0.5em}
+                                body{font-family: Georgia,"Times New Roman",Times,serif}
+                            """
+
+    conversion_options = {
+        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
+    }
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.open('http://www.thetimes.co.uk/')
+        if self.username is not None and self.password is not None:
+            data = urllib.urlencode({
+                'gotoUrl': self.INDEX, 'username': self.username, 'password': self.password
+            })
+            br.open('https://login.thetimes.co.uk/', data)
+        return br
+
+    remove_tags = [
+        {'name': ['object', 'link', 'iframe', 'base', 'meta', 'script']},
+        {'attrs': {'class': ['tools comments-parent','u-hide','Tooltip','Toolbar Toolbar--bottom','Comments Article-container','ArticlePager','Media-caption','RelatedLinks']}},
+        {'attrs': {'class': lambda x: x and 'Toolbar' in x}} 
+    ]
+    remove_attributes = ['lang']
+    keep_only_tags = [
+        dict(attrs={'class': 'Article Article--default'}
+             ), dict(attrs={'class': 'f-author'}), dict(attrs={'id': 'bodycopy'})
+    ]
+    remove_tags_after = dict(attrs={'class': 'Article-content'})
+
+    feeds = [
+    (u'The Sunday Times Magazine', u'http://www.thetimes.co.uk/magazine/the-sunday-times-magazine/'),
+    (u'Sunday Times Style', u'http://www.thetimes.co.uk/magazine/style/')
+    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
+
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed') + ' %s...' %
+                                 (feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for atag in soup.findAll('a', href=True):
+                parentName = atag.parent.name
+                title = self.tag_to_string(atag).strip()
+                if (parentName == 'h2' or parentName == 'h3') and title is not None and title != '':
+                    url = self.INDEX + atag['href']
+                    articles.append({
+                        'title': title, 'date': '', 'url': url, 'description': ''
+                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds