The Workingham Times by DM. Fixes #7672 (New recipe for The Workingham Times)

2025-07-09 03:04:10 -04:00 · 2010-11-26 07:41:30 -07:00 · 2010-11-26 07:41:30 -07:00 · 64ede2a0ea
commit 64ede2a0ea
parent ca3f9b841d
2 changed files with 59 additions and 0 deletions
--- a/resources/images/news/the_workingham_times.png
+++ b/resources/images/news/the_workingham_times.png
--- a/resources/recipes/the_workingham_times.recipe
+++ b/resources/recipes/the_workingham_times.recipe
@ -0,0 +1,59 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.getwokingham.co.uk
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class TheWorkinghamTimes(BasicNewsRecipe):
+    title                  = 'The Workingham Times'
+    __author__             = 'Darko Miletic'
+    description            = 'News from UK'
+    oldest_article         = 2
+    max_articles_per_feed  = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    encoding               = 'utf8'
+    publisher              = 'The Wokingham Times - S&B media'
+    category               = 'news, UK, world'
+    language               = 'en_GB'
+    publication_type       = 'newsportal'
+    extra_css              = """
+                                body{ font-family: Arial,sans-serif }
+                                img{display: block; margin-bottom: 0.4em}
+                             """
+
+    conversion_options = {
+                             'comments'  : description
+                            ,'tags'      : category
+                            ,'language'  : language
+                            ,'publisher' : publisher
+                         }
+
+    keep_only_tags = [dict(name='div', attrs={'id':'article-body'})]
+    remove_tags    = [
+                      dict(name='div'  , attrs={'class':['ad']})
+                     ,dict(name=['meta','base','iframe','embed','object'])
+                     ,dict(name='span' , attrs={'class':'caption small'})
+                     ]
+    remove_attributes = ['width','height','lang']
+
+    feeds          = [
+                       ('Home'         , 'http://www.getwokingham.co.uk/rss.xml'              )
+                      ,('News'         , 'http://www.getwokingham.co.uk/news/rss.xml'         )
+                      ,('Entertainment', 'http://www.getwokingham.co.uk/entertainment/rss.xml')
+                      ,('Lifestyle'    , 'http://www.getwokingham.co.uk/lifestyle/rss.xml'    )
+                    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            if item.string is not None:
+               str = item.string
+               item.replaceWith(str)
+            else:
+               item.name = 'span'
+               del item['href']
+        return soup