Implement #2690 (Various Belgian recipes)

2025-11-27 08:45:00 -05:00 · 2009-06-23 11:03:55 -07:00 · 2009-06-23 11:03:55 -07:00 · e311bc02a8
commit e311bc02a8
parent 521890f655
8 changed files with 261 additions and 0 deletions
--- a/src/calibre/gui2/images/news/gva_be.png
+++ b/src/calibre/gui2/images/news/gva_be.png
--- a/src/calibre/gui2/images/news/hln.png
+++ b/src/calibre/gui2/images/news/hln.png
--- a/src/calibre/gui2/images/news/tijd.png
+++ b/src/calibre/gui2/images/news/tijd.png
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -50,6 +50,7 @@ recipe_modules = ['recipe_' + r for r in (
           'marca', 'kellog_faculty', 'kellog_insight',
           'theeconomictimes_india', '7dias', 'buenosaireseconomico',
           'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
+           'gva_be', 'hln', 'tijd', 'degentenaar',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_degentenaar.py
+++ b/src/calibre/web/feeds/recipes/recipe_degentenaar.py
@ -0,0 +1,75 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.nieuwsblad.be
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class DeGentenaarOnline(BasicNewsRecipe):
+    title                 = 'De Gentenaar Online'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Belgium in Dutch'
+    publisher             = 'De Gentenaar'
+    category              = 'news, politics, Belgium'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    language              = _('Dutch')
+    lang                  = 'nl-BE'
+    direction             = 'ltr'
+
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+
+    keep_only_tags = [dict(name='span', attrs={'id':['lblArticleTitle','lblArticleIntroduction','lblArticleMainText']})]
+    remove_tags    = [dict(name=['embed','object'])]
+
+
+
+    feeds = [
+              (u'Snelnieuws' , u'http://feeds.nieuwsblad.be/nieuws/snelnieuws'     )
+             ,(u'Binnenland' , u'http://feeds.nieuwsblad.be/nieuws/binnenland'     )
+             ,(u'Buitenland' , u'http://feeds.nieuwsblad.be/nieuwsblad/buitenland' )
+             ,(u'Economie'   , u'http://feeds.nieuwsblad.be/economie/home'         )
+             ,(u'Economie'   , u'http://feeds.nieuwsblad.be/economie/home'         )
+             ,(u'Algemeen'   , u'http://feeds.nieuwsblad.be/life/algemeen'         )
+             ,(u'Film'       , u'http://feeds.nieuwsblad.be/life/film'             )
+             ,(u'Boek'       , u'http://feeds.nieuwsblad.be/life/boeken'           )
+             ,(u'Muziek'     , u'http://feeds.nieuwsblad.be/life/muziek'           )
+             ,(u'Podium'     , u'http://feeds.nieuwsblad.be/life/podium'           )
+             ,(u'TV & radio' , u'http://feeds.nieuwsblad.be/life/tv'               )
+            ]
+
+    def print_version(self, url):
+        return url.replace('/Detail.aspx?articleid','/PrintArticle.aspx?ArticleID')
+
+    def get_article_url(self, article):
+        return article.get('guid',  None)
+
+    def preprocess_html(self, soup):
+        del soup.body['onload']
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('span'):
+            item.name='div'
+            if item.has_key('id') and item['id'] == 'lblArticleTitle':
+               item.name='h3'
+
+        soup.html['lang']     = self.lang
+        soup.html['dir' ]     = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_gva_be.py
+++ b/src/calibre/web/feeds/recipes/recipe_gva_be.py
@ -0,0 +1,63 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.gva.be
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class GazetvanAntwerpen(BasicNewsRecipe):
+    title                 = 'Gazet van Antwerpen'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Belgium in Dutch'
+    publisher             = 'Gazet van Antwerpen'
+    category              = 'news, politics, Belgium'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    language              = _('Dutch')
+    lang                  = 'nl-BE'
+    direction             = 'ltr'
+
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+
+    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    remove_tags    = [
+                         dict(name=['embed','object'])
+                       , dict (name='div',attrs={'class':['note NotePortrait','note']})
+                     ]
+    remove_tags_after  = dict(name='span', attrs={'class':'author'})
+
+    feeds = [
+              (u'Overzicht & Blikvanger', u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/overview/overzicht'       )
+             ,(u'Stad & Regio'          , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/stadenregio'   )
+             ,(u'Economie'              , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/economie'      )
+             ,(u'Binnenland'            , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/binnenland'    )
+             ,(u'Buitenland'            , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/buitenland'    )
+             ,(u'Media & Cultur'        , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/mediaencultuur')
+             ,(u'Wetenschap'            , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/mediaencultuur')
+             ,(u'Sport'                 , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/sport'         )
+            ]
+
+    def preprocess_html(self, soup):
+        del soup.body['onload']
+        for item in soup.findAll(style=True):
+            del item['style']
+        soup.html['lang']     = self.lang
+        soup.html['dir' ]     = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_hln.py
+++ b/src/calibre/web/feeds/recipes/recipe_hln.py
@ -0,0 +1,52 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.hln.be
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class HLN_be(BasicNewsRecipe):
+    title                 = 'Het Belang Van Limburg'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Belgium in Dutch'
+    publisher             = 'Het Belang Van Limburg'
+    category              = 'news, politics, Belgium'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    language              = _('Dutch')
+    lang                  = 'nl-BE'
+    direction             = 'ltr'
+
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+
+    keep_only_tags = [dict(name='div', attrs={'class':'art_box2'})]
+    remove_tags    = [
+                         dict(name=['embed','object'])
+                     ]
+
+    feeds = [(u'Alle nieuws', u'http://www.hln.be/rss.xml')]
+
+    def preprocess_html(self, soup):
+        del soup.body['onload']
+        for item in soup.findAll(style=True):
+            del item['style']
+        soup.html['lang']     = self.lang
+        soup.html['dir' ]     = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_tijd.py
+++ b/src/calibre/web/feeds/recipes/recipe_tijd.py
@ -0,0 +1,70 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.tijd.be
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class DeTijd(BasicNewsRecipe):
+    title                 = 'De Tijd'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Belgium in Dutch'
+    publisher             = 'De Tijd'
+    category              = 'news, politics, Belgium'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    language              = _('Dutch')
+    lang                  = 'nl-BE'
+    direction             = 'ltr'
+
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+
+    keep_only_tags = [dict(name='div', attrs={'id':'lcol'})]
+    remove_tags    = [
+                         dict(name=['embed','object'])
+                       , dict (name='div',attrs={'id':'art_reactwrap'})
+                     ]
+    remove_tags_after  = dict(name='div', attrs={'id':'art_author'})
+
+    feeds = [
+              (u'Volledig nieuwsaanbod', u'http://www.tijd.be/rss/nieuws.xml'        )
+             ,(u'Markten'              , u'http://www.tijd.be/rss/markten.xml'       )
+             ,(u'Ondernemingen'        , u'http://www.tijd.be/rss/ondernemingen.xml' )
+             ,(u'Chemie-Farma'         , u'http://www.tijd.be/rss/chemie_farma.xml'  )
+             ,(u'Consumptie'           , u'http://www.tijd.be/rss/consumptie.xml'    )
+             ,(u'Diensten'             , u'http://www.tijd.be/rss/diensten.xml'      )
+             ,(u'Energie'              , u'http://www.tijd.be/rss/energie.xml'       )
+             ,(u'Financen'             , u'http://www.tijd.be/rss/financien.xml'     )
+             ,(u'Industrie'            , u'http://www.tijd.be/rss/industrie.xml'     )
+             ,(u'Media'                , u'http://www.tijd.be/rss/media_telecom.xml' )
+             ,(u'Technologie'          , u'http://www.tijd.be/rss/technologie.xml'   )
+             ,(u'Economie & Financien' , u'http://www.tijd.be/rss/economie.xml'      )
+             ,(u'Binnenland'           , u'http://www.tijd.be/rss/binnenland.xml'    )
+             ,(u'Buitenland'           , u'http://www.tijd.be/rss/buitenland.xml'    )
+             ,(u'De wijde wereld'      , u'http://www.tijd.be/rss/cultuur.xml'       )
+            ]
+
+    def preprocess_html(self, soup):
+        del soup.body['onload']
+        for item in soup.findAll(style=True):
+            del item['style']
+        soup.html['lang']     = self.lang
+        soup.html['dir' ]     = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return soup
+