Update De Tijd and Het Laatste Nieuws

This commit is contained in:
Kovid Goyal 2017-02-18 09:07:26 +05:30
parent 6b69cb960e
commit 7cfda558ed
2 changed files with 48 additions and 36 deletions

View File

@ -23,11 +23,16 @@ class HLN_be(BasicNewsRecipe):
language = 'nl_BE' language = 'nl_BE'
conversion_options = { conversion_options = {
'comments': description, 'tags': category, 'language': 'nl-NL', 'publisher': publisher 'comments': description,
'tags': category,
'language': 'nl-NL',
'publisher': publisher
} }
remove_tags = [dict(name=['form', 'object', 'embed'])] remove_tags = [dict(name=['form', 'object', 'embed'])]
keep_only_tags = [dict(name='div', attrs={'id': 'art_box2'})] keep_only_tags = [
dict(name='article', attrs={'class': 'article art_detail'}),
]
feeds = [(u'Articles', u'http://www.hln.be/rss.xml')] feeds = [(u'Articles', u'http://www.hln.be/rss.xml')]

View File

@ -25,24 +25,28 @@ class DeTijd(BasicNewsRecipe):
lang = 'nl-BE' lang = 'nl-BE'
direction = 'ltr' direction = 'ltr'
html2lrf_options = [ keep_only_tags = [
'--comment', description, '--category', category, '--publisher', publisher dict(name='div', attrs={'id': 'lcol'}), dict(
name='div', attrs={'class': 'l-main-container-article__asset-container'}
), dict(
name='div',
attrs={
'class': 'l-main-container-article__body clearfix highlightable '
}
), dict(
name='div',
attrs={'class': 'l-main-container-article__intro highlightable '}
), dict(
name='div', attrs={'class': 'l-main-container-article__sidebar-inline'}
), dict(name='div', attrs={'class': 'l-main-container-article__title '})
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + \
category + \
'"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
keep_only_tags = [dict(name='div', attrs={'id': 'lcol'})]
remove_tags = [ remove_tags = [
dict(name=['embed', 'object']), dict( dict(name=['embed', 'object']),
name='div', attrs={'id': 'art_reactwrap'}) dict(name='div', attrs={'id': 'art_reactwrap'})
] ]
remove_tags_after = dict(name='div', attrs={'id': 'art_author'}) remove_tags_after = dict(name='div', attrs={'id': 'art_author'})
feeds = [ feeds = [(u'Volledig nieuwsaanbod', u'http://www.tijd.be/rss/nieuws.xml'),
(u'Volledig nieuwsaanbod', u'http://www.tijd.be/rss/nieuws.xml'),
(u'Markten', u'http://www.tijd.be/rss/markten.xml'), (u'Markten', u'http://www.tijd.be/rss/markten.xml'),
(u'Ondernemingen', u'http://www.tijd.be/rss/ondernemingen.xml'), (u'Ondernemingen', u'http://www.tijd.be/rss/ondernemingen.xml'),
(u'Chemie-Farma', u'http://www.tijd.be/rss/chemie_farma.xml'), (u'Chemie-Farma', u'http://www.tijd.be/rss/chemie_farma.xml'),
@ -56,8 +60,7 @@ class DeTijd(BasicNewsRecipe):
(u'Economie & Financien', u'http://www.tijd.be/rss/economie.xml'), (u'Economie & Financien', u'http://www.tijd.be/rss/economie.xml'),
(u'Binnenland', u'http://www.tijd.be/rss/binnenland.xml'), (u'Binnenland', u'http://www.tijd.be/rss/binnenland.xml'),
(u'Buitenland', u'http://www.tijd.be/rss/buitenland.xml'), (u'Buitenland', u'http://www.tijd.be/rss/buitenland.xml'),
(u'De wijde wereld', u'http://www.tijd.be/rss/cultuur.xml') (u'De wijde wereld', u'http://www.tijd.be/rss/cultuur.xml')]
]
def preprocess_html(self, soup): def preprocess_html(self, soup):
del soup.body['onload'] del soup.body['onload']
@ -65,10 +68,14 @@ class DeTijd(BasicNewsRecipe):
del item['style'] del item['style']
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = Tag(
("http-equiv", "Content-Language"), ("content", self.lang)]) soup, 'meta', [("http-equiv", "Content-Language"),
mcharset = Tag(soup, 'meta', [ ("content", self.lang)]
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) )
mcharset = Tag(
soup, 'meta', [("http-equiv", "Content-Type"),
("content", "text/html; charset=utf-8")]
)
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)
return soup return soup