mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update De Tijd and Het Laatste Nieuws
This commit is contained in:
parent
6b69cb960e
commit
7cfda558ed
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
@ -23,11 +23,16 @@ class HLN_be(BasicNewsRecipe):
|
||||
language = 'nl_BE'
|
||||
|
||||
conversion_options = {
|
||||
'comments': description, 'tags': category, 'language': 'nl-NL', 'publisher': publisher
|
||||
'comments': description,
|
||||
'tags': category,
|
||||
'language': 'nl-NL',
|
||||
'publisher': publisher
|
||||
}
|
||||
|
||||
remove_tags = [dict(name=['form', 'object', 'embed'])]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'art_box2'})]
|
||||
keep_only_tags = [
|
||||
dict(name='article', attrs={'class': 'article art_detail'}),
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://www.hln.be/rss.xml')]
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
@ -25,39 +25,42 @@ class DeTijd(BasicNewsRecipe):
|
||||
lang = 'nl-BE'
|
||||
direction = 'ltr'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description, '--category', category, '--publisher', publisher
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id': 'lcol'}), dict(
|
||||
name='div', attrs={'class': 'l-main-container-article__asset-container'}
|
||||
), dict(
|
||||
name='div',
|
||||
attrs={
|
||||
'class': 'l-main-container-article__body clearfix highlightable '
|
||||
}
|
||||
), dict(
|
||||
name='div',
|
||||
attrs={'class': 'l-main-container-article__intro highlightable '}
|
||||
), dict(
|
||||
name='div', attrs={'class': 'l-main-container-article__sidebar-inline'}
|
||||
), dict(name='div', attrs={'class': 'l-main-container-article__title '})
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + \
|
||||
category + \
|
||||
'"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'lcol'})]
|
||||
remove_tags = [
|
||||
dict(name=['embed', 'object']), dict(
|
||||
name='div', attrs={'id': 'art_reactwrap'})
|
||||
dict(name=['embed', 'object']),
|
||||
dict(name='div', attrs={'id': 'art_reactwrap'})
|
||||
]
|
||||
remove_tags_after = dict(name='div', attrs={'id': 'art_author'})
|
||||
|
||||
feeds = [
|
||||
|
||||
(u'Volledig nieuwsaanbod', u'http://www.tijd.be/rss/nieuws.xml'),
|
||||
(u'Markten', u'http://www.tijd.be/rss/markten.xml'),
|
||||
(u'Ondernemingen', u'http://www.tijd.be/rss/ondernemingen.xml'),
|
||||
(u'Chemie-Farma', u'http://www.tijd.be/rss/chemie_farma.xml'),
|
||||
(u'Consumptie', u'http://www.tijd.be/rss/consumptie.xml'),
|
||||
(u'Diensten', u'http://www.tijd.be/rss/diensten.xml'),
|
||||
(u'Energie', u'http://www.tijd.be/rss/energie.xml'),
|
||||
(u'Financen', u'http://www.tijd.be/rss/financien.xml'),
|
||||
(u'Industrie', u'http://www.tijd.be/rss/industrie.xml'),
|
||||
(u'Media', u'http://www.tijd.be/rss/media_telecom.xml'),
|
||||
(u'Technologie', u'http://www.tijd.be/rss/technologie.xml'),
|
||||
(u'Economie & Financien', u'http://www.tijd.be/rss/economie.xml'),
|
||||
(u'Binnenland', u'http://www.tijd.be/rss/binnenland.xml'),
|
||||
(u'Buitenland', u'http://www.tijd.be/rss/buitenland.xml'),
|
||||
(u'De wijde wereld', u'http://www.tijd.be/rss/cultuur.xml')
|
||||
]
|
||||
feeds = [(u'Volledig nieuwsaanbod', u'http://www.tijd.be/rss/nieuws.xml'),
|
||||
(u'Markten', u'http://www.tijd.be/rss/markten.xml'),
|
||||
(u'Ondernemingen', u'http://www.tijd.be/rss/ondernemingen.xml'),
|
||||
(u'Chemie-Farma', u'http://www.tijd.be/rss/chemie_farma.xml'),
|
||||
(u'Consumptie', u'http://www.tijd.be/rss/consumptie.xml'),
|
||||
(u'Diensten', u'http://www.tijd.be/rss/diensten.xml'),
|
||||
(u'Energie', u'http://www.tijd.be/rss/energie.xml'),
|
||||
(u'Financen', u'http://www.tijd.be/rss/financien.xml'),
|
||||
(u'Industrie', u'http://www.tijd.be/rss/industrie.xml'),
|
||||
(u'Media', u'http://www.tijd.be/rss/media_telecom.xml'),
|
||||
(u'Technologie', u'http://www.tijd.be/rss/technologie.xml'),
|
||||
(u'Economie & Financien', u'http://www.tijd.be/rss/economie.xml'),
|
||||
(u'Binnenland', u'http://www.tijd.be/rss/binnenland.xml'),
|
||||
(u'Buitenland', u'http://www.tijd.be/rss/buitenland.xml'),
|
||||
(u'De wijde wereld', u'http://www.tijd.be/rss/cultuur.xml')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
del soup.body['onload']
|
||||
@ -65,10 +68,14 @@ class DeTijd(BasicNewsRecipe):
|
||||
del item['style']
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
mlang = Tag(
|
||||
soup, 'meta', [("http-equiv", "Content-Language"),
|
||||
("content", self.lang)]
|
||||
)
|
||||
mcharset = Tag(
|
||||
soup, 'meta', [("http-equiv", "Content-Type"),
|
||||
("content", "text/html; charset=utf-8")]
|
||||
)
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user