mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update aktualne.cz. Fixes #1419375 [Fetching aktualne.cz results in wrong encoding in article details](https://bugs.launchpad.net/calibre/+bug/1419375)
This commit is contained in:
parent
05ee6b477e
commit
e1c136e3c3
@@ -10,6 +10,7 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
description = 'aktuálně.cz'
|
description = 'aktuálně.cz'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
|
encoding = 'utf-8'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
|
(u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
|
||||||
@@ -20,7 +21,6 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
(u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
|
(u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
language = 'cs'
|
language = 'cs'
|
||||||
cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
|
cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@@ -43,7 +43,8 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'boxP'}),
|
dict(name='div', attrs={'class':'boxP'}),
|
||||||
dict(name='div', attrs={'class':'box2'})]
|
dict(name='div', attrs={'class':'box2'})]
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
|
(re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*',
|
||||||
|
re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
|
||||||
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
||||||
|
|
||||||
keep_only_tags = []
|
keep_only_tags = []
|
||||||
@@ -58,12 +59,3 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
self.visited_urls[url] = True
|
self.visited_urls[url] = True
|
||||||
self.log.debug('Accepting: ' + url)
|
self.log.debug('Accepting: ' + url)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def encoding(self, source):
|
|
||||||
if source.newurl.find('blog.aktualne') >= 0:
|
|
||||||
enc = 'utf-8'
|
|
||||||
else:
|
|
||||||
enc = 'iso-8859-2'
|
|
||||||
self.log.debug('Called encoding ' + enc + " " + str(source.newurl))
|
|
||||||
return source.decode(enc, 'replace')
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user