mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #3416 (Receipt Spiegel Online - German => no articles)
This commit is contained in:
parent
ebfc8ec40f
commit
4efa4d7bb1
@ -24,7 +24,6 @@ class DerStandardRecipe(BasicNewsRecipe):
|
|||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.artikelBody{font-family:Arial,Helvetica,sans-serif;}
|
.artikelBody{font-family:Arial,Helvetica,sans-serif;}
|
||||||
.artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
.artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
||||||
@ -59,14 +58,15 @@ class DerStandardRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
filter_regexps = [r'/r[1-9]*']
|
filter_regexps = [r'/r[1-9]*']
|
||||||
|
|
||||||
#def print_version(self, url):
|
|
||||||
# return url.replace('?id=', 'txt/?id=')
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
'''if the article links to a index page (ressort) or a picture gallery
|
'''if the article links to a index page (ressort) or a picture gallery
|
||||||
(ansichtssache), don't add it'''
|
(ansichtssache), don't add it'''
|
||||||
if ( article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0 ):
|
if ( article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0 ):
|
||||||
return None
|
return None
|
||||||
|
matchObj = re.search( re.compile(r'/r'+'[1-9]*',flags=0), article.link,flags=0)
|
||||||
|
|
||||||
|
if matchObj:
|
||||||
|
return None
|
||||||
|
|
||||||
return article.link
|
return article.link
|
||||||
|
|
||||||
|
@ -7,7 +7,6 @@ spiegel.de
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
|
||||||
class Spiegel_ger(BasicNewsRecipe):
|
class Spiegel_ger(BasicNewsRecipe):
|
||||||
title = 'Spiegel Online - German'
|
title = 'Spiegel Online - German'
|
||||||
@ -17,49 +16,31 @@ class Spiegel_ger(BasicNewsRecipe):
|
|||||||
category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
|
category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'de'
|
language = 'de'
|
||||||
|
|
||||||
lang = 'de-DE'
|
lang = 'de-DE'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
}
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'spMainContent'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','base'])]
|
remove_tags = [dict(name=['object','link','base','iframe'])]
|
||||||
|
|
||||||
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
||||||
|
|
||||||
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
|
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, rest = url.rpartition(',')
|
rmt = url.rpartition('#')[0]
|
||||||
|
main, sep, rest = rmt.rpartition(',')
|
||||||
rmain, rsep, rrest = main.rpartition(',')
|
rmain, rsep, rrest = main.rpartition(',')
|
||||||
return rmain + ',druck-' + rrest + ',' + rest
|
purl = rmain + ',druck-' + rrest + ',' + rest
|
||||||
|
return purl
|
||||||
def preprocess_html(self, soup):
|
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
|
||||||
soup.head.insert(0,mlang)
|
|
||||||
soup.head.insert(1,mcharset)
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
htmltag = soup.find('html')
|
|
||||||
if not htmltag:
|
|
||||||
thtml = Tag(soup,'html',[("lang",self.lang),("xml:lang",self.lang),("dir","ltr")])
|
|
||||||
soup.insert(0,thtml)
|
|
||||||
thead = soup.head
|
|
||||||
tbody = soup.body
|
|
||||||
thead.extract()
|
|
||||||
tbody.extract()
|
|
||||||
soup.html.insert(0,tbody)
|
|
||||||
soup.html.insert(0,thead)
|
|
||||||
return soup
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user