mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #3416 (Receipt Spiegel Online - German => no articles)
This commit is contained in:
parent
ebfc8ec40f
commit
4efa4d7bb1
@ -24,7 +24,6 @@ class DerStandardRecipe(BasicNewsRecipe):
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
|
||||
|
||||
extra_css = '''
|
||||
.artikelBody{font-family:Arial,Helvetica,sans-serif;}
|
||||
.artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
||||
@ -59,14 +58,15 @@ class DerStandardRecipe(BasicNewsRecipe):
|
||||
|
||||
filter_regexps = [r'/r[1-9]*']
|
||||
|
||||
#def print_version(self, url):
|
||||
# return url.replace('?id=', 'txt/?id=')
|
||||
|
||||
def get_article_url(self, article):
    '''Return the URL to download for a feed entry, or None to skip it.

    An entry is skipped when it points at an index page (its link contains
    "ressort"), when it is a picture gallery (its title contains
    "ansichtssache", case-insensitively), or when its link matches the
    pattern /r[1-9]*.

    :param article: feed entry exposing ``link`` and ``title`` string
        attributes.
    :return: ``article.link`` for a regular article, otherwise None.
    '''
    link = article.link
    if 'ressort' in link or 'ansichtssache' in article.title.lower():
        return None
    # NOTE(review): '[1-9]*' may match zero digits, so any link containing
    # '/r' is rejected. The same pattern appears in filter_regexps, so it is
    # kept as-is here -- confirm whether '[0-9]+' was intended.
    if re.search(r'/r[1-9]*', link):
        return None
    return link
|
||||
|
||||
|
@ -7,7 +7,6 @@ spiegel.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Spiegel_ger(BasicNewsRecipe):
|
||||
title = 'Spiegel Online - German'
|
||||
@ -17,49 +16,31 @@ class Spiegel_ger(BasicNewsRecipe):
|
||||
category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'de'
|
||||
|
||||
language = 'de'
|
||||
lang = 'de-DE'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
}
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'spMainContent'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]
|
||||
|
||||
remove_tags = [dict(name=['object','link','base'])]
|
||||
remove_tags = [dict(name=['object','link','base','iframe'])]
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
||||
|
||||
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
|
||||
|
||||
def print_version(self, url):
    '''Return the print ("druck") variant of an article URL.

    Any ``#fragment`` is dropped, then ``druck-`` is inserted in front of
    the second-to-last comma-separated field. For example
    ``.../0,1518,644573,00.html#ref=rss`` becomes
    ``.../0,1518,druck-644573,00.html``.

    :param url: article URL, with or without a ``#fragment``.
    :return: the rewritten URL, or ``url`` unchanged when it does not
        contain at least two commas to anchor the rewrite.
    '''
    # partition, not rpartition: rpartition('#')[0] is '' when the URL has
    # no '#', which used to turn fragment-less URLs into ',druck-,'.
    base = url.partition('#')[0]
    head, last_sep, last_field = base.rpartition(',')
    prefix, prev_sep, prev_field = head.rpartition(',')
    if not last_sep or not prev_sep:
        # Not in the expected "a,b,c" shape; leave the URL alone rather
        # than emit a mangled one.
        return url
    return prefix + ',druck-' + prev_field + ',' + last_field
|
||||
|
||||
# preprocess_html(soup): adds two <meta> tags to the document head, deletes
# every inline "style" attribute, creates an <html> element when none is
# found, and returns the same soup object.
# NOTE(review): indentation was lost in this rendering, so the nesting of the
# statements below is not visible here; the hunk header suggests this method
# is among the lines the commit removes -- confirm against the repository.
def preprocess_html(self, soup):
|
||||
# <meta http-equiv="Content-Language"> carrying self.lang.
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
# <meta http-equiv="Content-Type"> declaring charset=UTF-8.
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
# Both meta tags go to the front of <head>: positions 0 and 1.
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
# Remove the style attribute from every element that has one.
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
htmltag = soup.find('html')
|
||||
# No <html> element found: build one with lang, xml:lang and dir="ltr"
# and insert it at position 0 of the soup.
if not htmltag:
|
||||
thtml = Tag(soup,'html',[("lang",self.lang),("xml:lang",self.lang),("dir","ltr")])
|
||||
soup.insert(0,thtml)
|
||||
# NOTE(review): presumably the following head/body relocation is still part
# of the "if not htmltag" branch -- cannot be told from this view; verify.
thead = soup.head
|
||||
tbody = soup.body
|
||||
thead.extract()
|
||||
tbody.extract()
|
||||
# Body is inserted at index 0 first, then head at index 0, so head ends up
# before body inside <html>.
soup.html.insert(0,tbody)
|
||||
soup.html.insert(0,thead)
|
||||
return soup
|
||||
purl = rmain + ',druck-' + rrest + ',' + rest
|
||||
return purl
|
||||
|
Loading…
x
Reference in New Issue
Block a user