Fix #3416 (Recipe Spiegel Online - German => no articles)

This commit is contained in:
Kovid Goyal 2009-09-19 20:53:04 -06:00
parent ebfc8ec40f
commit 4efa4d7bb1
2 changed files with 17 additions and 36 deletions

View File

@ -24,7 +24,6 @@ class DerStandardRecipe(BasicNewsRecipe):
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
extra_css = ''' extra_css = '''
.artikelBody{font-family:Arial,Helvetica,sans-serif;} .artikelBody{font-family:Arial,Helvetica,sans-serif;}
.artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;} .artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@ -59,14 +58,15 @@ class DerStandardRecipe(BasicNewsRecipe):
filter_regexps = [r'/r[1-9]*'] filter_regexps = [r'/r[1-9]*']
#def print_version(self, url):
# return url.replace('?id=', 'txt/?id=')
def get_article_url(self, article):
    '''Return the feed entry's link, or None to drop the entry.

    Drops links to index pages ("ressort"), picture galleries
    ("ansichtssache"), and reader-forum URLs of the form /r<digits>.
    '''
    if 'ressort' in article.link or 'ansichtssache' in article.title.lower():
        return None
    # NOTE: '[1-9]*' makes the digits optional, so any '/r' in the link
    # matches — kept as-is for consistency with filter_regexps above.
    # Plain re.search suffices; the module caches the compiled pattern,
    # and the original's re.compile(..., flags=0) inside re.search(...,
    # flags=0) was redundant.
    if re.search(r'/r[1-9]*', article.link):
        return None
    return article.link

View File

@ -7,7 +7,6 @@ spiegel.de
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Spiegel_ger(BasicNewsRecipe): class Spiegel_ger(BasicNewsRecipe):
title = 'Spiegel Online - German' title = 'Spiegel Online - German'
@ -17,49 +16,31 @@ class Spiegel_ger(BasicNewsRecipe):
category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget' category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'de' language = 'de'
lang = 'de-DE' lang = 'de-DE'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'spMainContent'})] keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]
remove_tags = [dict(name=['object','link','base'])] remove_tags = [dict(name=['object','link','base','iframe'])]
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'}) remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')] feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
def print_version(self, url):
    '''Rewrite an article URL into its print-friendly form.

    Spiegel article URLs end in ",<id>,00.html"; the print version
    inserts "druck-" before the id:  ",druck-<id>,00.html".
    '''
    # Strip any '#fragment' (e.g. '#ref=rss' from the feed).  The
    # original used url.rpartition('#')[0], which returns '' when the
    # url has no '#' at all; split() keeps the whole url in that case.
    base = url.split('#', 1)[0]
    main, _, rest = base.rpartition(',')
    rmain, _, rrest = main.rpartition(',')
    return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup):
    # Normalise the downloaded page before conversion:
    # inject language/charset <meta> tags, strip inline styles, and
    # make sure the document has an <html> root element.
    # NOTE(review): Tag(soup, name, [(attr, val), ...]) is the
    # calibre/BeautifulSoup Tag constructor — attrs as a list of pairs.
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
    # Put the language tag first, then the charset tag, at the top of <head>.
    soup.head.insert(0,mlang)
    soup.head.insert(1,mcharset)
    # Drop all inline style attributes so the recipe's extra_css wins.
    for item in soup.findAll(style=True):
        del item['style']
    htmltag = soup.find('html')
    if not htmltag:
        # Page came without an <html> root: create one and re-parent the
        # existing <head> and <body> under it (insert body first, then
        # head, so head ends up before body).
        thtml = Tag(soup,'html',[("lang",self.lang),("xml:lang",self.lang),("dir","ltr")])
        soup.insert(0,thtml)
        thead = soup.head
        tbody = soup.body
        thead.extract()
        tbody.extract()
        soup.html.insert(0,tbody)
        soup.html.insert(0,thead)
    return soup