Fix #1639 (Calibre cannot properly handle URLs with non-ASCII characters). New recipe for Sueddeutsche by Oliver Niesner

This commit is contained in:
Kovid Goyal 2009-01-18 16:07:15 -08:00
parent d724f09059
commit a83de9ce2d
4 changed files with 11 additions and 2 deletions

View File

@ -40,6 +40,7 @@ def convert(opts, recipe_arg, notification=None):
c.smart_update(recipe_opts, opts) c.smart_update(recipe_opts, opts)
opts = recipe_opts opts = recipe_opts
opts.chapter_mark = 'none' opts.chapter_mark = 'none'
opts.dont_split_on_page_breaks = True
opf = glob.glob(os.path.join(tdir, '*.opf')) opf = glob.glob(os.path.join(tdir, '*.opf'))
if not opf: if not opf:
raise Exception('Downloading of recipe: %s failed'%recipe_arg) raise Exception('Downloading of recipe: %s failed'%recipe_arg)

View File

@ -22,7 +22,7 @@ recipe_modules = ['recipe_' + r for r in (
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik', 'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet', 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de', 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
'pagina12', 'infobae', 'ambito', 'elargentino', 'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -56,3 +56,7 @@ class Sueddeutsche(BasicNewsRecipe):
feeds = [ (u'Sueddeutsche', u'http://www.sueddeutsche.de/app/service/rss/alles/rss.xml') ] feeds = [ (u'Sueddeutsche', u'http://www.sueddeutsche.de/app/service/rss/alles/rss.xml') ]
def postprocess_html(self, soup, first_fetch):
    """Rename every table-related element in *soup* to a ``div``.

    All ``table``, ``tr`` and ``td`` tags found in the parsed document
    are renamed in place; their contents and attributes are untouched.

    :param soup: parsed document exposing ``findAll``.
    :param first_fetch: unused here; part of the hook signature.
    :return: the same *soup* object, after modification.
    """
    table_tags = ['table', 'tr', 'td']
    for element in soup.findAll(table_tags):
        element.name = 'div'
    return soup

View File

@ -395,7 +395,11 @@ class RecursiveFetcher(object, LoggingInterface):
if self.download_stylesheets: if self.download_stylesheets:
self.process_stylesheets(soup, newbaseurl) self.process_stylesheets(soup, newbaseurl)
res = os.path.join(linkdiskpath, basename(iurl)) _fname = basename(iurl)
# Reduce the downloaded file's name to plain ASCII before it is used to
# build the on-disk path. String methods return new objects rather than
# modifying in place, so each result must be re-bound to _fname;
# otherwise the sanitising has no effect and a non-ASCII byte string
# would hit an implicit ASCII decode inside .encode().
if not isinstance(_fname, unicode):
    _fname = _fname.decode('latin1', 'replace')
# Non-ASCII characters become '?'; '%' is dropped from the name.
_fname = _fname.encode('ascii', 'replace').replace('%', '')
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res) self.downloaded_paths.append(res)
self.filemap[nurl] = res self.filemap[nurl] = res
if recursion_level < self.max_recursions: if recursion_level < self.max_recursions: