mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News download: Handle titles with ASCII control codes in them. Fixes #739322 (News fetching - NULL bytes problem)
This commit is contained in:
parent
de1e2369b3
commit
9385758a28
@ -8,15 +8,18 @@ import re, htmlentitydefs
|
||||
_ascii_pat = None
|
||||
|
||||
def clean_ascii_chars(txt, charlist=None):
|
||||
'''
|
||||
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
|
||||
This is all control chars except \\t,\\n and \\r
|
||||
r'''
|
||||
Remove ASCII control chars.
|
||||
This is all control chars except \t, \n and \r
|
||||
'''
|
||||
if not txt:
|
||||
return ''
|
||||
global _ascii_pat
|
||||
if _ascii_pat is None:
|
||||
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
|
||||
chars = set(xrange(32))
|
||||
chars.add(127)
|
||||
for x in (9, 10, 13):
|
||||
chars.remove(x)
|
||||
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
|
||||
|
||||
if charlist is None:
|
||||
|
@ -28,6 +28,7 @@ class Article(object):
|
||||
pass
|
||||
if not isinstance(self._title, unicode):
|
||||
self._title = self._title.decode('utf-8', 'replace')
|
||||
self._title = clean_ascii_chars(self._title)
|
||||
self.url = url
|
||||
self.author = author
|
||||
if author and not isinstance(author, unicode):
|
||||
@ -75,7 +76,7 @@ class Article(object):
|
||||
t = t.decode('utf-8', 'replace')
|
||||
return t
|
||||
def fset(self, val):
|
||||
self._title = val
|
||||
self._title = clean_ascii_chars(val)
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user