mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix some news downloads in East Asian languages not working because truncating article descriptions could leave a dangling UTF-16 half-surrogate in the string
This commit is contained in:
parent
a3eee6a22a
commit
d75e5323a9
@ -274,7 +274,10 @@ class TOC(list):
|
|||||||
desc = getattr(np, 'description', None)
|
desc = getattr(np, 'description', None)
|
||||||
if desc:
|
if desc:
|
||||||
desc = re.sub(r'\s+', ' ', desc)
|
desc = re.sub(r'\s+', ' ', desc)
|
||||||
elem.append(C.meta(desc, name='description'))
|
try:
|
||||||
|
elem.append(C.meta(desc, name='description'))
|
||||||
|
except ValueError:
|
||||||
|
elem.append(C.meta(clean_xml_chars(desc), name='description'))
|
||||||
idx = getattr(np, 'toc_thumbnail', None)
|
idx = getattr(np, 'toc_thumbnail', None)
|
||||||
if idx:
|
if idx:
|
||||||
elem.append(C.meta(idx, name='toc_thumbnail'))
|
elem.append(C.meta(idx, name='toc_thumbnail'))
|
||||||
|
@ -1039,12 +1039,13 @@ class BasicNewsRecipe(Recipe):
|
|||||||
def description_limiter(cls, src):
|
def description_limiter(cls, src):
|
||||||
if not src:
|
if not src:
|
||||||
return ''
|
return ''
|
||||||
|
src = force_unicode(src, 'utf-8')
|
||||||
pos = cls.summary_length
|
pos = cls.summary_length
|
||||||
fuzz = 50
|
fuzz = 50
|
||||||
si = src.find(';', pos)
|
si = src.find(u';', pos)
|
||||||
if si > 0 and si-pos > fuzz:
|
if si > 0 and si-pos > fuzz:
|
||||||
si = -1
|
si = -1
|
||||||
gi = src.find('>', pos)
|
gi = src.find(u'>', pos)
|
||||||
if gi > 0 and gi-pos > fuzz:
|
if gi > 0 and gi-pos > fuzz:
|
||||||
gi = -1
|
gi = -1
|
||||||
npos = max(si, gi)
|
npos = max(si, gi)
|
||||||
@ -1052,8 +1053,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
npos = pos
|
npos = pos
|
||||||
ans = src[:npos+1]
|
ans = src[:npos+1]
|
||||||
if len(ans) < len(src):
|
if len(ans) < len(src):
|
||||||
return (ans+u'\u2026') if isinstance(ans, unicode) else (ans +
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
'...')
|
# Truncating the string can leave a dangling UTF-16 half-surrogate, which lxml rejects with a ValueError, so clean the truncated text first
|
||||||
|
ans = clean_xml_chars(ans) + u'\u2026'
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def feed2index(self, f, feeds):
|
def feed2index(self, f, feeds):
|
||||||
|
@ -135,6 +135,7 @@ class FeedTemplate(Template):
|
|||||||
return navbar
|
return navbar
|
||||||
|
|
||||||
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
|
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
|
||||||
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
feed = feeds[f]
|
feed = feeds[f]
|
||||||
head = HEAD(TITLE(feed.title))
|
head = HEAD(TITLE(feed.title))
|
||||||
if style:
|
if style:
|
||||||
@ -173,7 +174,7 @@ class FeedTemplate(Template):
|
|||||||
style='padding-bottom:0.5em')
|
style='padding-bottom:0.5em')
|
||||||
)
|
)
|
||||||
if article.summary:
|
if article.summary:
|
||||||
li.append(DIV(cutoff(article.text_summary),
|
li.append(DIV(clean_xml_chars(cutoff(article.text_summary)),
|
||||||
CLASS('article_description', 'calibre_rescale_70')))
|
CLASS('article_description', 'calibre_rescale_70')))
|
||||||
ul.append(li)
|
ul.append(li)
|
||||||
div.append(ul)
|
div.append(ul)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user