mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix some news downloads in East Asian languages failing because truncating article descriptions could leave a dangling UTF-16 surrogate (an invalid code unit) at the end of the string
This commit is contained in:
parent
a3eee6a22a
commit
d75e5323a9
@ -274,7 +274,10 @@ class TOC(list):
|
||||
desc = getattr(np, 'description', None)
|
||||
if desc:
|
||||
desc = re.sub(r'\s+', ' ', desc)
|
||||
elem.append(C.meta(desc, name='description'))
|
||||
try:
|
||||
elem.append(C.meta(desc, name='description'))
|
||||
except ValueError:
|
||||
elem.append(C.meta(clean_xml_chars(desc), name='description'))
|
||||
idx = getattr(np, 'toc_thumbnail', None)
|
||||
if idx:
|
||||
elem.append(C.meta(idx, name='toc_thumbnail'))
|
||||
|
@ -1039,12 +1039,13 @@ class BasicNewsRecipe(Recipe):
|
||||
def description_limiter(cls, src):
|
||||
if not src:
|
||||
return ''
|
||||
src = force_unicode(src, 'utf-8')
|
||||
pos = cls.summary_length
|
||||
fuzz = 50
|
||||
si = src.find(';', pos)
|
||||
si = src.find(u';', pos)
|
||||
if si > 0 and si-pos > fuzz:
|
||||
si = -1
|
||||
gi = src.find('>', pos)
|
||||
gi = src.find(u'>', pos)
|
||||
if gi > 0 and gi-pos > fuzz:
|
||||
gi = -1
|
||||
npos = max(si, gi)
|
||||
@ -1052,8 +1053,9 @@ class BasicNewsRecipe(Recipe):
|
||||
npos = pos
|
||||
ans = src[:npos+1]
|
||||
if len(ans) < len(src):
|
||||
return (ans+u'\u2026') if isinstance(ans, unicode) else (ans +
|
||||
'...')
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
||||
ans = clean_xml_chars(ans) + u'\u2026'
|
||||
return ans
|
||||
|
||||
def feed2index(self, f, feeds):
|
||||
|
@ -135,6 +135,7 @@ class FeedTemplate(Template):
|
||||
return navbar
|
||||
|
||||
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
feed = feeds[f]
|
||||
head = HEAD(TITLE(feed.title))
|
||||
if style:
|
||||
@ -173,7 +174,7 @@ class FeedTemplate(Template):
|
||||
style='padding-bottom:0.5em')
|
||||
)
|
||||
if article.summary:
|
||||
li.append(DIV(cutoff(article.text_summary),
|
||||
li.append(DIV(clean_xml_chars(cutoff(article.text_summary)),
|
||||
CLASS('article_description', 'calibre_rescale_70')))
|
||||
ul.append(li)
|
||||
div.append(ul)
|
||||
|
Loading…
x
Reference in New Issue
Block a user