diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index dd577cc476..985b46a2c4 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -274,7 +274,10 @@ class TOC(list): desc = getattr(np, 'description', None) if desc: desc = re.sub(r'\s+', ' ', desc) - elem.append(C.meta(desc, name='description')) + try: + elem.append(C.meta(desc, name='description')) + except ValueError: + elem.append(C.meta(clean_xml_chars(desc), name='description')) idx = getattr(np, 'toc_thumbnail', None) if idx: elem.append(C.meta(idx, name='toc_thumbnail')) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 2573f85114..e69f973e99 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1039,12 +1039,13 @@ class BasicNewsRecipe(Recipe): def description_limiter(cls, src): if not src: return '' + src = force_unicode(src, 'utf-8') pos = cls.summary_length fuzz = 50 - si = src.find(';', pos) + si = src.find(u';', pos) if si > 0 and si-pos > fuzz: si = -1 - gi = src.find('>', pos) + gi = src.find(u'>', pos) if gi > 0 and gi-pos > fuzz: gi = -1 npos = max(si, gi) @@ -1052,8 +1053,9 @@ class BasicNewsRecipe(Recipe): npos = pos ans = src[:npos+1] if len(ans) < len(src): - return (ans+u'\u2026') if isinstance(ans, unicode) else (ans + - '...') + from calibre.utils.cleantext import clean_xml_chars + # Truncating the string can leave a dangling UTF-16 half-surrogate, which makes lxml raise an error, so clean it first + ans = clean_xml_chars(ans) + u'\u2026' return ans def feed2index(self, f, feeds): diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index ebe9ead97a..9d7479520d 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -135,6 +135,7 @@ class FeedTemplate(Template): return navbar def _generate(self, f, feeds, cutoff, extra_css=None, style=None): + from calibre.utils.cleantext import clean_xml_chars feed = 
feeds[f] head = HEAD(TITLE(feed.title)) if style: @@ -173,7 +174,7 @@ class FeedTemplate(Template): style='padding-bottom:0.5em') ) if article.summary: - li.append(DIV(cutoff(article.text_summary), + li.append(DIV(clean_xml_chars(cutoff(article.text_summary)), CLASS('article_description', 'calibre_rescale_70'))) ul.append(li) div.append(ul)