mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #5237 (Metadata "Comments" don't handle HTML markup correctly)
This commit is contained in:
parent
65b0138e27
commit
66162850ca
@ -320,7 +320,6 @@ class HTMLInput(InputFormatPlugin):
|
||||
if not metadata.title:
|
||||
oeb.logger.warn('Title not specified')
|
||||
metadata.add('title', self.oeb.translate(__('Unknown')))
|
||||
|
||||
bookid = str(uuid.uuid4())
|
||||
metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
|
||||
for ident in metadata.identifier:
|
||||
@ -328,7 +327,6 @@ class HTMLInput(InputFormatPlugin):
|
||||
self.oeb.uid = metadata.identifier[0]
|
||||
break
|
||||
|
||||
|
||||
filelist = get_filelist(htmlpath, basedir, opts, log)
|
||||
filelist = [f for f in filelist if not f.is_binary]
|
||||
htmlfile_map = {}
|
||||
|
@ -14,7 +14,7 @@ from lxml import etree
|
||||
|
||||
from calibre.ebooks.oeb.base import XPath, XPNSMAP
|
||||
from calibre import guess_type
|
||||
|
||||
from calibre.library.comments import comments_to_html
|
||||
class Jacket(object):
|
||||
'''
|
||||
Book jacket manipulation. Remove first image and insert comments at start of
|
||||
@ -25,6 +25,7 @@ class Jacket(object):
|
||||
<html xmlns="%(xmlns)s">
|
||||
<head>
|
||||
<title>%(title)s</title>
|
||||
<meta name="calibre-content" content="jacket"/>
|
||||
</head>
|
||||
<body>
|
||||
<div class="calibre_rescale_100">
|
||||
@ -83,7 +84,9 @@ class Jacket(object):
|
||||
comments = ''
|
||||
if not comments.strip():
|
||||
comments = ''
|
||||
comments = comments.replace('\r\n', '\n').replace('\n\n', '<br/><br/>')
|
||||
orig_comments = comments
|
||||
if comments:
|
||||
comments = comments_to_html(comments)
|
||||
series = '<b>Series: </b>' + escape(mi.series if mi.series else '')
|
||||
if mi.series and mi.series_index is not None:
|
||||
series += escape(' [%s]'%mi.format_series_index())
|
||||
@ -103,12 +106,19 @@ class Jacket(object):
|
||||
title = mi.title if mi.title else unicode(self.oeb.metadata.title[0])
|
||||
except:
|
||||
title = _('Unknown')
|
||||
html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
|
||||
title=escape(title), comments=escape(comments),
|
||||
|
||||
def generate_html(comments):
|
||||
return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
|
||||
title=escape(title), comments=comments,
|
||||
jacket=escape(_('Book Jacket')), series=series,
|
||||
tags=tags, rating=self.get_rating(mi.rating))
|
||||
id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
|
||||
root = etree.fromstring(html)
|
||||
from calibre.ebooks.oeb.base import RECOVER_PARSER
|
||||
try:
|
||||
root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER)
|
||||
except:
|
||||
root = etree.fromstring(generate_html(escape(orig_comments)),
|
||||
parser=RECOVER_PARSER)
|
||||
item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
|
||||
self.oeb.spine.insert(0, item, True)
|
||||
|
||||
|
@ -71,7 +71,7 @@ class MetadataWidget(Widget, Ui_Form):
|
||||
self.author_sort.setText(mi.author_sort if mi.author_sort else '')
|
||||
self.tags.setText(', '.join(mi.tags if mi.tags else []))
|
||||
self.tags.update_tags_cache(self.db.all_tags())
|
||||
self.comment.setText(mi.comments if mi.comments else '')
|
||||
self.comment.setPlainText(mi.comments if mi.comments else '')
|
||||
if mi.series:
|
||||
self.series.setCurrentIndex(self.series.findText(mi.series))
|
||||
if mi.series_index is not None:
|
||||
|
@ -11,6 +11,7 @@ from PyQt4.QtGui import QDialog, QPixmap, QGraphicsScene, QIcon, QDesktopService
|
||||
from calibre.gui2.dialogs.book_info_ui import Ui_BookInfo
|
||||
from calibre.gui2 import dynamic
|
||||
from calibre import fit_image
|
||||
from calibre.library.comments import comments_to_html
|
||||
|
||||
class BookInfo(QDialog, Ui_BookInfo):
|
||||
|
||||
@ -96,6 +97,8 @@ class BookInfo(QDialog, Ui_BookInfo):
|
||||
self.setWindowTitle(info[_('Title')])
|
||||
self.title.setText('<b>'+info.pop(_('Title')))
|
||||
comments = info.pop(_('Comments'), '')
|
||||
if comments:
|
||||
comments = comments_to_html(comments)
|
||||
if re.search(r'<[a-zA-Z]+>', comments) is None:
|
||||
lines = comments.splitlines()
|
||||
lines = [x if x.strip() else '<br><br>' for x in lines]
|
||||
|
@ -231,7 +231,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
if mi.series_index is not None:
|
||||
self.series_index.setValue(float(mi.series_index))
|
||||
if mi.comments and mi.comments.strip():
|
||||
self.comments.setText(mi.comments)
|
||||
self.comments.setPlainText(mi.comments)
|
||||
|
||||
|
||||
def set_cover(self):
|
||||
@ -590,7 +590,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
prefix = unicode(self.comments.toPlainText())
|
||||
if prefix:
|
||||
prefix += '\n'
|
||||
self.comments.setText(prefix + summ)
|
||||
self.comments.setPlainText(prefix + summ)
|
||||
if book.rating is not None:
|
||||
self.rating.setValue(int(book.rating))
|
||||
if book.tags:
|
||||
@ -654,7 +654,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
self.db.set_series(self.id,
|
||||
unicode(self.series.currentText()).strip(), notify=False)
|
||||
self.db.set_series_index(self.id, self.series_index.value(), notify=False)
|
||||
self.db.set_comment(self.id, qstring_to_unicode(self.comments.toPlainText()), notify=False)
|
||||
self.db.set_comment(self.id, unicode(self.comments.toPlainText()), notify=False)
|
||||
d = self.pubdate.date()
|
||||
d = qt_to_dt(d)
|
||||
self.db.set_pubdate(self.id, d, notify=False)
|
||||
|
@ -11,6 +11,7 @@ from calibre.gui2.widgets import IMAGE_EXTENSIONS
|
||||
from calibre.gui2.progress_indicator import ProgressIndicator
|
||||
from calibre.gui2.notify import get_notifier
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.library.comments import comments_to_html
|
||||
|
||||
class BookInfoDisplay(QWidget):
|
||||
|
||||
@ -133,6 +134,8 @@ class BookInfoDisplay(QWidget):
|
||||
key = key.decode(preferred_encoding, 'replace')
|
||||
if isinstance(txt, str):
|
||||
txt = txt.decode(preferred_encoding, 'replace')
|
||||
if key == _('Comments'):
|
||||
txt = comments_to_html(txt)
|
||||
rows += u'<tr><td><b>%s:</b></td><td>%s</td></tr>'%(key, txt)
|
||||
self.book_data.setText(u'<table>'+rows+u'</table>')
|
||||
|
||||
|
114
src/calibre/library/comments.py
Normal file
114
src/calibre/library/comments.py
Normal file
@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
|
||||
from calibre import prepare_string_for_xml
|
||||
|
||||
def comments_to_html(comments):
    '''
    Convert free-form comment text to a normalized block of <p> elements.

    The input may be plain text, text with minimal inline markup, or HTML.
    Byte strings are decoded with ``preferred_encoding`` (undecodable bytes
    are replaced). Every <p> in the result carries ``class="description"``;
    the examples below omit that attribute for brevity.

    'plain text' returns as
    <p>plain text</p>

    'plain text with <i>minimal</i> <b>markup</b>' returns as
    <p>plain text with <i>minimal</i> <b>markup</b></p>

    '<p>pre-formatted text</p>' keeps its markup; only the class attribute
    is added

    'A line of text\n\nFollowed by a line of text' returns as
    <p>A line of text</p>
    <p>Followed by a line of text</p>

    'A line of text.\nA second line of text.\rA third line of text.' returns as
    <p>A line of text.<br />A second line of text.<br />A third line of text.</p>

    '...end of a paragraph.Somehow the break was lost...' returns as
    <p>...end of a paragraph.</p>
    <p>Somehow the break was lost...</p>

    A '\r\n' pair counts as a single line break, and two consecutive
    hyphens are replaced by an em dash.

    Other HTML is passed through BeautifulSoup; top-level elements other than
    br, b, i and em are emitted as blocks of their own, outside any <p>.

    :param comments: the comment text, as unicode or as an encoded byte string
    :return: unicode markup rendered from the rebuilt BeautifulSoup tree
    '''
    if not isinstance(comments, unicode):
        comments = comments.decode(preferred_encoding, 'replace')

    # Treat a Windows line ending as ONE line break. Without this, '\r\n'
    # produced two <br /> tags and '\r\n\r\n' was never seen as a paragraph
    # break by the '\n\n' split below.
    comments = comments.replace(u'\r\n', u'\n')

    # Explode lost paragraph breaks ("...end.Start...") to \n\n.
    # Hackish - only a lowercase letter before and an uppercase letter after
    # the punctuation count, so decimal points between digits are left alone.
    comments = re.sub(r'([a-z])([.?!])([A-Z])', r'\1\2\n\n\3', comments)

    # Convert \n\n to <p>s
    if u'\n\n' in comments:
        soup = BeautifulSoup()
        for index, text in enumerate(comments.split(u'\n\n')):
            para = Tag(soup, 'p')
            para.insert(0, text)
            soup.insert(index, para)
        comments = soup.renderContents(None)

    # Convert the remaining solo returns to <br />
    comments = re.sub(r'[\r\n]', u'<br />', comments)

    # Convert two hyphens to an em dash. The replacement is spelled as a
    # unicode escape: a non-ASCII byte-string literal cannot be mixed into
    # unicode text on Python 2 without an implicit (failing) ASCII decode.
    comments = comments.replace(u'--', u'\u2014')

    soup = BeautifulSoup(comments)
    result = BeautifulSoup()
    inline_tags = ('br', 'b', 'i', 'em')
    rtc = 0           # next insert position in result
    ptc = 0           # next insert position in the currently open <p>
    pTag = None       # the <p> being filled, valid while open_pTag is True
    open_pTag = False

    # Iterate over a copy: the inserts below move tokens out of soup.
    for token in list(soup.contents):
        is_text = type(token) is NavigableString
        if not is_text and isinstance(token, NavigableString):
            # NavigableString subclasses (presumably comments, declarations,
            # CDATA, processing instructions) have no .name attribute and
            # are not display text, so skip them instead of failing below.
            continue

        if is_text or token.name in inline_tags:
            # Plain text and inline markup accumulate in the open <p>,
            # which is created on demand.
            if not open_pTag:
                pTag = Tag(result, 'p')
                open_pTag = True
                ptc = 0
            pTag.insert(ptc, prepare_string_for_xml(token) if is_text else token)
            ptc += 1
        else:
            # Any other element closes the open <p> and is emitted as a
            # block of its own.
            if open_pTag:
                result.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
                ptc = 0
            # Clean up the element's direct NavigableStrings for xml
            for sub_token in list(token.contents):
                if type(sub_token) is NavigableString:
                    sub_token.replaceWith(prepare_string_for_xml(sub_token))
            result.insert(rtc, token)
            rtc += 1

    # Flush a trailing paragraph that was still being filled
    if open_pTag:
        result.insert(rtc, pTag)

    for p in result.findAll('p'):
        p['class'] = 'description'

    return result.renderContents(encoding=None)
|
||||
|
Loading…
x
Reference in New Issue
Block a user