Fix various unicode issues

This commit is contained in:
Kovid Goyal 2010-03-08 09:44:04 -07:00
commit 7a0359708c
2 changed files with 4 additions and 6 deletions

View File

@@ -34,7 +34,7 @@ def _metadata_from_table(soup, searchfor):
td = td.parent td = td.parent
# there appears to be multiple ways of structuring the metadata # there appears to be multiple ways of structuring the metadata
# on the home page. cue some nasty special-case hacks... # on the home page. cue some nasty special-case hacks...
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I): if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(None), flags=re.I):
meta = _detag(td.findNextSibling('td')) meta = _detag(td.findNextSibling('td'))
return re.sub('^:', '', meta).strip() return re.sub('^:', '', meta).strip()
else: else:
@@ -46,7 +46,7 @@ def _metadata_from_span(soup, searchfor):
if span is None: if span is None:
return None return None
# this metadata might need some cleaning up still :/ # this metadata might need some cleaning up still :/
return _detag(span.renderContents().strip()) return _detag(span.renderContents(None).strip())
def _get_authors(soup): def _get_authors(soup):
aut = (_metadata_from_span(soup, r'author') aut = (_metadata_from_span(soup, r'author')

View File

@@ -3869,9 +3869,7 @@ class EPUB_MOBI(CatalogPlugin):
elem.extract() elem.extract()
# Reconstruct comments w/o <div>s # Reconstruct comments w/o <div>s
comments = soup.renderContents() comments = soup.renderContents(None)
if not isinstance(comments, unicode):
comments = comments.decode('utf-8', 'replace')
# Convert \n\n to <p>s # Convert \n\n to <p>s
if re.search('\n\n', comments): if re.search('\n\n', comments):
@@ -3883,7 +3881,7 @@ class EPUB_MOBI(CatalogPlugin):
pTag.insert(0,p) pTag.insert(0,p)
soup.insert(tsc,pTag) soup.insert(tsc,pTag)
tsc += 1 tsc += 1
comments = soup.renderContents() comments = soup.renderContents(None)
# Convert solo returns to <br /> # Convert solo returns to <br />
comments = re.sub('[\r\n]','<br />', comments) comments = re.sub('[\r\n]','<br />', comments)