Fix various unicode issues

This commit is contained in:
Kovid Goyal 2010-03-08 09:44:04 -07:00
commit 7a0359708c
2 changed files with 4 additions and 6 deletions

View File

@ -34,7 +34,7 @@ def _metadata_from_table(soup, searchfor):
td = td.parent
# there appear to be multiple ways of structuring the metadata
# on the home page. cue some nasty special-case hacks...
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I):
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(None), flags=re.I):
meta = _detag(td.findNextSibling('td'))
return re.sub('^:', '', meta).strip()
else:
@ -46,7 +46,7 @@ def _metadata_from_span(soup, searchfor):
if span is None:
return None
# this metadata might need some cleaning up still :/
return _detag(span.renderContents().strip())
return _detag(span.renderContents(None).strip())
def _get_authors(soup):
aut = (_metadata_from_span(soup, r'author')

View File

@ -3869,9 +3869,7 @@ class EPUB_MOBI(CatalogPlugin):
elem.extract()
# Reconstruct comments w/o <div>s
comments = soup.renderContents()
if not isinstance(comments, unicode):
comments = comments.decode('utf-8', 'replace')
comments = soup.renderContents(None)
# Convert \n\n to <p>s
if re.search('\n\n', comments):
@ -3883,7 +3881,7 @@ class EPUB_MOBI(CatalogPlugin):
pTag.insert(0,p)
soup.insert(tsc,pTag)
tsc += 1
comments = soup.renderContents()
comments = soup.renderContents(None)
# Convert solo returns to <br />
comments = re.sub('[\r\n]','<br />', comments)