renderContents as unicode

This commit is contained in:
James Ralston 2010-03-07 22:03:08 -08:00
parent 9f01f0b126
commit 043223eac6

View File

@@ -37,7 +37,7 @@ def _metadata_from_table(soup, searchfor):
td = td.parent td = td.parent
# there appears to be multiple ways of structuring the metadata # there appears to be multiple ways of structuring the metadata
# on the home page. cue some nasty special-case hacks... # on the home page. cue some nasty special-case hacks...
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I): if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(None), flags=re.I):
meta = _detag(td.findNextSibling('td')) meta = _detag(td.findNextSibling('td'))
return re.sub('^:', '', meta).strip() return re.sub('^:', '', meta).strip()
else: else:
@@ -49,7 +49,7 @@ def _metadata_from_span(soup, searchfor):
if span is None: if span is None:
return None return None
# this metadata might need some cleaning up still :/ # this metadata might need some cleaning up still :/
return _detag(span.renderContents().strip()) return _detag(span.renderContents(None).strip())
def _get_authors(soup): def _get_authors(soup):
aut = (_metadata_from_span(soup, r'author') aut = (_metadata_from_span(soup, r'author')