This commit is contained in:
Kovid Goyal 2014-05-23 08:45:02 +05:30
parent be37ccfb56
commit 3105d2bd32

View File

@ -36,7 +36,10 @@ def norm_title(title):
return normalize_entities(normalize_spaces(title)) return normalize_entities(normalize_spaces(title))
def get_title(doc): def get_title(doc):
title = doc.find('.//title').text try:
title = doc.find('.//title').text
except AttributeError:
title = None
if not title: if not title:
return '[no-title]' return '[no-title]'
@ -101,7 +104,7 @@ def shorten_title(doc):
return title return title
def get_body(doc):
    """Return the cleaned markup of the document body.

    Every ``script``, ``link`` and ``style`` element matched under *doc* is
    dropped first (this mutates *doc* in place). The markup of ``doc.body``
    is then serialized, falling back to *doc* itself when ``doc.body`` is
    falsy, and the resulting unicode string is passed through
    ``clean_attributes``.
    """
    # drop_tree() is called only for its side effect, so use a plain loop
    # instead of building a throwaway list of return values.
    for elem in doc.xpath('.//script | .//link | .//style'):
        elem.drop_tree()
    # NOTE(review): 'or' relies on the truthiness of doc.body; if these are
    # lxml elements, a body without child elements is falsy and the whole
    # document is serialized instead -- confirm this fallback is intended.
    raw_html = unicode(tostring(doc.body or doc))
    return clean_attributes(raw_html)