Fix #821309 (html import: title/author from old format document.)

This commit is contained in:
Kovid Goyal 2011-08-05 08:24:06 -06:00
parent f4b192c5e2
commit bc75bc89f6

View File

@ -38,17 +38,17 @@ def get_metadata_(src, encoding=None):
if match: if match:
title = match.group(2) title = match.group(2)
else: else:
pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE) for x in ('DC.title','DCTERMS.title','Title'):
match = pat.search(src)
if match:
title = match.group(1)
if not title:
for x in ('Title','DC.title','DCTERMS.title'):
pat = get_meta_regexp_(x) pat = get_meta_regexp_(x)
match = pat.search(src) match = pat.search(src)
if match: if match:
title = match.group(1) title = match.group(1)
break break
if not title:
pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE)
match = pat.search(src)
if match:
title = match.group(1)
# Author # Author
author = None author = None
@ -57,7 +57,7 @@ def get_metadata_(src, encoding=None):
if match: if match:
author = match.group(2).replace(',', ';') author = match.group(2).replace(',', ';')
else: else:
for x in ('Author','DC.creator.aut','DCTERMS.creator.aut'): for x in ('Author','DC.creator.aut','DCTERMS.creator.aut', 'DC.creator'):
pat = get_meta_regexp_(x) pat = get_meta_regexp_(x)
match = pat.search(src) match = pat.search(src)
if match: if match: