When reading metadata from very large HTML files, do not search the entire file for the metadata, as this can be very slow

This commit is contained in:
Kovid Goyal 2012-02-14 20:14:35 +05:30
parent 20e46a53c6
commit 7895652c76

View File

@@ -34,6 +34,7 @@ def get_metadata_(src, encoding=None):
# Title
title = None
pat = re.compile(r'<!--.*?TITLE=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
src = src[:150000] # Searching shouldn't take too long
match = pat.search(src)
if match:
title = match.group(2)