diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 1af0662fbd..6b4fc70d11 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -3,6 +3,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' '''Read meta information from PDF files''' +import re from functools import partial from calibre import prints @@ -11,10 +12,16 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_ pdfreflow, pdfreflow_error = plugins['pdfreflow'] +_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)') + def get_metadata(stream, cover=True): if pdfreflow is None: raise RuntimeError(pdfreflow_error) - info = pdfreflow.get_metadata(stream.read(), cover) + raw = stream.read() + isbn = _isbn_pat.search(raw) + if isbn is not None: + isbn = isbn.group(1).replace('-', '').replace(' ', '') + info = pdfreflow.get_metadata(raw, cover) title = info.get('Title', None) au = info.get('Author', None) if au is None: @@ -22,6 +29,8 @@ def get_metadata(stream, cover=True): else: au = string_to_authors(au) mi = MetaInformation(title, au) + if isbn is not None: + mi.isbn = isbn creator = info.get('Creator', None) if creator: