From 1b03cd029a10b75f85af6688bf1f76f8f4394382 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 2 Apr 2021 20:26:52 +0530 Subject: [PATCH] EPUB2 metadata: Read ISBNs in identifier elements without schemes if they are valid ISBNs and no properly identified ISBNs are present --- src/calibre/ebooks/metadata/__init__.py | 7 +++++-- src/calibre/ebooks/metadata/opf2.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index f7c68f0975..a5ed3489bf 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -399,10 +399,13 @@ def check_isbn13(isbn): return False -def check_isbn(isbn): +def check_isbn(isbn, simple_sanitize=False): if not isbn: return None - isbn = re.sub(r'[^0-9X]', '', isbn.upper()) + if simple_sanitize: + isbn = isbn.upper().replace('-', '').strip().replace(' ', '') + else: + isbn = re.sub(r'[^0-9X]', '', isbn.upper()) il = len(isbn) if il not in (10, 13): return None diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 00c49fe318..43f5f17a5f 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -971,6 +971,7 @@ class OPF(object): # {{{ def get_identifiers(self): identifiers = {} + schemeless = [] for x in self.XPath( 'descendant::*[local-name() = "identifier" and text()]')( self.metadata): @@ -993,6 +994,15 @@ class OPF(object): # {{{ val = check_isbn(val.split(':')[-1]) if val is not None: identifiers['isbn'] = val + else: + schemeless.append(val) + + if schemeless and 'isbn' not in identifiers: + for val in schemeless: + if check_isbn(val, simple_sanitize=True) is not None: + identifiers['isbn'] = check_isbn(val) + break + return identifiers def set_identifiers(self, identifiers):