EPUB2 metadata: Read ISBNs from identifier elements that have no scheme, provided they are valid ISBNs and no properly identified ISBNs are present

This commit is contained in:
Kovid Goyal 2021-04-02 20:26:52 +05:30
parent 9562e85768
commit 1b03cd029a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 15 additions and 2 deletions

View File

@ -399,10 +399,13 @@ def check_isbn13(isbn):
return False
def check_isbn(isbn):
def check_isbn(isbn, simple_sanitize=False):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if simple_sanitize:
isbn = isbn.upper().replace('-', '').strip().replace(' ', '')
else:
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
il = len(isbn)
if il not in (10, 13):
return None

View File

@ -971,6 +971,7 @@ class OPF(object): # {{{
def get_identifiers(self):
identifiers = {}
schemeless = []
for x in self.XPath(
'descendant::*[local-name() = "identifier" and text()]')(
self.metadata):
@ -993,6 +994,15 @@ class OPF(object): # {{{
val = check_isbn(val.split(':')[-1])
if val is not None:
identifiers['isbn'] = val
else:
schemeless.append(val)
if schemeless and 'isbn' not in identifiers:
for val in schemeless:
if check_isbn(val, simple_sanitize=True) is not None:
identifiers['isbn'] = check_isbn(val)
break
return identifiers
def set_identifiers(self, identifiers):