diff --git a/src/calibre/ebooks/epub/cfi/parse.py b/src/calibre/ebooks/epub/cfi/parse.py index ea20b226f5..1b6efb6ff0 100644 --- a/src/calibre/ebooks/epub/cfi/parse.py +++ b/src/calibre/ebooks/epub/cfi/parse.py @@ -17,9 +17,11 @@ class Parser: def __init__(self): # All allowed unicode characters + escaped special characters - special_char = r'[\[\](),;=^-]' - unescaped_char = '[[\t\n\r -\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]--%s]' % special_char - escaped_char = r'\^' + special_char + special_char = r'[\[\](),;=^]' + unescaped_char = f'[[\t\n\r -\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]--{special_char}]' + # calibre used to escape hyphens as well, so recognize them even though + # not strictly spec compliant + escaped_char = r'\^' + special_char[:-1] + '-]' chars = r'(?:%s|(?:%s))+' % (unescaped_char, escaped_char) chars_no_space = chars.replace('0020', '0021') # No leading zeros allowed for integers diff --git a/src/calibre/ebooks/epub/cfi/tests.py b/src/calibre/ebooks/epub/cfi/tests.py index 81ad7ce47e..a2d0b32c24 100644 --- a/src/calibre/ebooks/epub/cfi/tests.py +++ b/src/calibre/ebooks/epub/cfi/tests.py @@ -86,7 +86,9 @@ class Tests(unittest.TestCase): # Test parsing of text assertions ('/1:3[aa^,b]', a('aa,b'), ''), + ('/1:3[aa-b]', a('aa-b'), ''), ('/1:3[aa^-b]', a('aa-b'), ''), + ('/1:3[aa-^--b]', a('aa---b'), ''), ('/1:3[aa^,b,c1]', a('aa,b', 'c1'), ''), ('/1:3[,aa^,b]', a(after='aa,b'), ''), ('/1:3[;s=a]', a(s='a'), ''), diff --git a/src/pyj/read_book/cfi.pyj b/src/pyj/read_book/cfi.pyj index a409a0658f..81e11a4672 100644 --- a/src/pyj/read_book/cfi.pyj +++ b/src/pyj/read_book/cfi.pyj @@ -24,7 +24,7 @@ from __python__ import hash_literals from read_book.viewport import scroll_viewport, rem_size # CFI escaping {{{ -escape_pat = /[\[\],^();~@!-]/g +escape_pat = /[\[\],^();~@!]/g unescape_pat = /[\^](.)/g def escape_for_cfi(raw):