mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improved get_title_tokens
This commit is contained in:
parent
8ee74bd2fb
commit
45adf46944
@ -294,8 +294,16 @@ class Source(Plugin):
|
||||
Excludes connectives and punctuation.
|
||||
'''
|
||||
if title:
|
||||
pat = re.compile(r'''[-,:;+!@#$%^&*(){}.`~"'\s\[\]/]''')
|
||||
title = pat.sub(' ', title)
|
||||
title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
|
||||
[
|
||||
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
|
||||
(r'(\d+),(\d+)', r'\1\2'),
|
||||
(r'(\s-)', ' '),
|
||||
(r"'", ''),
|
||||
(r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' ')
|
||||
]]
|
||||
for pat, repl in title_patterns:
|
||||
title = pat.sub(repl, title)
|
||||
tokens = title.split()
|
||||
for token in tokens:
|
||||
token = token.strip()
|
||||
|
Loading…
x
Reference in New Issue
Block a user