mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improved get_title_tokens
This commit is contained in:
parent
8ee74bd2fb
commit
45adf46944
@ -294,8 +294,16 @@ class Source(Plugin):
|
|||||||
Excludes connectives and punctuation.
|
Excludes connectives and punctuation.
|
||||||
'''
|
'''
|
||||||
if title:
|
if title:
|
||||||
pat = re.compile(r'''[-,:;+!@#$%^&*(){}.`~"'\s\[\]/]''')
|
title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
|
||||||
title = pat.sub(' ', title)
|
[
|
||||||
|
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
|
||||||
|
(r'(\d+),(\d+)', r'\1\2'),
|
||||||
|
(r'(\s-)', ' '),
|
||||||
|
(r"'", ''),
|
||||||
|
(r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' ')
|
||||||
|
]]
|
||||||
|
for pat, repl in title_patterns:
|
||||||
|
title = pat.sub(repl, title)
|
||||||
tokens = title.split()
|
tokens = title.split()
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
token = token.strip()
|
token = token.strip()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user