mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve tokenization of CJK author names
This commit is contained in:
parent
348338f75c
commit
3b4f584a91
@@ -343,8 +343,8 @@ class Source(Plugin):
|
||||
|
||||
if authors:
|
||||
# Leave ' in there for Irish names
|
||||
remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
|
||||
replace_pat = re.compile(r'[-+.:;,]')
|
||||
remove_pat = re.compile(r'[!@#$%^&*()()「」{}`~"\s\[\]/]')
|
||||
replace_pat = re.compile(r'[-+.:;,,。;:]')
|
||||
if only_first_author:
|
||||
authors = authors[:1]
|
||||
for au in authors:
|
||||
@@ -384,7 +384,7 @@ class Source(Plugin):
|
||||
# Remove hyphens only if they have whitespace before them
|
||||
(r'(\s-)', ' '),
|
||||
# Replace other special chars with a space
|
||||
(r'''[:,;!@$%^&*(){}.`~"\s\[\]/]《》''', ' '),
|
||||
(r'''[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”''', ' '),
|
||||
]]
|
||||
|
||||
for pat, repl in title_patterns:
|
||||
|
Loading…
x
Reference in New Issue
Block a user