mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve tokenization of CJK author names
This commit is contained in:
parent
348338f75c
commit
3b4f584a91
@ -343,8 +343,8 @@ class Source(Plugin):
|
|||||||
|
|
||||||
if authors:
|
if authors:
|
||||||
# Leave ' in there for Irish names
|
# Leave ' in there for Irish names
|
||||||
remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
|
remove_pat = re.compile(r'[!@#$%^&*()()「」{}`~"\s\[\]/]')
|
||||||
replace_pat = re.compile(r'[-+.:;,]')
|
replace_pat = re.compile(r'[-+.:;,,。;:]')
|
||||||
if only_first_author:
|
if only_first_author:
|
||||||
authors = authors[:1]
|
authors = authors[:1]
|
||||||
for au in authors:
|
for au in authors:
|
||||||
@ -384,7 +384,7 @@ class Source(Plugin):
|
|||||||
# Remove hyphens only if they have whitespace before them
|
# Remove hyphens only if they have whitespace before them
|
||||||
(r'(\s-)', ' '),
|
(r'(\s-)', ' '),
|
||||||
# Replace other special chars with a space
|
# Replace other special chars with a space
|
||||||
(r'''[:,;!@$%^&*(){}.`~"\s\[\]/]《》''', ' '),
|
(r'''[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”''', ' '),
|
||||||
]]
|
]]
|
||||||
|
|
||||||
for pat, repl in title_patterns:
|
for pat, repl in title_patterns:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user