Unwrap line after more characters

Fix line unwrap for Polish. Add Cyrillic alphabet.
This commit is contained in:
Carmina16 2022-01-26 03:35:47 +07:00 committed by GitHub
parent a81e4bec5c
commit e60162503a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -365,7 +365,7 @@ class HeuristicProcessor:
# define the pieces of the regex
# (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+str(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]|(?<!\&\w{4});))"
lookahead = "(?<=.{"+str(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýźâêîôûçąężłıãõñæøþðßěľščťžňďřůёђєіїјљњћўџѣа-я,:)\\IAß]|(?<!\&\w{4});))"
em_en_lookahead = "(?<=.{"+str(length)+"}[\u2013\u2014])"
soft_hyphen = "\xad"
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"