Unwrap line after more characters

Fix line unwrap for Polish. Add Cyrillic alphabet.
This commit is contained in:
Carmina16
2022-01-26 03:35:47 +07:00
committed by GitHub
parent a81e4bec5c
commit e60162503a
+1 -1
View File
@@ -365,7 +365,7 @@ class HeuristicProcessor:
# define the pieces of the regex
# (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+str(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]|(?<!\&\w{4});))"
lookahead = "(?<=.{"+str(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýźâêîôûçąężłıãõñæøþðßěľščťžňďřůёђєіїјљњћўџѣа-я,:)\\IAß]|(?<!\&\w{4});))"
em_en_lookahead = "(?<=.{"+str(length)+"}[\u2013\u2014])"
soft_hyphen = "\xad"
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"