mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
PDF input: More special character fixes.
This commit is contained in:
parent
2bd8cb9059
commit
32192a5a72
@ -114,7 +114,7 @@ class HTMLPreProcessor(object):
|
|||||||
(re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
|
(re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
|
||||||
(re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
|
(re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
|
||||||
|
|
||||||
# Fix French accents
|
# Fix accents
|
||||||
(re.compile(u'`\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ò'),
|
(re.compile(u'`\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ò'),
|
||||||
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
|
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
|
||||||
(re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
|
(re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
|
||||||
@ -126,6 +126,32 @@ class HTMLPreProcessor(object):
|
|||||||
(re.compile(u'`\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'à'),
|
(re.compile(u'`\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'à'),
|
||||||
(re.compile(u'`\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'À'),
|
(re.compile(u'`\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'À'),
|
||||||
|
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ó'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ó'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ú'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ú'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'é'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'É'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'í'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Í'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'),
|
||||||
|
(re.compile(u'´\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Á'),
|
||||||
|
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ô'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ô'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'û'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Û'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ê'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ê'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'î'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Î'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'â'),
|
||||||
|
(re.compile(u'ˆ\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Â'),
|
||||||
|
|
||||||
|
# Fix more special characters
|
||||||
|
(re.compile(u'¸\s*(<br.*?>)*\s*c', re.UNICODE), lambda match: u'ç'),
|
||||||
|
(re.compile(u'¸\s*(<br.*?>)*\s*C', re.UNICODE), lambda match: u'Ç'),
|
||||||
|
|
||||||
# Remove page links
|
# Remove page links
|
||||||
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
|
Loading…
x
Reference in New Issue
Block a user