mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Fix trailing spaces after unicode chars
This commit is contained in:
parent
79b448726a
commit
4953fa24ea
@ -28,7 +28,7 @@ class Tokenize:
|
|||||||
self.__bug_handler = bug_handler
|
self.__bug_handler = bug_handler
|
||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
# self.__out_file = out_file
|
# self.__write_to = out_file
|
||||||
self.__compile_expressions()
|
self.__compile_expressions()
|
||||||
#variables
|
#variables
|
||||||
self.__uc_char = 0
|
self.__uc_char = 0
|
||||||
@ -41,14 +41,11 @@ class Tokenize:
|
|||||||
|
|
||||||
def __remove_uc_chars(self, startchar, token):
|
def __remove_uc_chars(self, startchar, token):
|
||||||
for i in xrange(startchar, len(token)):
|
for i in xrange(startchar, len(token)):
|
||||||
#handle the case of an uc char with a terminating blank before ansi char
|
if self.__uc_char:
|
||||||
if token[i] == " " and self.__uc_char:
|
|
||||||
continue
|
|
||||||
elif self.__uc_char:
|
|
||||||
self.__uc_char -= 1
|
self.__uc_char -= 1
|
||||||
else:
|
else:
|
||||||
return token[i:]
|
return token[i:]
|
||||||
#if only " " and char to skip
|
#if only char to skip
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def __unicode_process(self, token):
|
def __unicode_process(self, token):
|
||||||
@ -90,7 +87,7 @@ class Tokenize:
|
|||||||
self.__reini_utf8_counters()
|
self.__reini_utf8_counters()
|
||||||
#get value and handle negative case
|
#get value and handle negative case
|
||||||
uni_char = int(match_obj.group(1))
|
uni_char = int(match_obj.group(1))
|
||||||
uni_len = len(match_obj.group(1)) + 2
|
uni_len = len(match_obj.group(0))
|
||||||
if uni_char < 0:
|
if uni_char < 0:
|
||||||
uni_char += 65536
|
uni_char += 65536
|
||||||
uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
|
uni_char = unichr(uni_char).encode('ascii', 'xmlcharrefreplace')
|
||||||
@ -199,7 +196,7 @@ class Tokenize:
|
|||||||
|
|
||||||
# import sys
|
# import sys
|
||||||
# def main(args=sys.argv):
|
# def main(args=sys.argv):
|
||||||
# if len(args) < 1:
|
# if len(args) < 2:
|
||||||
# print 'No file'
|
# print 'No file'
|
||||||
# return
|
# return
|
||||||
# file = 'data_tokens.txt'
|
# file = 'data_tokens.txt'
|
||||||
@ -211,3 +208,5 @@ class Tokenize:
|
|||||||
|
|
||||||
# if __name__ == '__main__':
|
# if __name__ == '__main__':
|
||||||
# sys.exit(main())
|
# sys.exit(main())
|
||||||
|
|
||||||
|
# calibre-debug -e src/calibre/ebooks/rtf2xml/tokenize.py
|
Loading…
x
Reference in New Issue
Block a user