mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Txt output: remove more tags, ensure no spaces at beginning and end of lines
This commit is contained in:
parent
11013c2665
commit
94c5e717a1
@ -68,6 +68,9 @@ class TXTWriter(object):
|
||||
for tag in ['script', 'style']:
|
||||
text = re.sub('(?imu)<[ ]*%s[ ]*.*?>(.*)</[ ]*%s[ ]*>' % (tag, tag), '', text)
|
||||
text = re.sub('<!--.*-->', '', text)
|
||||
text = re.sub('<\?.*?\?>', '', text)
|
||||
text = re.sub('<@.*?@>', '', text)
|
||||
text = re.sub('<%.*?%>', '', text)
|
||||
|
||||
# Headings usually indicate Chapters.
|
||||
# We are going to use a marker to insert the proper number of
|
||||
@ -107,7 +110,6 @@ class TXTWriter(object):
|
||||
text = text.replace(u'\xa0', ' ')
|
||||
|
||||
# Replace tabs, vertical tabs and form feeds with a single space.
|
||||
#text = re.sub('\xc2\xa0', '', text)
|
||||
text = text.replace('\t+', ' ')
|
||||
text = text.replace('\v+', ' ')
|
||||
text = text.replace('\f+', ' ')
|
||||
@ -122,8 +124,6 @@ class TXTWriter(object):
|
||||
|
||||
# Remove multiple spaces.
|
||||
text = re.sub('[ ]+', ' ', text)
|
||||
text = re.sub('(?imu)^[ ]+', '', text)
|
||||
text = re.sub('(?imu)[ ]+$', '', text)
|
||||
|
||||
# Remove excessive newlines.
|
||||
text = re.sub('\n[ ]+\n', '\n\n', text)
|
||||
@ -133,6 +133,10 @@ class TXTWriter(object):
|
||||
text = text.replace('-vzxedxy-', '\n\n\n\n\n')
|
||||
text = text.replace('-vlgzxey-', '\n\n\n')
|
||||
|
||||
# Remove spaces at the beginning and end of lines
|
||||
text = re.sub('(?imu)^[ ]+', '', text)
|
||||
text = re.sub('(?imu)[ ]+$', '', text)
|
||||
|
||||
return text
|
||||
|
||||
def unix_newlines(self, text):
|
||||
|
Loading…
x
Reference in New Issue
Block a user