mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Txt output: remove more tags, ensure no spaces at beginning and end of lines
This commit is contained in:
parent
11013c2665
commit
94c5e717a1
@ -68,6 +68,9 @@ class TXTWriter(object):
|
|||||||
for tag in ['script', 'style']:
|
for tag in ['script', 'style']:
|
||||||
text = re.sub('(?imu)<[ ]*%s[ ]*.*?>(.*)</[ ]*%s[ ]*>' % (tag, tag), '', text)
|
text = re.sub('(?imu)<[ ]*%s[ ]*.*?>(.*)</[ ]*%s[ ]*>' % (tag, tag), '', text)
|
||||||
text = re.sub('<!--.*-->', '', text)
|
text = re.sub('<!--.*-->', '', text)
|
||||||
|
text = re.sub('<\?.*?\?>', '', text)
|
||||||
|
text = re.sub('<@.*?@>', '', text)
|
||||||
|
text = re.sub('<%.*?%>', '', text)
|
||||||
|
|
||||||
# Headings usually indicate Chapters.
|
# Headings usually indicate Chapters.
|
||||||
# We are going to use a marker to insert the proper number of
|
# We are going to use a marker to insert the proper number of
|
||||||
@ -107,7 +110,6 @@ class TXTWriter(object):
|
|||||||
text = text.replace(u'\xa0', ' ')
|
text = text.replace(u'\xa0', ' ')
|
||||||
|
|
||||||
# Replace tabs, vertical tabs and form feeds with a single space.
|
# Replace tabs, vertical tabs and form feeds with a single space.
|
||||||
#text = re.sub('\xc2\xa0', '', text)
|
|
||||||
text = text.replace('\t+', ' ')
|
text = text.replace('\t+', ' ')
|
||||||
text = text.replace('\v+', ' ')
|
text = text.replace('\v+', ' ')
|
||||||
text = text.replace('\f+', ' ')
|
text = text.replace('\f+', ' ')
|
||||||
@ -122,8 +124,6 @@ class TXTWriter(object):
|
|||||||
|
|
||||||
# Remove multiple spaces.
|
# Remove multiple spaces.
|
||||||
text = re.sub('[ ]+', ' ', text)
|
text = re.sub('[ ]+', ' ', text)
|
||||||
text = re.sub('(?imu)^[ ]+', '', text)
|
|
||||||
text = re.sub('(?imu)[ ]+$', '', text)
|
|
||||||
|
|
||||||
# Remove excessive newlines.
|
# Remove excessive newlines.
|
||||||
text = re.sub('\n[ ]+\n', '\n\n', text)
|
text = re.sub('\n[ ]+\n', '\n\n', text)
|
||||||
@ -133,6 +133,10 @@ class TXTWriter(object):
|
|||||||
text = text.replace('-vzxedxy-', '\n\n\n\n\n')
|
text = text.replace('-vzxedxy-', '\n\n\n\n\n')
|
||||||
text = text.replace('-vlgzxey-', '\n\n\n')
|
text = text.replace('-vlgzxey-', '\n\n\n')
|
||||||
|
|
||||||
|
# Remove spaces at the beginning and end of lines.
|
||||||
|
text = re.sub('(?imu)^[ ]+', '', text)
|
||||||
|
text = re.sub('(?imu)[ ]+$', '', text)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def unix_newlines(self, text):
|
def unix_newlines(self, text):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user