mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #5904 (Splitting problem with text entries)
This commit is contained in:
commit
f3c723077a
@ -63,7 +63,8 @@ class TXTInput(InputFormatPlugin):
|
|||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
raise ValueError('This txt file has malformed markup, it cannot be'
|
||||||
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
else:
|
else:
|
||||||
html = convert_basic(txt)
|
flow_size = getattr(options, 'flow_size', 0)
|
||||||
|
html = convert_basic(txt, epub_split_size_kb=flow_size)
|
||||||
|
|
||||||
from calibre.customize.ui import plugin_for_input_format
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
html_input = plugin_for_input_format('html')
|
html_input = plugin_for_input_format('html')
|
||||||
|
@ -17,13 +17,10 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
def convert_basic(txt, title=''):
|
def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||||
lines = []
|
|
||||||
# Strip whitespace from the beginning and end of the line. Also replace
|
# Strip whitespace from the beginning and end of the line. Also replace
|
||||||
# all line breaks with \n.
|
# all line breaks with \n.
|
||||||
for line in txt.splitlines():
|
txt = '\n'.join([line.strip() for line in txt.splitlines()])
|
||||||
lines.append(line.strip())
|
|
||||||
txt = '\n'.join(lines)
|
|
||||||
|
|
||||||
# Condense redundant spaces
|
# Condense redundant spaces
|
||||||
txt = re.sub('[ ]{2,}', ' ', txt)
|
txt = re.sub('[ ]{2,}', ' ', txt)
|
||||||
@ -34,6 +31,15 @@ def convert_basic(txt, title=''):
|
|||||||
# Remove excessive line breaks.
|
# Remove excessive line breaks.
|
||||||
txt = re.sub('\n{3,}', '\n\n', txt)
|
txt = re.sub('\n{3,}', '\n\n', txt)
|
||||||
|
|
||||||
|
#Takes care if there is no point to split
|
||||||
|
if epub_split_size_kb > 0:
|
||||||
|
length_byte = len(txt.encode('utf-8'))
|
||||||
|
#Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
|
||||||
|
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024) ) + 2 ))
|
||||||
|
#if there are chunks with a superior size then go and break
|
||||||
|
if (len(filter(lambda x: len(x.encode('utf-8')) > chunk_size, txt.split('\n\n')))) :
|
||||||
|
txt = u'\n\n'.join([split_string_separator(line, chunk_size) for line in txt.split('\n\n')])
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
# Split into paragraphs based on having a blank line between text.
|
# Split into paragraphs based on having a blank line between text.
|
||||||
for line in txt.split('\n\n'):
|
for line in txt.split('\n\n'):
|
||||||
@ -71,3 +77,10 @@ def opf_writer(path, opf_name, manifest, spine, mi):
|
|||||||
with open(os.path.join(path, opf_name), 'wb') as opffile:
|
with open(os.path.join(path, opf_name), 'wb') as opffile:
|
||||||
opf.render(opffile)
|
opf.render(opffile)
|
||||||
|
|
||||||
|
def _utf8_bounded_chunks(txt, size):
    """Yield consecutive slices of *txt* whose UTF-8 encoding is at most
    *size* bytes (a slice always holds at least one character, so a single
    character wider than *size* is yielded on its own)."""
    start = 0
    used = 0
    for pos, ch in enumerate(txt):
        code = ord(ch)
        # Width of this code point once encoded as UTF-8.
        if code < 0x80:
            width = 1
        elif code < 0x800:
            width = 2
        elif code < 0x10000:
            width = 3
        else:
            width = 4
        if used + width > size and pos > start:
            yield txt[start:pos]
            start = pos
            used = 0
        used += width
    if start < len(txt):
        yield txt[start:]


def split_string_separator(txt, size):
    """Insert paragraph breaks into *txt* so that it can be split later.

    If the UTF-8 encoding of *txt* is longer than *size* bytes, the text is
    cut into consecutive pieces of at most *size* bytes each, and a blank
    line ('\\n\\n') is inserted after the last full stop ('.') found in each
    piece. Pieces that contain no full stop are left untouched, so no break
    is ever introduced in the middle of a sentence.

    :param txt: The text of one paragraph.
    :param size: Maximum piece size in UTF-8 encoded bytes. A value of zero
        or less disables splitting.
    :return: *txt*, possibly with extra '\\n\\n' separators inserted. Removing
        the inserted separators always yields the original text.
    """
    # A non-positive size cannot be used as a step; treat it as "no split"
    # instead of raising ValueError.
    if size <= 0 or len(txt.encode('utf-8')) <= size:
        return txt

    pieces = []
    for chunk in _utf8_bounded_chunks(txt, size):
        # Break after the last sentence end inside this piece, if any.
        last_stop = chunk.rfind('.')
        if last_stop != -1:
            chunk = chunk[:last_stop + 1] + '\n\n' + chunk[last_stop + 1:]
        pieces.append(chunk)
    return ''.join(pieces)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user