diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index a0570c07ae..cce7bea519 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -63,7 +63,8 @@ class TXTInput(InputFormatPlugin):
                 raise ValueError('This txt file has malformed markup, it cannot be'
                     ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
         else:
-            html = convert_basic(txt)
+            flow_size = getattr(options, 'flow_size', 0)
+            html = convert_basic(txt, epub_split_size_kb=flow_size)
 
         from calibre.customize.ui import plugin_for_input_format
         html_input = plugin_for_input_format('html')
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index baebf2f298..c53d630ed6 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -17,14 +17,11 @@ __docformat__ = 'restructuredtext en'
 
 HTML_TEMPLATE = u'%s\n%s\n'
 
-def convert_basic(txt, title=''):
-    lines = []
+def convert_basic(txt, title='', epub_split_size_kb=0):
     # Strip whitespace from the beginning and end of the line. Also replace
     # all line breaks with \n.
-    for line in txt.splitlines():
-        lines.append(line.strip())
-    txt = '\n'.join(lines)
+    txt = '\n'.join([line.strip() for line in txt.splitlines()])
 
     # Condense redundant spaces
     txt = re.sub('[ ]{2,}', ' ', txt)
 
@@ -34,6 +31,18 @@ def convert_basic(txt, title=''):
     # Remove excessive line breaks.
     txt = re.sub('\n{3,}', '\n\n', txt)
 
+    # Insert extra paragraph breaks into oversized paragraphs to ease later
+    # splitting for EPUB. A size of 0 (the default) disables this step.
+    if epub_split_size_kb > 0:
+        length_byte = len(txt.encode('utf-8'))
+        # Average chunk size in bytes; the +2 is a safety margin. max() keeps
+        # the size positive for very short input.
+        chunk_count = int(length_byte // (epub_split_size_kb * 1024)) + 2
+        chunk_size = max(1, length_byte // chunk_count)
+        # Paragraphs that already fit are returned unchanged by the helper.
+        txt = u'\n\n'.join([split_string_separator(para, chunk_size)
+                            for para in txt.split('\n\n')])
+
     lines = []
     # Split into paragraphs based on having a blank line between text.
     for line in txt.split('\n\n'):
@@ -71,3 +80,30 @@ def opf_writer(path, opf_name, manifest, spine, mi):
     with open(os.path.join(path, opf_name), 'wb') as opffile:
         opf.render(opffile)
 
+
+def split_string_separator(txt, size):
+    '''
+    Insert paragraph breaks into ``txt`` if it is larger than ``size`` bytes.
+
+    ``txt`` is cut into consecutive windows of roughly ``size`` bytes (UTF-8)
+    each and a blank line is inserted after the last full stop of every
+    window. Windows that contain no full stop are left untouched.
+
+    :param txt: The text of one paragraph.
+    :param size: Size limit in bytes of UTF-8. Values <= 0 disable splitting.
+    :return: ``txt`` itself when it fits or splitting is disabled, otherwise
+        ``txt`` with the extra paragraph breaks inserted.
+    '''
+    byte_len = len(txt.encode('utf-8'))
+    if size <= 0 or byte_len <= size:
+        return txt
+    # ``size`` counts bytes but slicing counts characters: scale the window
+    # by the average character width so it matches ``size`` bytes on average.
+    step = max(1, size * len(txt) // byte_len)
+    pieces = []
+    for start in range(0, len(txt), step):
+        head, stop, tail = txt[start:start + step].rpartition(u'.')
+        # rpartition() yields an empty separator when there is no full stop;
+        # the whole window is then in ``tail``.
+        pieces.append(head + u'.\n\n' + tail if stop else tail)
+    return u''.join(pieces)