# -*- coding: utf-8 -*-

__license__ = 'GPL 3'
__copyright__ = '2009, John Schember '
__docformat__ = 'restructuredtext en'

import os

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted

# Signature this plugin requires in the first 9 bytes of the file.
_TCR_MAGIC = b'!!8-Bit!!'

# Number of length-prefixed entries in the string dictionary that follows
# the signature; one entry for every possible byte value in the body.
_TCR_DICT_SIZE = 256


def _stream_name(stream):
    '''
    Return a printable name for ``stream`` for use in error messages.

    Not every file-like object has a ``name`` attribute, and a missing
    attribute must not hide the real parsing error behind an AttributeError.
    '''
    return getattr(stream, 'name', '<stream>')


def _read_tcr_dictionary(stream):
    '''
    Read the string dictionary that follows the TCR signature.

    Each entry is stored as a single length byte followed by that many bytes
    of data.

    :param stream: Binary file-like object positioned just after the signature.
    :return: List of ``_TCR_DICT_SIZE`` byte strings, indexed by code.
    :raises ValueError: If the stream ends before the dictionary is complete.
    '''
    entries = []
    for index in range(_TCR_DICT_SIZE):
        length_byte = stream.read(1)
        if not length_byte:
            raise ValueError(
                'File %s contains a truncated TCR dictionary (entry %d).'
                % (_stream_name(stream), index))
        entry_len = ord(length_byte)
        entry = stream.read(entry_len)
        if len(entry) != entry_len:
            raise ValueError(
                'File %s contains a truncated TCR dictionary (entry %d).'
                % (_stream_name(stream), index))
        entries.append(entry)
    return entries


class TCRInput(InputFormatPlugin):
    '''
    Input plugin that turns a TCR file into HTML plus an OPF.

    The plugin is made available to calibre by being imported into, and
    listed in the ``plugins`` list of, ``calibre.customize.builtins``.
    '''

    name        = 'TCR Input'
    author      = 'John Schember'
    description = 'Convert TCR files to HTML'
    file_types  = set(['tcr'])

    options = set([
        OptionRecommendation(name='single_line_paras', recommended_value=False,
            help=_('Normally calibre treats blank lines as paragraph markers. '
                'With this option it will assume that every line represents '
                'a paragraph instead.')),
        OptionRecommendation(name='print_formatted_paras', recommended_value=False,
            help=_('Normally calibre treats blank lines as paragraph markers. '
                'With this option it will assume that every line starting with '
                'an indent (either a tab or 2+ spaces) represents a paragraph. '
                'Paragraphs end when the next line that starts with an indent '
                'is reached.')),
    ])

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Decompress a TCR file and write it out as ``index.html`` and
        ``metadata.opf`` in the current working directory.

        :param stream: Binary file-like object positioned at the start of the
            TCR data.
        :param options: Conversion options; ``input_encoding``,
            ``single_line_paras`` and ``print_formatted_paras`` are read.
        :param file_ext: Not used by this plugin.
        :param log: Logger providing ``debug`` and ``info``.
        :param accelerators: Not used by this plugin.
        :return: Path of the ``metadata.opf`` file in the working directory.
        :raises ValueError: If the signature is wrong or the string
            dictionary is truncated.
        '''
        log.debug('Checking TCR header...')
        if stream.read(len(_TCR_MAGIC)) != _TCR_MAGIC:
            raise ValueError('File %s contains an invalid TCR header.'
                    % _stream_name(stream))

        log.debug('Building string dictionary...')
        # Dictionary codes that the file contents are broken down into.
        entries = _read_tcr_dictionary(stream)

        log.info('Decompressing text...')
        # Every remaining byte is an index into the dictionary. Read the body
        # in one call rather than byte by byte; iterating a bytearray yields
        # integers, so no per-byte ord() is needed.
        raw = b''.join([entries[code] for code in bytearray(stream.read())])

        ienc = options.input_encoding or 'utf-8'
        txt = raw.decode(ienc, 'replace')

        log.info('Converting text to OEB...')
        if options.single_line_paras:
            txt = separate_paragraphs_single_line(txt)
        if options.print_formatted_paras:
            txt = separate_paragraphs_print_formatted(txt)
        html = convert_basic(txt)

        # Capture the output directory once so both files and the returned
        # path are guaranteed to refer to the same location.
        base = os.getcwd()
        with open(os.path.join(base, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # NOTE(review): the stream is at EOF at this point; this presumes
        # get_metadata repositions it as needed -- confirm.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(stream, 'tcr')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(base, 'metadata.opf', manifest, spine, mi)

        return os.path.join(base, 'metadata.opf')