Pull from driver-dev

This commit is contained in:
Kovid Goyal 2009-06-21 12:47:31 -07:00
commit ad78fd2a68
23 changed files with 97 additions and 57 deletions

View File

@ -57,7 +57,6 @@ def line_length(raw, percent):
return 0 return 0
total = sum(lengths) total = sum(lengths)
print total
avg = total / len(lengths) avg = total / len(lengths)
max_line = avg * 2 max_line = avg * 2

View File

@ -9,12 +9,10 @@ Transform OEB content into FB2 markup
''' '''
import os import os
import re
from base64 import b64encode from base64 import b64encode
from lxml import etree from lxml import etree
from calibre import entity_to_unicode
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_IMAGES from calibre.ebooks.oeb.base import OEB_IMAGES
@ -33,11 +31,11 @@ STYLES = [
] ]
class FB2MLizer(object): class FB2MLizer(object):
def __init__(self, ignore_tables=False): def __init__(self, log):
self.ignore_tables = ignore_tables self.log = log
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
oeb_book.logger.info('Converting XHTML to FB2 markup...') self.log.info('Converting XHTML to FB2 markup...')
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
return self.fb2mlize_spine() return self.fb2mlize_spine()
@ -45,12 +43,14 @@ class FB2MLizer(object):
def fb2mlize_spine(self): def fb2mlize_spine(self):
output = self.fb2_header() output = self.fb2_header()
if 'titlepage' in self.oeb_book.guide: if 'titlepage' in self.oeb_book.guide:
self.log.debug('Generating cover page...')
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += self.fb2_body_footer() output += self.fb2_body_footer()

View File

@ -16,7 +16,7 @@ class FB2Output(OutputFormatPlugin):
file_type = 'fb2' file_type = 'fb2'
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):
fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables) fb2mlizer = FB2MLizer(log)
fb2_content = fb2mlizer.extract_content(oeb_book, opts) fb2_content = fb2mlizer.extract_content(oeb_book, opts)
close = False close = False
@ -30,7 +30,7 @@ class FB2Output(OutputFormatPlugin):
out_stream.seek(0) out_stream.seek(0)
out_stream.truncate() out_stream.truncate()
out_stream.write(fb2_content.encode('utf-8')) out_stream.write(fb2_content.encode('utf-8', 'replace'))
if close: if close:
out_stream.close() out_stream.close()

View File

@ -8,7 +8,6 @@ import os
from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OutputFormatPlugin
from calibre.ebooks.pdb.ereader.writer import Writer from calibre.ebooks.pdb.ereader.writer import Writer
from calibre.ebooks.metadata import authors_to_string
class EREADEROutput(OutputFormatPlugin): class EREADEROutput(OutputFormatPlugin):

View File

@ -73,9 +73,9 @@ class Reader132(FormatReader):
def decompress_text(self, number): def decompress_text(self, number):
if self.header_record.version == 2: if self.header_record.version == 2:
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
if self.header_record.version == 10: if self.header_record.version == 10:
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def get_image(self, number): def get_image(self, number):
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1: if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:

View File

@ -54,7 +54,7 @@ class Reader202(FormatReader):
return self.sections[number] return self.sections[number]
def decompress_text(self, number): def decompress_text(self, number):
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding) return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def get_image(self, number): def get_image(self, number):
name = None name = None

View File

@ -49,7 +49,7 @@ class Reader(FormatReader):
if self.header_record.compression == 1: if self.header_record.compression == 1:
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding) return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
if self.header_record.compression == 2: if self.header_record.compression == 2:
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
return '' return ''
def extract_content(self, output_dir): def extract_content(self, output_dir):

View File

@ -65,7 +65,7 @@ class Reader(FormatReader):
def decompress_text(self, number): def decompress_text(self, number):
if number == 1: if number == 1:
self.uncompressor = zlib.decompressobj() self.uncompressor = zlib.decompressobj()
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def extract_content(self, output_dir): def extract_content(self, output_dir):
txt = '' txt = ''

View File

@ -24,10 +24,12 @@ class PDFInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
log.debug('Converting file to html...')
# The main html file will be named index.html # The main html file will be named index.html
pdftohtml(os.getcwd(), stream.name, options.no_images) pdftohtml(os.getcwd(), stream.name, options.no_images)
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
log.debug('Retrieving document metadata...')
mi = get_metadata(stream, 'pdf') mi = get_metadata(stream, 'pdf')
opf = OPFCreator(os.getcwd(), mi) opf = OPFCreator(os.getcwd(), mi)
@ -42,9 +44,11 @@ class PDFInput(InputFormatPlugin):
new_i = i.replace('-', '') new_i = i.replace('-', '')
os.rename(i, new_i) os.rename(i, new_i)
manifest.append((new_i, None)) manifest.append((new_i, None))
log.debug('Generating manifest...')
opf.create_manifest(manifest) opf.create_manifest(manifest)
opf.create_spine(['index.html']) opf.create_spine(['index.html'])
log.debug('Rendering manifest...')
with open('metadata.opf', 'wb') as opffile: with open('metadata.opf', 'wb') as opffile:
opf.render(opffile) opf.render(opffile)

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -9,9 +8,8 @@ __docformat__ = 'restructuredtext en'
Convert OEB ebook format to PDF. Convert OEB ebook format to PDF.
''' '''
#unit, papersize, orientation, custom_size, profile import glob
import os
import os, glob
from calibre.customize.conversion import OutputFormatPlugin, \ from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation OptionRecommendation
@ -54,14 +52,17 @@ class PDFOutput(OutputFormatPlugin):
self.metadata = oeb_book.metadata self.metadata = oeb_book.metadata
if input_plugin.is_image_collection: if input_plugin.is_image_collection:
log.debug('Converting input as an image collection...')
self.convert_images(input_plugin.get_images()) self.convert_images(input_plugin.get_images())
else: else:
log.debug('Converting input as a text based book...')
self.convert_text(oeb_book) self.convert_text(oeb_book)
def convert_images(self, images): def convert_images(self, images):
self.write(ImagePDFWriter, images) self.write(ImagePDFWriter, images)
def convert_text(self, oeb_book): def convert_text(self, oeb_book):
self.log.debug('Serializing oeb input to disk for processing...')
with TemporaryDirectory('_pdf_out') as oeb_dir: with TemporaryDirectory('_pdf_out') as oeb_dir:
from calibre.customize.ui import plugin_for_output_format from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb') oeb_output = plugin_for_output_format('oeb')
@ -86,6 +87,7 @@ class PDFOutput(OutputFormatPlugin):
out_stream.seek(0) out_stream.seek(0)
out_stream.truncate() out_stream.truncate()
self.log.debug('Rendering pages to PDF...')
writer.dump(items, out_stream, PDFMetadata(self.metadata)) writer.dump(items, out_stream, PDFMetadata(self.metadata))
if close: if close:

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -9,11 +8,12 @@ __docformat__ = 'restructuredtext en'
Write content to PDF. Write content to PDF.
''' '''
import os, shutil import os
import shutil
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
orientation, size orientation
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from PyQt4 import QtCore from PyQt4 import QtCore

View File

@ -4,7 +4,9 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import glob, os, shutil import glob
import os
import shutil
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
@ -40,8 +42,9 @@ class PMLInput(InputFormatPlugin):
if self.options.input_encoding: if self.options.input_encoding:
ienc = self.options.input_encoding ienc = self.options.input_encoding
self.log.debug('Converting PML to HTML...')
html = pml_to_html(pml_stream.read().decode(ienc)) html = pml_to_html(pml_stream.read().decode(ienc))
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>') html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
if pclose: if pclose:
pml_stream.close() pml_stream.close()
@ -51,9 +54,11 @@ class PMLInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
self.options = options self.options = options
self.log = log
pages, images = [], [] pages, images = [], []
if file_ext == 'pmlz': if file_ext == 'pmlz':
log.debug('De-compressing content to temporary directory...')
with TemporaryDirectory('_unpmlz') as tdir: with TemporaryDirectory('_unpmlz') as tdir:
zf = ZipFile(stream) zf = ZipFile(stream)
zf.extractall(tdir) zf.extractall(tdir)
@ -64,6 +69,7 @@ class PMLInput(InputFormatPlugin):
html_path = os.path.join(os.getcwd(), html_name) html_path = os.path.join(os.getcwd(), html_name)
pages.append(html_name) pages.append(html_name)
log.debug('Processing PML item %s...' % pml)
self.process_pml(pml, html_path) self.process_pml(pml, html_path)
imgs = glob.glob(os.path.join(tdir, '*.png')) imgs = glob.glob(os.path.join(tdir, '*.png'))
@ -90,12 +96,13 @@ class PMLInput(InputFormatPlugin):
manifest_items.append((item, None)) manifest_items.append((item, None))
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
log.debug('Reading metadata from input file...')
mi = get_metadata(stream, 'pml') mi = get_metadata(stream, 'pml')
opf = OPFCreator(os.getcwd(), mi) opf = OPFCreator(os.getcwd(), mi)
log.debug('Generating manifest...')
opf.create_manifest(manifest_items) opf.create_manifest(manifest_items)
opf.create_spine(pages) opf.create_spine(pages)
with open('metadata.opf', 'wb') as opffile: with open('metadata.opf', 'wb') as opffile:
opf.render(opffile) opf.render(opffile)
return os.path.join(os.getcwd(), 'metadata.opf') return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -37,13 +37,14 @@ class PMLOutput(OutputFormatPlugin):
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):
with TemporaryDirectory('_pmlz_output') as tdir: with TemporaryDirectory('_pmlz_output') as tdir:
pmlmlizer = PMLMLizer(ignore_tables=opts.linearize_tables) pmlmlizer = PMLMLizer(log)
content = pmlmlizer.extract_content(oeb_book, opts) content = pmlmlizer.extract_content(oeb_book, opts)
with open(os.path.join(tdir, 'index.pml'), 'wb') as out: with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
out.write(content.encode(opts.output_encoding, 'replace')) out.write(content.encode(opts.output_encoding, 'replace'))
self.write_images(oeb_book.manifest, tdir) self.write_images(oeb_book.manifest, tdir)
log.debug('Compressing output...')
pmlz = ZipFile(output_path, 'w') pmlz = ZipFile(output_path, 'w')
pmlz.add_dir(tdir) pmlz.add_dir(tdir)

View File

@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en'
import re import re
from htmlentitydefs import codepoint2name
from calibre.ebooks.pdb.ereader import image_name from calibre.ebooks.pdb.ereader import image_name
PML_HTML_RULES = [ PML_HTML_RULES = [

View File

@ -67,11 +67,11 @@ SEPARATE_TAGS = [
] ]
class PMLMLizer(object): class PMLMLizer(object):
def __init__(self, ignore_tables=False): def __init__(self, log):
self.ignore_tables = ignore_tables self.log = log
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
oeb_book.logger.info('Converting XHTML to PML markup...') self.log.info('Converting XHTML to PML markup...')
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
return self.pmlmlize_spine() return self.pmlmlize_spine()
@ -79,12 +79,14 @@ class PMLMLizer(object):
def pmlmlize_spine(self): def pmlmlize_spine(self):
output = u'' output = u''
if 'titlepage' in self.oeb_book.guide: if 'titlepage' in self.oeb_book.guide:
self.log.debug('Generating title page...')
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to PML markup...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.add_page_anchor(item.href) output += self.add_page_anchor(item.href)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)

View File

@ -52,12 +52,12 @@ STYLES = [
class RBMLizer(object): class RBMLizer(object):
def __init__(self, name_map={}, ignore_tables=False): def __init__(self, log, name_map={}):
self.log = log
self.name_map = name_map self.name_map = name_map
self.ignore_tables = ignore_tables
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
oeb_book.logger.info('Converting XHTML to RB markup...') self.log.info('Converting XHTML to RB markup...')
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
return self.mlize_spine() return self.mlize_spine()
@ -66,12 +66,14 @@ class RBMLizer(object):
def mlize_spine(self): def mlize_spine(self):
output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>' output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
if 'titlepage' in self.oeb_book.guide: if 'titlepage' in self.oeb_book.guide:
self.log.debug('Generating cover page...')
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.add_page_anchor(item.href) output += self.add_page_anchor(item.href)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)

View File

@ -84,9 +84,9 @@ class Reader(object):
for size in chunck_sizes: for size in chunck_sizes:
cm_chunck = self.stream.read(size) cm_chunck = self.stream.read(size)
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding) output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
else: else:
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding) output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
with open(os.path.join(output_dir, toc_item.name), 'wb') as html: with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
html.write(output.encode('utf-8')) html.write(output.encode('utf-8'))
@ -102,14 +102,17 @@ class Reader(object):
img.write(data) img.write(data)
def extract_content(self, output_dir): def extract_content(self, output_dir):
self.log.debug('Extracting content from file...')
html = [] html = []
images = [] images = []
for item in self.toc: for item in self.toc:
if item.name.lower().endswith('html'): if item.name.lower().endswith('html'):
self.log.debug('HTML item %s found...' % item.name)
html.append(item.name) html.append(item.name)
self.get_text(item, output_dir) self.get_text(item, output_dir)
if item.name.lower().endswith('png'): if item.name.lower().endswith('png'):
self.log.debug('PNG item %s found...' % item.name)
images.append(item.name) images.append(item.name)
self.get_image(item, output_dir) self.get_image(item, output_dir)

View File

@ -64,6 +64,7 @@ class RBWriter(object):
flags = 0 flags = 0
toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags)) toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags))
self.log.debug('Writing file header...')
out_stream.write(HEADER) out_stream.write(HEADER)
out_stream.write(struct.pack('<I', 0)) out_stream.write(struct.pack('<I', 0))
out_stream.write(struct.pack('<IH', 0, 0)) out_stream.write(struct.pack('<IH', 0, 0))
@ -82,6 +83,7 @@ class RBWriter(object):
out_stream.write(info[0][1]) out_stream.write(info[0][1])
self.log.debug('Writing compressed RB HTHML...')
# Compressed text with proper heading # Compressed text with proper heading
out_stream.write(struct.pack('<I', len(text[0][1]))) out_stream.write(struct.pack('<I', len(text[0][1])))
out_stream.write(struct.pack('<I', text_size)) out_stream.write(struct.pack('<I', text_size))
@ -90,6 +92,7 @@ class RBWriter(object):
for chunck in text[0][1]: for chunck in text[0][1]:
out_stream.write(chunck) out_stream.write(chunck)
self.log.debug('Writing images...')
for item in hidx+images: for item in hidx+images:
out_stream.write(item[1]) out_stream.write(item[1])
@ -98,7 +101,7 @@ class RBWriter(object):
out_stream.write(struct.pack('<I', total_size)) out_stream.write(struct.pack('<I', total_size))
def _text(self, oeb_book): def _text(self, oeb_book):
rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables) rbmlizer = RBMLizer(log, name_map=self.name_map)
text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace') text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')
size = len(text) size = len(text)

View File

@ -16,7 +16,7 @@ class RTFOutput(OutputFormatPlugin):
file_type = 'rtf' file_type = 'rtf'
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):
rtfmlitzer = RTFMLizer(ignore_tables=opts.linearize_tables) rtfmlitzer = RTFMLizer(log)
content = rtfmlitzer.extract_content(oeb_book, opts) content = rtfmlitzer.extract_content(oeb_book, opts)
close = False close = False

View File

@ -79,11 +79,11 @@ TODO:
''' '''
class RTFMLizer(object): class RTFMLizer(object):
def __init__(self, ignore_tables=False): def __init__(self, log):
self.ignore_tables = ignore_tables self.log = log
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
oeb_book.logger.info('Converting XHTML to RTF markup...') self.log.info('Converting XHTML to RTF markup...')
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
return self.mlize_spine() return self.mlize_spine()
@ -98,6 +98,7 @@ class RTFMLizer(object):
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += '{\\page } ' output += '{\\page } '
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to RTF markup...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += self.footer() output += self.footer()

View File

@ -21,20 +21,26 @@ class TXTInput(InputFormatPlugin):
ienc = stream.encoding if stream.encoding else 'utf-8' ienc = stream.encoding if stream.encoding else 'utf-8'
if options.input_encoding: if options.input_encoding:
ienc = options.input_encoding ienc = options.input_encoding
txt = stream.read().decode(ienc) log.debug('Reading text from file...')
txt = stream.read().decode(ienc, 'replace')
log.debug('Running text though markdown conversion...')
try: try:
html = txt_to_markdown(txt) html = txt_to_markdown(txt)
except RuntimeError: except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be' raise ValueError('This txt file has malformed markup, it cannot be'
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax') 'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
log.debug('Writing html output...')
with open('index.html', 'wb') as index: with open('index.html', 'wb') as index:
index.write(html.encode('utf-8')) index.write(html.encode('utf-8'))
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
log.debug('Retrieving source document metadata...')
mi = get_metadata(stream, 'txt') mi = get_metadata(stream, 'txt')
manifest = [('index.html', None)] manifest = [('index.html', None)]
spine = ['index.html'] spine = ['index.html']
log.debug('Generating manifest...')
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi) opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
return os.path.join(os.getcwd(), 'metadata.opf') return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -24,6 +24,7 @@ class TxtWriter(object):
def dump(self, spine): def dump(self, spine):
out = u'' out = u''
for item in spine: for item in spine:
self.log.debug('Processing %s...' % item.href)
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode)) content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
content = self.remove_newlines(content) content = self.remove_newlines(content)
content = self.strip_html(content) content = self.strip_html(content)
@ -40,6 +41,7 @@ class TxtWriter(object):
return out return out
def strip_html(self, text): def strip_html(self, text):
self.log.debug('\tStripping html...')
stripped = u'' stripped = u''
# Remove unnecessary tags # Remove unnecessary tags
@ -77,6 +79,7 @@ class TxtWriter(object):
return stripped return stripped
def replace_html_symbols(self, content): def replace_html_symbols(self, content):
self.log.debug('\tReplacing entities with unicode...')
for entity in set(re.findall('&.+?;', content)): for entity in set(re.findall('&.+?;', content)):
mo = re.search('(%s)' % entity[1:-1], content) mo = re.search('(%s)' % entity[1:-1], content)
content = content.replace(entity, entity_to_unicode(mo)) content = content.replace(entity, entity_to_unicode(mo))
@ -84,6 +87,7 @@ class TxtWriter(object):
return content return content
def cleanup_text(self, text): def cleanup_text(self, text):
self.log.debug('\tClean up text...')
# Replace bad characters. # Replace bad characters.
text = text.replace(u'\xc2', '') text = text.replace(u'\xc2', '')
text = text.replace(u'\xa0', ' ') text = text.replace(u'\xa0', ' ')
@ -114,6 +118,7 @@ class TxtWriter(object):
return text return text
def remove_newlines(self, text): def remove_newlines(self, text):
self.log.debug('\tRemove newlines for processing...')
text = text.replace('\r\n', ' ') text = text.replace('\r\n', ' ')
text = text.replace('\n', ' ') text = text.replace('\n', ' ')
text = text.replace('\r', ' ') text = text.replace('\r', ' ')
@ -121,6 +126,7 @@ class TxtWriter(object):
return text return text
def specified_newlines(self, text): def specified_newlines(self, text):
self.log.debug('\tReplacing newlines with selected type...')
if self.newline == '\n': if self.newline == '\n':
return text return text

View File

@ -167,7 +167,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.connect(self.quit_action, SIGNAL('triggered(bool)'), self.quit) self.connect(self.quit_action, SIGNAL('triggered(bool)'), self.quit)
self.connect(self.donate_action, SIGNAL('triggered(bool)'), self.donate) self.connect(self.donate_action, SIGNAL('triggered(bool)'), self.donate)
self.connect(self.restore_action, SIGNAL('triggered()'), self.connect(self.restore_action, SIGNAL('triggered()'),
self.show) self.show_windows)
self.connect(self.action_show_book_details, self.connect(self.action_show_book_details,
SIGNAL('triggered(bool)'), self.show_book_info) SIGNAL('triggered(bool)'), self.show_book_info)
self.connect(self.action_restart, SIGNAL('triggered()'), self.connect(self.action_restart, SIGNAL('triggered()'),
@ -317,6 +317,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
pm = QMenu() pm = QMenu()
ap = self.action_preferences ap = self.action_preferences
pm.addAction(ap.icon(), ap.text()) pm.addAction(ap.icon(), ap.text())
pm.addAction(self.preferences_action)
pm.addAction(_('Run welcome wizard')) pm.addAction(_('Run welcome wizard'))
self.connect(pm.actions()[1], SIGNAL('triggered(bool)'), self.connect(pm.actions()[1], SIGNAL('triggered(bool)'),
self.run_wizard) self.run_wizard)
@ -402,9 +403,9 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.card_a_view.connect_dirtied_signal(self.upload_booklists) self.card_a_view.connect_dirtied_signal(self.upload_booklists)
self.card_b_view.connect_dirtied_signal(self.upload_booklists) self.card_b_view.connect_dirtied_signal(self.upload_booklists)
self.show() self.show_windows()
if self.system_tray_icon.isVisible() and opts.start_in_tray: if self.system_tray_icon.isVisible() and opts.start_in_tray:
self.hide() self.hide_windows()
self.stack.setCurrentIndex(0) self.stack.setCurrentIndex(0)
try: try:
db = LibraryDatabase2(self.library_path) db = LibraryDatabase2(self.library_path)
@ -521,12 +522,18 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def system_tray_icon_activated(self, r): def system_tray_icon_activated(self, r):
if r == QSystemTrayIcon.Trigger: if r == QSystemTrayIcon.Trigger:
if self.isVisible(): if self.isVisible():
self.hide_windows()
else:
self.show_windows()
def hide_windows(self):
for window in QApplication.topLevelWidgets(): for window in QApplication.topLevelWidgets():
if isinstance(window, (MainWindow, QDialog)) and \ if isinstance(window, (MainWindow, QDialog)) and \
window.isVisible(): window.isVisible():
window.hide() window.hide()
setattr(window, '__systray_minimized', True) setattr(window, '__systray_minimized', True)
else:
def show_windows(self):
for window in QApplication.topLevelWidgets(): for window in QApplication.topLevelWidgets():
if getattr(window, '__systray_minimized', False): if getattr(window, '__systray_minimized', False):
window.show() window.show()
@ -641,7 +648,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.add_filesystem_book(path) self.add_filesystem_book(path)
self.setWindowState(self.windowState() & \ self.setWindowState(self.windowState() & \
~Qt.WindowMinimized|Qt.WindowActive) ~Qt.WindowMinimized|Qt.WindowActive)
self.show() self.show_windows()
self.raise_() self.raise_()
self.activateWindow() self.activateWindow()
elif msg.startswith('refreshdb:'): elif msg.startswith('refreshdb:'):
@ -1658,7 +1665,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.spare_servers.pop().close() self.spare_servers.pop().close()
self.device_manager.keep_going = False self.device_manager.keep_going = False
self.cover_cache.stop() self.cover_cache.stop()
self.hide() self.hide_windows()
self.cover_cache.terminate() self.cover_cache.terminate()
self.emailer.stop() self.emailer.stop()
try: try:
@ -1670,7 +1677,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
time.sleep(2) time.sleep(2)
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
self.hide() self.hide_windows()
return True return True
def run_wizard(self, *args): def run_wizard(self, *args):
@ -1694,7 +1701,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
'choose <b>Quit</b> in the context menu of the ' 'choose <b>Quit</b> in the context menu of the '
'system tray.')).exec_() 'system tray.')).exec_()
dynamic['systray_msg'] = True dynamic['systray_msg'] = True
self.hide() self.hide_windows()
e.ignore() e.ignore()
else: else:
if self.confirm_quit(): if self.confirm_quit():