Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 18:54:09 -04:00)

Commit 4b9d1e40fc: Speed up import of builtin input plugins
Parent: a9cedb0a0b
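The change follows one pattern throughout: input plugins that previously pulled in their format libraries at module level now defer those imports until convert() (or another method) actually runs, so that importing the builtin plugin list stays cheap. A minimal sketch of that pattern, using a hypothetical FooInput plugin that is not part of this commit:

    from calibre.customize.conversion import InputFormatPlugin

    class FooInput(InputFormatPlugin):

        name = 'Foo Input'
        author = 'Example'
        description = 'Hypothetical plugin illustrating the lazy-import pattern'
        file_types = set(['foo'])

        def convert(self, stream, options, file_ext, log, accelerators):
            # Heavy, format-specific dependencies are imported here, on first
            # conversion, instead of at module import time.
            from calibre.ebooks.metadata.opf2 import OPFCreator
            log('Would build an OPF for', stream.name, 'using', OPFCreator)
            return stream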
@@ -501,27 +501,27 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
 # }}}
 
-from calibre.ebooks.comic.input import ComicInput
-from calibre.ebooks.djvu.input import DJVUInput
-from calibre.ebooks.epub.input import EPUBInput
-from calibre.ebooks.fb2.input import FB2Input
-from calibre.ebooks.html.input import HTMLInput
-from calibre.ebooks.htmlz.input import HTMLZInput
-from calibre.ebooks.lit.input import LITInput
-from calibre.ebooks.mobi.input import MOBIInput
-from calibre.ebooks.odt.input import ODTInput
-from calibre.ebooks.pdb.input import PDBInput
-from calibre.ebooks.azw4.input import AZW4Input
-from calibre.ebooks.pdf.input import PDFInput
-from calibre.ebooks.pml.input import PMLInput
-from calibre.ebooks.rb.input import RBInput
-from calibre.web.feeds.input import RecipeInput
-from calibre.ebooks.rtf.input import RTFInput
-from calibre.ebooks.tcr.input import TCRInput
-from calibre.ebooks.txt.input import TXTInput
-from calibre.ebooks.lrf.input import LRFInput
-from calibre.ebooks.chm.input import CHMInput
-from calibre.ebooks.snb.input import SNBInput
+from calibre.ebooks.conversion.plugins.comic_input import ComicInput
+from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
+from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
+from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
+from calibre.ebooks.conversion.plugins.html_input import HTMLInput
+from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput
+from calibre.ebooks.conversion.plugins.lit_input import LITInput
+from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
+from calibre.ebooks.conversion.plugins.odt_input import ODTInput
+from calibre.ebooks.conversion.plugins.pdb_input import PDBInput
+from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input
+from calibre.ebooks.conversion.plugins.pdf_input import PDFInput
+from calibre.ebooks.conversion.plugins.pml_input import PMLInput
+from calibre.ebooks.conversion.plugins.rb_input import RBInput
+from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput
+from calibre.ebooks.conversion.plugins.rtf_input import RTFInput
+from calibre.ebooks.conversion.plugins.tcr_input import TCRInput
+from calibre.ebooks.conversion.plugins.txt_input import TXTInput
+from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
+from calibre.ebooks.conversion.plugins.chm_input import CHMInput
+from calibre.ebooks.conversion.plugins.snb_input import SNBInput
 
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.fb2.output import FB2Output
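Code that used to import these classes from the old modules has to pick them up from the new locations; the CHM input hunk further down does exactly this by importing HTMLInput from calibre.customize.builtins. A small illustration, assuming only the import paths shown in this hunk:

    # Old location (removed by this commit):
    #   from calibre.ebooks.html.input import HTMLInput
    # New module path, as registered in the builtins list above:
    from calibre.ebooks.conversion.plugins.html_input import HTMLInput
    # The class is also reachable via calibre.customize.builtins, which is
    # what the CHM input plugin below switches to.

    print(HTMLInput.name)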
@@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en'
 Based on ideas from comiclrf created by FangornUK.
 '''
 
-import os, shutil, traceback, textwrap, time, codecs
+import os, traceback, time
 from Queue import Empty
 
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre import extract, CurrentDir, prints, walk
+from calibre import extract, prints, walk
 from calibre.constants import filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.ipc.server import Server
@@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir):
     return ans, failures
 
-
-class ComicInput(InputFormatPlugin):
[the rest of the removed block is the entire ComicInput class: its options and
recommendations sets plus the get_comics_from_collection(), get_pages(),
get_images(), convert() and create_wrappers() methods. The same code is
re-added, unchanged apart from its imports, as the new file
src/calibre/ebooks/conversion/plugins/comic_input.py shown in full below.]
New file: src/calibre/ebooks/conversion/plugins/__init__.py (11 lines)
@@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
@@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 
 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.pdb.header import PdbHeaderReader
-from calibre.ebooks.azw4.reader import Reader
 
 class AZW4Input(InputFormatPlugin):
 
@@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from calibre.ebooks.pdb.header import PdbHeaderReader
+        from calibre.ebooks.azw4.reader import Reader
+
         header = PdbHeaderReader(stream)
         reader = Reader(header, stream, log, options)
         opf = reader.extract_content(os.getcwd())
@@ -3,9 +3,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                 ' and Alex Bramley <a.bramley at gmail.com>.'
 
-import os, uuid
+import os
 
-from lxml import html
-
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
@@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin):
 
     def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
         # use HTMLInput plugin to generate book
-        from calibre.ebooks.html.input import HTMLInput
+        from calibre.customize.builtins import HTMLInput
         opts.breadth_first = True
         htmlinput = HTMLInput(None)
         oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
@@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin):
 
 
     def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
+        import uuid
+        from lxml import html
         from calibre.ebooks.conversion.plumber import create_oebbook
         from calibre.ebooks.oeb.base import DirContainer
         oeb = create_oebbook(log, None, opts,
@@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin):
         return oeb
 
     def _create_html_root(self, hhcpath, log):
+        from lxml import html
         hhcdata = self._read_file(hhcpath)
         hhcroot = html.fromstring(hhcdata)
         chapters = self._process_nodes(hhcroot)
New file: src/calibre/ebooks/conversion/plugins/comic_input.py (259 lines)
@@ -0,0 +1,259 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Based on ideas from comiclrf created by FangornUK.
'''

import shutil, textwrap, codecs, os

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory

class ComicInput(InputFormatPlugin):

    name = 'Comic Input'
    author = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
    file_types = set(['cbz', 'cbr', 'cbc'])
    is_image_collection = True
    core_usage = -1

    options = set([
        OptionRecommendation(name='colors', recommended_value=256,
            help=_('Number of colors for grayscale image conversion. Default: '
                '%default. Values of less than 256 may result in blurred text '
                'on your device if you are creating your comics in EPUB format.')),
        OptionRecommendation(name='dont_normalize', recommended_value=False,
            help=_('Disable normalize (improve contrast) color range '
                'for pictures. Default: False')),
        OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
            help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
        OptionRecommendation(name='dont_sharpen', recommended_value=False,
            help=_('Disable sharpening.')),
        OptionRecommendation(name='disable_trim', recommended_value=False,
            help=_('Disable trimming of comic pages. For some comics, '
                'trimming might remove content as well as borders.')),
        OptionRecommendation(name='landscape', recommended_value=False,
            help=_("Don't split landscape images into two portrait images")),
        OptionRecommendation(name='wide', recommended_value=False,
            help=_("Keep aspect ratio and scale image using screen height as "
                "image width for viewing in landscape mode.")),
        OptionRecommendation(name='right2left', recommended_value=False,
            help=_('Used for right-to-left publications like manga. '
                'Causes landscape pages to be split into portrait pages '
                'from right to left.')),
        OptionRecommendation(name='despeckle', recommended_value=False,
            help=_('Enable Despeckle. Reduces speckle noise. '
                'May greatly increase processing time.')),
        OptionRecommendation(name='no_sort', recommended_value=False,
            help=_("Don't sort the files found in the comic "
                "alphabetically by name. Instead use the order they were "
                "added to the comic.")),
        OptionRecommendation(name='output_format', choices=['png', 'jpg'],
            recommended_value='png', help=_('The format that images in the created ebook '
                'are converted to. You can experiment to see which format gives '
                'you optimal size and look on your device.')),
        OptionRecommendation(name='no_process', recommended_value=False,
            help=_("Apply no processing to the image")),
        OptionRecommendation(name='dont_grayscale', recommended_value=False,
            help=_('Do not convert the image to grayscale (black and white)')),
        OptionRecommendation(name='comic_image_size', recommended_value=None,
            help=_('Specify the image size as widthxheight pixels. Normally,'
                ' an image size is automatically calculated from the output '
                'profile, this option overrides it.')),
        OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
            help=_('When converting a CBC do not add links to each page to'
                ' the TOC. Note this only applies if the TOC has more than one'
                ' section')),
        ])

    recommendations = set([
        ('margin_left', 0, OptionRecommendation.HIGH),
        ('margin_top', 0, OptionRecommendation.HIGH),
        ('margin_right', 0, OptionRecommendation.HIGH),
        ('margin_bottom', 0, OptionRecommendation.HIGH),
        ('insert_blank_line', False, OptionRecommendation.HIGH),
        ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
        ('change_justification', 'left', OptionRecommendation.HIGH),
        ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
        ('chapter', None, OptionRecommendation.HIGH),
        ('page_breaks_brefore', None, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ('page_breaks_before', None, OptionRecommendation.HIGH),
        ('disable_font_rescaling', True, OptionRecommendation.HIGH),
        ('linearize_tables', False, OptionRecommendation.HIGH),
        ])

    def get_comics_from_collection(self, stream):
        from calibre.libunzip import extract as zipextract
        tdir = PersistentTemporaryDirectory('_comic_collection')
        zipextract(stream, tdir)
        comics = []
        with CurrentDir(tdir):
            if not os.path.exists('comics.txt'):
                raise ValueError((
                    '%s is not a valid comic collection'
                    ' no comics.txt was found in the file')
                        %stream.name)
            raw = open('comics.txt', 'rb').read()
            if raw.startswith(codecs.BOM_UTF16_BE):
                raw = raw.decode('utf-16-be')[1:]
            elif raw.startswith(codecs.BOM_UTF16_LE):
                raw = raw.decode('utf-16-le')[1:]
            elif raw.startswith(codecs.BOM_UTF8):
                raw = raw.decode('utf-8')[1:]
            else:
                raw = raw.decode('utf-8')
            for line in raw.splitlines():
                line = line.strip()
                if not line:
                    continue
                fname, title = line.partition(':')[0], line.partition(':')[-1]
                fname = fname.replace('#', '_')
                fname = os.path.join(tdir, *fname.split('/'))
                if not title:
                    title = os.path.basename(fname).rpartition('.')[0]
                if os.access(fname, os.R_OK):
                    comics.append([title, fname])
        if not comics:
            raise ValueError('%s has no comics'%stream.name)
        return comics

    def get_pages(self, comic, tdir2):
        from calibre.ebooks.comic.input import (extract_comic, process_pages,
                find_pages)
        tdir = extract_comic(comic)
        new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                verbose=self.opts.verbose)
        thumbnail = None
        if not new_pages:
            raise ValueError('Could not find any pages in the comic: %s'
                    %comic)
        if self.opts.no_process:
            n2 = []
            for page in new_pages:
                n2.append(os.path.join(tdir2, os.path.basename(page)))
                shutil.copyfile(page, n2[-1])
            new_pages = n2
        else:
            new_pages, failures = process_pages(new_pages, self.opts,
                    self.report_progress, tdir2)
            if failures:
                self.log.warning('Could not process the following pages '
                        '(run with --verbose to see why):')
                for f in failures:
                    self.log.warning('\t', f)
            if not new_pages:
                raise ValueError('Could not find any valid pages in comic: %s'
                        % comic)
            thumbnail = os.path.join(tdir2,
                    'thumbnail.'+self.opts.output_format.lower())
            if not os.access(thumbnail, os.R_OK):
                thumbnail = None
        return new_pages

    def get_images(self):
        return self._images

    def convert(self, stream, opts, file_ext, log, accelerators):
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log= opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages: continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.path.abspath('.'), mi)
        entries = []

        def href(x):
            if len(comics) == 1: return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x), None,
                                _('Page')+' %d'%(i+1), play_order=po)
                        po += 1
        opf.set_toc(toc)
        m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
        opf.render(m, n, 'toc.ncx')
        return os.path.abspath('metadata.opf')

    def create_wrappers(self, pages):
        from calibre.ebooks.oeb.base import XHTML_NS
        wrappers = []
        WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
            <head>
                <title>Page #%d</title>
                <style type="text/css">
                    @page { margin:0pt; padding: 0pt}
                    body { margin: 0pt; padding: 0pt}
                    div { text-align: center }
                </style>
            </head>
            <body>
                <div>
                    <img src="%s" alt="comic page #%d" />
                </div>
            </body>
        </html>
        ''')
        dir = os.path.dirname(pages[0])
        for i, page in enumerate(pages):
            wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
            page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
            open(page, 'wb').write(wrapper)
            wrappers.append(page)
        return wrappers
@@ -12,7 +12,6 @@ from subprocess import Popen, PIPE
 from cStringIO import StringIO
 
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.txt.processor import convert_basic
 
 class DJVUInput(InputFormatPlugin):
 
@@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin):
         ])
 
     def convert(self, stream, options, file_ext, log, accelerators):
+        from calibre.ebooks.txt.processor import convert_basic
+
         stdout = StringIO()
         ppdjvu = True
         # using djvutxt is MUCH faster, should make it an option
@@ -3,11 +3,9 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, uuid
+import os
 from itertools import cycle
 
-from lxml import etree
-
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 
 class EPUBInput(InputFormatPlugin):
@@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin):
            f.write(raw[1024:])
 
     def process_encryption(self, encfile, opf, log):
+        from lxml import etree
+        import uuid
         key = None
         for item in opf.identifier_iter():
             scheme = None
@@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
         return False
 
     def rationalize_cover(self, opf, log):
+        from lxml import etree
         guide_cover, guide_elem = None, None
         for guide_elem in opf.iterguide():
             if guide_elem.get('type', '').lower() == 'cover':
@@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
                renderer)
 
     def find_opf(self):
+        from lxml import etree
        def attr(n, attr):
            for k, v in n.attrib.items():
                if k.endswith(attr):
@@ -6,7 +6,6 @@ Convert .fb2 files to .lrf
 """
 import os, re
 from base64 import b64decode
-from lxml import etree
 
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre import guess_type
@@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from lxml import etree
         from calibre.ebooks.metadata.opf2 import OPFCreator
         from calibre.ebooks.metadata.meta import get_metadata
         from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
New file: src/calibre/ebooks/conversion/plugins/html_input.py (283 lines)
@@ -0,0 +1,283 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, tempfile, os
from functools import partial
from itertools import izip
from urllib import quote

from calibre.constants import islinux, isbsd
from calibre.customize.conversion import (InputFormatPlugin,
        OptionRecommendation)
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename


class HTMLInput(InputFormatPlugin):

    name = 'HTML Input'
    author = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
    file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])

    options = set([
        OptionRecommendation(name='breadth_first',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Traverse links in HTML files breadth first. Normally, '
                'they are traversed depth first.'
                )
        ),

        OptionRecommendation(name='max_levels',
            recommended_value=5, level=OptionRecommendation.LOW,
            help=_('Maximum levels of recursion when following links in '
                'HTML files. Must be non-negative. 0 implies that no '
                'links in the root HTML file are followed. Default is '
                '%default.'
                )
        ),

        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                'files into a standard folder hierarchy. Only use this option '
                'if you know what you are doing as it can result in various '
                'nasty side effects in the rest of the conversion pipeline.'
                )
        ),

        ])

    def convert(self, stream, opts, file_ext, log,
                accelerators):
        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts

        fname = None
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
            fname = os.path.basename(stream.name)

        if file_ext != 'opf':
            if opts.dont_package:
                raise ValueError('The --dont-package option is not supported for an HTML input file')
            from calibre.ebooks.metadata.html import get_metadata
            mi = get_metadata(stream)
            if fname:
                from calibre.ebooks.metadata.meta import metadata_from_filename
                fmi = metadata_from_filename(fname)
                fmi.smart_update(mi)
                mi = fmi
            oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
            return oeb

        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    def is_case_sensitive(self, path):
        if getattr(self, '_is_case_sensitive', None) is not None:
            return self._is_case_sensitive
        if not path or not os.path.exists(path):
            return islinux or isbsd
        self._is_case_sensitive = not (os.path.exists(path.lower()) \
                and os.path.exists(path.upper()))
        return self._is_case_sensitive

    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                    href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear: continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        for title, item in izip(use, self.oeb.spine):
            if not item.linear: continue
            toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb

    def link_to_local_path(self, link_, base=None):
        from calibre.ebooks.html.input import Link
        if not isinstance(link_, unicode):
            try:
                link_ = link_.decode('utf-8', 'error')
            except:
                self.log.warn('Failed to decode link %r. Ignoring'%link_)
                return None, None
        try:
            l = Link(link_, base if base else os.getcwdu())
        except:
            self.log.exception('Failed to process link: %r'%link_)
            return None, None
        if l.path is None:
            # Not a local resource
            return None, None
        link = l.path.replace('/', os.sep).strip()
        frag = l.fragment
        if not link:
            return None, None
        return link, frag

    def resource_adder(self, link_, base=None):
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            id, href = self.oeb.manifest.generate(id='added',
                    href=bhref)
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode):
                bhref = bhref.encode('utf-8')
            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink

    def css_import_handler(self, base, href):
        link, frag = self.link_to_local_path(href, base=base)
        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
            return (None, None)
        try:
            raw = open(link, 'rb').read().decode('utf-8', 'replace')
            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
        except:
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)
@@ -10,9 +10,6 @@ import os
 
 from calibre import guess_type
 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.utils.zipfile import ZipFile
 
 class HTMLZInput(InputFormatPlugin):
 
@@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from calibre.ebooks.chardet import xml_to_unicode
+        from calibre.ebooks.metadata.opf2 import OPF
+        from calibre.utils.zipfile import ZipFile
+
         self.log = log
         html = u''
         top_levels = []
New file: src/calibre/ebooks/conversion/plugins/lrf_input.py (87 lines)
@@ -0,0 +1,87 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, sys
from calibre.customize.conversion import InputFormatPlugin

class LRFInput(InputFormatPlugin):

    name = 'LRF Input'
    author = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
                Canvas, ImageBlock, RuledLine)
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            open('lrs.xml', 'wb').write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except:
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                        image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles') : styles,
                ('calibre', 'canvas') : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            sys.setrecursionlimit(5000)
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
New file: src/calibre/ebooks/conversion/plugins/odt_input.py (25 lines)
@@ -0,0 +1,25 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Convert an ODT file into a Open Ebook
'''

from calibre.customize.conversion import InputFormatPlugin

class ODTInput(InputFormatPlugin):

    name = 'ODT Input'
    author = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types = set(['odt'])


    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.odt.input import Extract
        return Extract()(stream, '.', log)
@@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 
 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.pdb.header import PdbHeaderReader
-from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
 
 class PDBInput(InputFormatPlugin):
 
@@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from calibre.ebooks.pdb.header import PdbHeaderReader
+        from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
+
         header = PdbHeaderReader(stream)
         Reader = get_reader(header.ident)
 
@@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.pdf.pdftohtml import pdftohtml
-from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.constants import plugins
 pdfreflow, pdfreflow_err = plugins['pdfreflow']
 
@@ -43,6 +41,9 @@ class PDFInput(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+        from calibre.ebooks.pdf.pdftohtml import pdftohtml
+
         log.debug('Converting file to html...')
         # The main html file will be named index.html
         self.opts, self.log = options, log
@@ -11,9 +11,6 @@ import shutil
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
-from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.metadata.opf2 import OPFCreator
 
 class PMLInput(InputFormatPlugin):
 
@@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin):
     file_types = set(['pml', 'pmlz'])
 
     def process_pml(self, pml_path, html_path, close_all=False):
+        from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
+
         pclose = False
         hclose = False
 
@@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from calibre.ebooks.metadata.toc import TOC
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+
         self.options = options
         self.log = log
         pages, images = [], []
@@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
 
 import os
 
-from calibre.ebooks.rb.reader import Reader
 from calibre.customize.conversion import InputFormatPlugin
 
 class RBInput(InputFormatPlugin):
@@ -18,6 +17,8 @@ class RBInput(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
+        from calibre.ebooks.rb.reader import Reader
+
         reader = Reader(stream, log, options.input_encoding)
         opf = reader.extract_content(os.getcwd())
 
298
src/calibre/ebooks/conversion/plugins/rtf_input.py
Normal file
298
src/calibre/ebooks/conversion/plugins/rtf_input.py
Normal file
@ -0,0 +1,298 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import os, glob, re, textwrap
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
border_style_map = {
|
||||||
|
'single' : 'solid',
|
||||||
|
'double-thickness-border' : 'double',
|
||||||
|
'shadowed-border': 'outset',
|
||||||
|
'double-border': 'double',
|
||||||
|
'dotted-border': 'dotted',
|
||||||
|
'dashed': 'dashed',
|
||||||
|
'hairline': 'solid',
|
||||||
|
'inset': 'inset',
|
||||||
|
'dash-small': 'dashed',
|
||||||
|
'dot-dash': 'dotted',
|
||||||
|
'dot-dot-dash': 'dotted',
|
||||||
|
'outset': 'outset',
|
||||||
|
'tripple': 'double',
|
||||||
|
'triple': 'double',
|
||||||
|
'thick-thin-small': 'solid',
|
||||||
|
'thin-thick-small': 'solid',
|
||||||
|
'thin-thick-thin-small': 'solid',
|
||||||
|
'thick-thin-medium': 'solid',
|
||||||
|
'thin-thick-medium': 'solid',
|
||||||
|
'thin-thick-thin-medium': 'solid',
|
||||||
|
'thick-thin-large': 'solid',
|
||||||
|
'thin-thick-thin-large': 'solid',
|
||||||
|
'wavy': 'ridge',
|
||||||
|
'double-wavy': 'ridge',
|
||||||
|
'striped': 'ridge',
|
||||||
|
'emboss': 'inset',
|
||||||
|
'engrave': 'inset',
|
||||||
|
'frame': 'ridge',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RTFInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'RTF Input'
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
description = 'Convert RTF files to HTML'
|
||||||
|
file_types = set(['rtf'])
|
||||||
|
|
||||||
|
def generate_xml(self, stream):
|
||||||
|
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
||||||
|
ofile = 'dataxml.xml'
|
||||||
|
run_lev, debug_dir, indent_out = 1, None, 0
|
||||||
|
if getattr(self.opts, 'debug_pipeline', None) is not None:
|
||||||
|
try:
|
||||||
|
os.mkdir('rtfdebug')
|
||||||
|
debug_dir = 'rtfdebug'
|
||||||
|
run_lev = 4
|
||||||
|
indent_out = 1
|
||||||
|
self.log('Running RTFParser in debug mode')
|
||||||
|
except:
|
||||||
|
self.log.warn('Impossible to run RTFParser in debug mode')
|
||||||
|
parser = ParseRtf(
|
||||||
|
in_file = stream,
|
||||||
|
out_file = ofile,
|
||||||
|
# Convert symbol fonts to unicode equivalents. Default
|
||||||
|
# is 1
|
||||||
|
convert_symbol = 1,
|
||||||
|
|
||||||
|
# Convert Zapf fonts to unicode equivalents. Default
|
||||||
|
# is 1.
|
||||||
|
convert_zapf = 1,
|
||||||
|
|
||||||
|
# Convert Wingding fonts to unicode equivalents.
|
||||||
|
# Default is 1.
|
||||||
|
convert_wingdings = 1,
|
||||||
|
|
||||||
|
# Convert RTF caps to real caps.
|
||||||
|
# Default is 1.
|
||||||
|
convert_caps = 1,
|
||||||
|
|
||||||
|
# Indent resulting XML.
|
||||||
|
# Default is 0 (no indent).
|
||||||
|
indent = indent_out,
|
||||||
|
|
||||||
|
# Form lists from RTF. Default is 1.
|
||||||
|
form_lists = 1,
|
||||||
|
|
||||||
|
# Convert headings to sections. Default is 0.
|
||||||
|
headings_to_sections = 1,
|
||||||
|
|
||||||
|
# Group paragraphs with the same style name. Default is 1.
|
||||||
|
group_styles = 1,
|
||||||
|
|
||||||
|
# Group borders. Default is 1.
|
||||||
|
group_borders = 1,
|
||||||
|
|
||||||
|
# Write or do not write paragraphs. Default is 0.
|
||||||
|
empty_paragraphs = 1,
|
||||||
|
|
||||||
|
#debug
|
||||||
|
deb_dir = debug_dir,
|
||||||
|
run_level = run_lev,
|
||||||
|
)
|
||||||
|
parser.parse_rtf()
|
||||||
|
with open(ofile, 'rb') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
def extract_images(self, picts):
|
||||||
|
import imghdr
|
||||||
|
self.log('Extracting images...')
|
||||||
|
|
||||||
|
with open(picts, 'rb') as f:
|
||||||
|
raw = f.read()
|
||||||
|
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
|
||||||
|
hex = re.compile(r'[^a-fA-F0-9]')
|
||||||
|
encs = [hex.sub('', pict) for pict in picts]
|
||||||
|
|
||||||
|
count = 0
|
||||||
|
imap = {}
|
||||||
|
for enc in encs:
|
||||||
|
if len(enc) % 2 == 1:
|
||||||
|
enc = enc[:-1]
|
||||||
|
data = enc.decode('hex')
|
||||||
|
fmt = imghdr.what(None, data)
|
||||||
|
if fmt is None:
|
||||||
|
fmt = 'wmf'
|
||||||
|
count += 1
|
||||||
|
name = '%04d.%s' % (count, fmt)
|
||||||
|
with open(name, 'wb') as f:
|
||||||
|
f.write(data)
|
||||||
|
imap[count] = name
|
||||||
|
# with open(name+'.hex', 'wb') as f:
|
||||||
|
# f.write(enc)
|
||||||
|
return self.convert_images(imap)
|
||||||
|
|
||||||
|
def convert_images(self, imap):
|
||||||
|
self.default_img = None
|
||||||
|
for count, val in imap.iteritems():
|
||||||
|
try:
|
||||||
|
imap[count] = self.convert_image(val)
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to convert', val)
|
||||||
|
return imap
|
||||||
|
|
||||||
|
def convert_image(self, name):
|
||||||
|
if not name.endswith('.wmf'):
|
||||||
|
return name
|
||||||
|
try:
|
||||||
|
return self.rasterize_wmf(name)
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to convert WMF image %r'%name)
|
||||||
|
return self.replace_wmf(name)
|
||||||
|
|
||||||
|
def replace_wmf(self, name):
|
||||||
|
from calibre.ebooks import calibre_cover
|
||||||
|
if self.default_img is None:
|
||||||
|
self.default_img = calibre_cover('Conversion of WMF images is not supported',
|
||||||
|
'Use Microsoft Word or OpenOffice to save this RTF file'
|
||||||
|
' as HTML and convert that in calibre.', title_size=36,
|
||||||
|
author_size=20)
|
||||||
|
name = name.replace('.wmf', '.jpg')
|
||||||
|
with open(name, 'wb') as f:
|
||||||
|
f.write(self.default_img)
|
||||||
|
return name
|
||||||
|
|
||||||
|
def rasterize_wmf(self, name):
|
||||||
|
from calibre.utils.wmf.parse import wmf_unwrap
|
||||||
|
with open(name, 'rb') as f:
|
||||||
|
data = f.read()
|
||||||
|
data = wmf_unwrap(data)
|
||||||
|
name = name.replace('.wmf', '.png')
|
||||||
|
with open(name, 'wb') as f:
|
||||||
|
f.write(data)
|
||||||
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
def write_inline_css(self, ic, border_styles):
|
||||||
|
font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
|
||||||
|
enumerate(ic.font_sizes)]
|
||||||
|
color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
|
||||||
|
enumerate(ic.colors)]
|
||||||
|
css = textwrap.dedent('''
|
||||||
|
span.none {
|
||||||
|
text-decoration: none; font-weight: normal;
|
||||||
|
font-style: normal; font-variant: normal
|
||||||
|
}
|
||||||
|
|
||||||
|
span.italics { font-style: italic }
|
||||||
|
|
||||||
|
span.bold { font-weight: bold }
|
||||||
|
|
||||||
|
span.small-caps { font-variant: small-caps }
|
||||||
|
|
||||||
|
span.underlined { text-decoration: underline }
|
||||||
|
|
||||||
|
span.strike-through { text-decoration: line-through }
|
||||||
|
|
||||||
|
''')
|
||||||
|
css += '\n'+'\n'.join(font_size_classes)
|
||||||
|
css += '\n' +'\n'.join(color_classes)
|
||||||
|
|
||||||
|
for cls, val in border_styles.iteritems():
|
||||||
|
css += '\n\n.%s {\n%s\n}'%(cls, val)
|
||||||
|
|
||||||
|
with open('styles.css', 'ab') as f:
|
||||||
|
f.write(css)
|
||||||
|
|
||||||
|
def convert_borders(self, doc):
|
||||||
|
border_styles = []
|
||||||
|
style_map = {}
|
||||||
|
for elem in doc.xpath(r'//*[local-name()="cell"]'):
|
||||||
|
style = ['border-style: hidden', 'border-width: 1px',
|
||||||
|
'border-color: black']
|
||||||
|
for x in ('bottom', 'top', 'left', 'right'):
|
||||||
|
bs = elem.get('border-cell-%s-style'%x, None)
|
||||||
|
if bs:
|
||||||
|
cbs = border_style_map.get(bs, 'solid')
|
||||||
|
style.append('border-%s-style: %s'%(x, cbs))
|
||||||
|
bw = elem.get('border-cell-%s-line-width'%x, None)
|
||||||
|
if bw:
|
||||||
|
style.append('border-%s-width: %spt'%(x, bw))
|
||||||
|
bc = elem.get('border-cell-%s-color'%x, None)
|
||||||
|
if bc:
|
||||||
|
style.append('border-%s-color: %s'%(x, bc))
|
||||||
|
style = ';\n'.join(style)
|
||||||
|
if style not in border_styles:
|
||||||
|
border_styles.append(style)
|
||||||
|
idx = border_styles.index(style)
|
||||||
|
cls = 'border_style%d'%idx
|
||||||
|
style_map[cls] = style
|
||||||
|
elem.set('class', cls)
|
||||||
|
return style_map
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
accelerators):
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
||||||
|
from calibre.ebooks.rtf.input import InlineClass
|
||||||
|
self.opts = options
|
||||||
|
self.log = log
|
||||||
|
self.log('Converting RTF to XML...')
|
||||||
|
try:
|
||||||
|
xml = self.generate_xml(stream.name)
|
||||||
|
except RtfInvalidCodeException as e:
|
||||||
|
raise ValueError(_('This RTF file has a feature calibre does not '
|
||||||
|
'support. Convert it to HTML first and then try it.\n%s')%e)
|
||||||
|
|
||||||
|
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
|
||||||
|
if d:
|
||||||
|
imap = {}
|
||||||
|
try:
|
||||||
|
imap = self.extract_images(d[0])
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to extract images...')
|
||||||
|
|
||||||
|
self.log('Parsing XML...')
|
||||||
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
|
doc = etree.fromstring(xml, parser=parser)
|
||||||
|
border_styles = self.convert_borders(doc)
|
||||||
|
for pict in doc.xpath('//rtf:pict[@num]',
|
||||||
|
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
|
||||||
|
num = int(pict.get('num'))
|
||||||
|
name = imap.get(num, None)
|
||||||
|
if name is not None:
|
||||||
|
pict.set('num', name)
|
||||||
|
|
||||||
|
self.log('Converting XML to HTML...')
|
||||||
|
inline_class = InlineClass(self.log)
|
||||||
|
styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
|
||||||
|
extensions = { ('calibre', 'inline-class') : inline_class }
|
||||||
|
transform = etree.XSLT(styledoc, extensions=extensions)
|
||||||
|
result = transform(doc)
|
||||||
|
html = 'index.xhtml'
|
||||||
|
with open(html, 'wb') as f:
|
||||||
|
res = transform.tostring(result)
|
||||||
|
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||||
|
#clean multiple \n
|
||||||
|
res = re.sub('\n+', '\n', res)
|
||||||
|
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
||||||
|
# res = re.sub('\s*<body>', '<body>', res)
|
||||||
|
# res = re.sub('(?<=\n)\n{2}',
|
||||||
|
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||||
|
f.write(res)
|
||||||
|
self.write_inline_css(inline_class, border_styles)
|
||||||
|
stream.seek(0)
|
||||||
|
mi = get_metadata(stream, 'rtf')
|
||||||
|
if not mi.title:
|
||||||
|
mi.title = _('Unknown')
|
||||||
|
if not mi.authors:
|
||||||
|
mi.authors = [_('Unknown')]
|
||||||
|
opf = OPFCreator(os.getcwd(), mi)
|
||||||
|
opf.create_manifest([('index.xhtml', None)])
|
||||||
|
opf.create_spine(['index.xhtml'])
|
||||||
|
opf.render(open('metadata.opf', 'wb'))
|
||||||
|
return os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
|
|
@ -4,13 +4,11 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, uuid
|
import os
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ebooks.snb.snbfile import SNBFile
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
@ -29,7 +27,12 @@ class SNBInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
import uuid
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import DirContainer
|
from calibre.ebooks.oeb.base import DirContainer
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
|
||||||
log.debug("Parsing SNB file...")
|
log.debug("Parsing SNB file...")
|
||||||
snbFile = SNBFile()
|
snbFile = SNBFile()
|
||||||
try:
|
try:
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
|||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ebooks.compression.tcr import decompress
|
|
||||||
|
|
||||||
class TCRInput(InputFormatPlugin):
|
class TCRInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -17,6 +16,8 @@ class TCRInput(InputFormatPlugin):
|
|||||||
file_types = set(['tcr'])
|
file_types = set(['tcr'])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
|
from calibre.ebooks.compression.tcr import decompress
|
||||||
|
|
||||||
log.info('Decompressing text...')
|
log.info('Decompressing text...')
|
||||||
raw_txt = decompress(stream)
|
raw_txt = decompress(stream)
|
||||||
|
|
@ -8,14 +8,6 @@ import os
|
|||||||
|
|
||||||
from calibre import _ent_pat, walk, xml_entity_to_unicode
|
from calibre import _ent_pat, walk, xml_entity_to_unicode
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
|
||||||
from calibre.ebooks.chardet import detect
|
|
||||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
|
||||||
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
|
|
||||||
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
|
|
||||||
separate_hard_scene_breaks
|
|
||||||
from calibre.utils.zipfile import ZipFile
|
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -61,6 +53,17 @@ class TXTInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
||||||
|
from calibre.ebooks.chardet import detect
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
from calibre.ebooks.txt.processor import (convert_basic,
|
||||||
|
convert_markdown, separate_paragraphs_single_line,
|
||||||
|
separate_paragraphs_print_formatted, preserve_spaces,
|
||||||
|
detect_paragraph_type, detect_formatting_type,
|
||||||
|
normalize_line_endings, convert_textile, remove_indents,
|
||||||
|
block_to_single_line, separate_hard_scene_breaks)
|
||||||
|
|
||||||
|
|
||||||
self.log = log
|
self.log = log
|
||||||
txt = ''
|
txt = ''
|
||||||
log.debug('Reading text from file...')
|
log.debug('Reading text from file...')
|
@ -11,19 +11,13 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Input plugin for HTML or OPF ebooks.
|
Input plugin for HTML or OPF ebooks.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, re, sys, uuid, tempfile, errno as gerrno
|
import os, re, sys, errno as gerrno
|
||||||
from urlparse import urlparse, urlunparse
|
from urlparse import urlparse, urlunparse
|
||||||
from urllib import unquote, quote
|
from urllib import unquote
|
||||||
from functools import partial
|
|
||||||
from itertools import izip
|
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
|
||||||
from calibre.ebooks.chardet import detect_xml_encoding
|
from calibre.ebooks.chardet import detect_xml_encoding
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.constants import iswindows
|
||||||
from calibre.constants import islinux, isbsd, iswindows
|
|
||||||
from calibre import unicode_path, as_unicode
|
from calibre import unicode_path, as_unicode
|
||||||
from calibre.utils.localization import get_lang
|
|
||||||
from calibre.utils.filenames import ascii_filename
|
|
||||||
|
|
||||||
class Link(object):
|
class Link(object):
|
||||||
'''
|
'''
|
||||||
@ -241,262 +235,4 @@ def get_filelist(htmlfile, dir, opts, log):
|
|||||||
return filelist
|
return filelist
|
||||||
|
|
||||||
|
|
||||||
class HTMLInput(InputFormatPlugin):
|
|
||||||
|
|
||||||
name = 'HTML Input'
|
|
||||||
author = 'Kovid Goyal'
|
|
||||||
description = 'Convert HTML and OPF files to an OEB'
|
|
||||||
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
|
|
||||||
|
|
||||||
options = set([
|
|
||||||
OptionRecommendation(name='breadth_first',
|
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
|
||||||
help=_('Traverse links in HTML files breadth first. Normally, '
|
|
||||||
'they are traversed depth first.'
|
|
||||||
)
|
|
||||||
),
|
|
||||||
|
|
||||||
OptionRecommendation(name='max_levels',
|
|
||||||
recommended_value=5, level=OptionRecommendation.LOW,
|
|
||||||
help=_('Maximum levels of recursion when following links in '
|
|
||||||
'HTML files. Must be non-negative. 0 implies that no '
|
|
||||||
'links in the root HTML file are followed. Default is '
|
|
||||||
'%default.'
|
|
||||||
)
|
|
||||||
),
|
|
||||||
|
|
||||||
OptionRecommendation(name='dont_package',
|
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
|
||||||
help=_('Normally this input plugin re-arranges all the input '
|
|
||||||
'files into a standard folder hierarchy. Only use this option '
|
|
||||||
'if you know what you are doing as it can result in various '
|
|
||||||
'nasty side effects in the rest of the conversion pipeline.'
|
|
||||||
)
|
|
||||||
),
|
|
||||||
|
|
||||||
])
|
|
||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log,
|
|
||||||
accelerators):
|
|
||||||
self._is_case_sensitive = None
|
|
||||||
basedir = os.getcwd()
|
|
||||||
self.opts = opts
|
|
||||||
|
|
||||||
fname = None
|
|
||||||
if hasattr(stream, 'name'):
|
|
||||||
basedir = os.path.dirname(stream.name)
|
|
||||||
fname = os.path.basename(stream.name)
|
|
||||||
|
|
||||||
if file_ext != 'opf':
|
|
||||||
if opts.dont_package:
|
|
||||||
raise ValueError('The --dont-package option is not supported for an HTML input file')
|
|
||||||
from calibre.ebooks.metadata.html import get_metadata
|
|
||||||
mi = get_metadata(stream)
|
|
||||||
if fname:
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_filename
|
|
||||||
fmi = metadata_from_filename(fname)
|
|
||||||
fmi.smart_update(mi)
|
|
||||||
mi = fmi
|
|
||||||
oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
|
|
||||||
return oeb
|
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
|
||||||
return create_oebbook(log, stream.name, opts,
|
|
||||||
encoding=opts.input_encoding)
|
|
||||||
|
|
||||||
def is_case_sensitive(self, path):
|
|
||||||
if getattr(self, '_is_case_sensitive', None) is not None:
|
|
||||||
return self._is_case_sensitive
|
|
||||||
if not path or not os.path.exists(path):
|
|
||||||
return islinux or isbsd
|
|
||||||
self._is_case_sensitive = not (os.path.exists(path.lower()) \
|
|
||||||
and os.path.exists(path.upper()))
|
|
||||||
return self._is_case_sensitive
|
|
||||||
|
|
||||||
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
|
||||||
from calibre.ebooks.oeb.base import (DirContainer,
|
|
||||||
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
|
|
||||||
xpath)
|
|
||||||
from calibre import guess_type
|
|
||||||
from calibre.ebooks.oeb.transforms.metadata import \
|
|
||||||
meta_info_to_oeb_metadata
|
|
||||||
import cssutils, logging
|
|
||||||
cssutils.log.setLevel(logging.WARN)
|
|
||||||
self.OEB_STYLES = OEB_STYLES
|
|
||||||
oeb = create_oebbook(log, None, opts, self,
|
|
||||||
encoding=opts.input_encoding, populate=False)
|
|
||||||
self.oeb = oeb
|
|
||||||
|
|
||||||
metadata = oeb.metadata
|
|
||||||
meta_info_to_oeb_metadata(mi, metadata, log)
|
|
||||||
if not metadata.language:
|
|
||||||
oeb.logger.warn(u'Language not specified')
|
|
||||||
metadata.add('language', get_lang().replace('_', '-'))
|
|
||||||
if not metadata.creator:
|
|
||||||
oeb.logger.warn('Creator not specified')
|
|
||||||
metadata.add('creator', self.oeb.translate(__('Unknown')))
|
|
||||||
if not metadata.title:
|
|
||||||
oeb.logger.warn('Title not specified')
|
|
||||||
metadata.add('title', self.oeb.translate(__('Unknown')))
|
|
||||||
bookid = str(uuid.uuid4())
|
|
||||||
metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
|
|
||||||
for ident in metadata.identifier:
|
|
||||||
if 'id' in ident.attrib:
|
|
||||||
self.oeb.uid = metadata.identifier[0]
|
|
||||||
break
|
|
||||||
|
|
||||||
filelist = get_filelist(htmlpath, basedir, opts, log)
|
|
||||||
filelist = [f for f in filelist if not f.is_binary]
|
|
||||||
htmlfile_map = {}
|
|
||||||
for f in filelist:
|
|
||||||
path = f.path
|
|
||||||
oeb.container = DirContainer(os.path.dirname(path), log,
|
|
||||||
ignore_opf=True)
|
|
||||||
bname = os.path.basename(path)
|
|
||||||
id, href = oeb.manifest.generate(id='html',
|
|
||||||
href=ascii_filename(bname))
|
|
||||||
htmlfile_map[path] = href
|
|
||||||
item = oeb.manifest.add(id, href, 'text/html')
|
|
||||||
item.html_input_href = bname
|
|
||||||
oeb.spine.add(item, True)
|
|
||||||
|
|
||||||
self.added_resources = {}
|
|
||||||
self.log = log
|
|
||||||
self.log('Normalizing filename cases')
|
|
||||||
for path, href in htmlfile_map.items():
|
|
||||||
if not self.is_case_sensitive(path):
|
|
||||||
path = path.lower()
|
|
||||||
self.added_resources[path] = href
|
|
||||||
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
|
||||||
self.urldefrag = urldefrag
|
|
||||||
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
|
|
||||||
|
|
||||||
self.log('Rewriting HTML links')
|
|
||||||
for f in filelist:
|
|
||||||
path = f.path
|
|
||||||
dpath = os.path.dirname(path)
|
|
||||||
oeb.container = DirContainer(dpath, log, ignore_opf=True)
|
|
||||||
item = oeb.manifest.hrefs[htmlfile_map[path]]
|
|
||||||
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
|
|
||||||
|
|
||||||
for item in oeb.manifest.values():
|
|
||||||
if item.media_type in self.OEB_STYLES:
|
|
||||||
dpath = None
|
|
||||||
for path, href in self.added_resources.items():
|
|
||||||
if href == item.href:
|
|
||||||
dpath = os.path.dirname(path)
|
|
||||||
break
|
|
||||||
cssutils.replaceUrls(item.data,
|
|
||||||
partial(self.resource_adder, base=dpath))
|
|
||||||
|
|
||||||
toc = self.oeb.toc
|
|
||||||
self.oeb.auto_generated_toc = True
|
|
||||||
titles = []
|
|
||||||
headers = []
|
|
||||||
for item in self.oeb.spine:
|
|
||||||
if not item.linear: continue
|
|
||||||
html = item.data
|
|
||||||
title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
|
|
||||||
title = re.sub(r'\s+', ' ', title.strip())
|
|
||||||
if title:
|
|
||||||
titles.append(title)
|
|
||||||
headers.append('(unlabled)')
|
|
||||||
for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
|
|
||||||
expr = '/h:html/h:body//h:%s[position()=1]/text()'
|
|
||||||
header = ''.join(xpath(html, expr % tag))
|
|
||||||
header = re.sub(r'\s+', ' ', header.strip())
|
|
||||||
if header:
|
|
||||||
headers[-1] = header
|
|
||||||
break
|
|
||||||
use = titles
|
|
||||||
if len(titles) > len(set(titles)):
|
|
||||||
use = headers
|
|
||||||
for title, item in izip(use, self.oeb.spine):
|
|
||||||
if not item.linear: continue
|
|
||||||
toc.add(title, item.href)
|
|
||||||
|
|
||||||
oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
|
|
||||||
return oeb
|
|
||||||
|
|
||||||
def link_to_local_path(self, link_, base=None):
|
|
||||||
if not isinstance(link_, unicode):
|
|
||||||
try:
|
|
||||||
link_ = link_.decode('utf-8', 'error')
|
|
||||||
except:
|
|
||||||
self.log.warn('Failed to decode link %r. Ignoring'%link_)
|
|
||||||
return None, None
|
|
||||||
try:
|
|
||||||
l = Link(link_, base if base else os.getcwdu())
|
|
||||||
except:
|
|
||||||
self.log.exception('Failed to process link: %r'%link_)
|
|
||||||
return None, None
|
|
||||||
if l.path is None:
|
|
||||||
# Not a local resource
|
|
||||||
return None, None
|
|
||||||
link = l.path.replace('/', os.sep).strip()
|
|
||||||
frag = l.fragment
|
|
||||||
if not link:
|
|
||||||
return None, None
|
|
||||||
return link, frag
|
|
||||||
|
|
||||||
def resource_adder(self, link_, base=None):
|
|
||||||
link, frag = self.link_to_local_path(link_, base=base)
|
|
||||||
if link is None:
|
|
||||||
return link_
|
|
||||||
try:
|
|
||||||
if base and not os.path.isabs(link):
|
|
||||||
link = os.path.join(base, link)
|
|
||||||
link = os.path.abspath(link)
|
|
||||||
except:
|
|
||||||
return link_
|
|
||||||
if not os.access(link, os.R_OK):
|
|
||||||
return link_
|
|
||||||
if os.path.isdir(link):
|
|
||||||
self.log.warn(link_, 'is a link to a directory. Ignoring.')
|
|
||||||
return link_
|
|
||||||
if not self.is_case_sensitive(tempfile.gettempdir()):
|
|
||||||
link = link.lower()
|
|
||||||
if link not in self.added_resources:
|
|
||||||
bhref = os.path.basename(link)
|
|
||||||
id, href = self.oeb.manifest.generate(id='added',
|
|
||||||
href=bhref)
|
|
||||||
guessed = self.guess_type(href)[0]
|
|
||||||
media_type = guessed or self.BINARY_MIME
|
|
||||||
if media_type == 'text/plain':
|
|
||||||
self.log.warn('Ignoring link to text file %r'%link_)
|
|
||||||
return None
|
|
||||||
|
|
||||||
self.oeb.log.debug('Added', link)
|
|
||||||
self.oeb.container = self.DirContainer(os.path.dirname(link),
|
|
||||||
self.oeb.log, ignore_opf=True)
|
|
||||||
# Load into memory
|
|
||||||
item = self.oeb.manifest.add(id, href, media_type)
|
|
||||||
# bhref refers to an already existing file. The read() method of
|
|
||||||
# DirContainer will call unquote on it before trying to read the
|
|
||||||
# file, therefore we quote it here.
|
|
||||||
if isinstance(bhref, unicode):
|
|
||||||
bhref = bhref.encode('utf-8')
|
|
||||||
item.html_input_href = quote(bhref).decode('utf-8')
|
|
||||||
if guessed in self.OEB_STYLES:
|
|
||||||
item.override_css_fetch = partial(
|
|
||||||
self.css_import_handler, os.path.dirname(link))
|
|
||||||
item.data
|
|
||||||
self.added_resources[link] = href
|
|
||||||
|
|
||||||
nlink = self.added_resources[link]
|
|
||||||
if frag:
|
|
||||||
nlink = '#'.join((nlink, frag))
|
|
||||||
return nlink
|
|
||||||
|
|
||||||
def css_import_handler(self, base, href):
|
|
||||||
link, frag = self.link_to_local_path(href, base=base)
|
|
||||||
if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
|
|
||||||
return (None, None)
|
|
||||||
try:
|
|
||||||
raw = open(link, 'rb').read().decode('utf-8', 'replace')
|
|
||||||
raw = self.oeb.css_preprocessor(raw, add_namespace=True)
|
|
||||||
except:
|
|
||||||
self.log.exception('Failed to read CSS file: %r'%link)
|
|
||||||
return (None, None)
|
|
||||||
return (None, raw)
|
|
||||||
|
@ -6,12 +6,11 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, textwrap, sys, operator
|
import textwrap, operator
|
||||||
from copy import deepcopy, copy
|
from copy import deepcopy, copy
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
|
||||||
from calibre import guess_type
|
from calibre import guess_type
|
||||||
|
|
||||||
class Canvas(etree.XSLTExtension):
|
class Canvas(etree.XSLTExtension):
|
||||||
@ -406,76 +405,4 @@ class Styles(etree.XSLTExtension):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LRFInput(InputFormatPlugin):
|
|
||||||
|
|
||||||
name = 'LRF Input'
|
|
||||||
author = 'Kovid Goyal'
|
|
||||||
description = 'Convert LRF files to HTML'
|
|
||||||
file_types = set(['lrf'])
|
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
|
||||||
accelerators):
|
|
||||||
self.log = log
|
|
||||||
self.log('Generating XML')
|
|
||||||
from calibre.ebooks.lrf.lrfparser import LRFDocument
|
|
||||||
d = LRFDocument(stream)
|
|
||||||
d.parse()
|
|
||||||
xml = d.to_xml(write_files=True)
|
|
||||||
if options.verbose > 2:
|
|
||||||
open('lrs.xml', 'wb').write(xml.encode('utf-8'))
|
|
||||||
parser = etree.XMLParser(no_network=True, huge_tree=True)
|
|
||||||
try:
|
|
||||||
doc = etree.fromstring(xml, parser=parser)
|
|
||||||
except:
|
|
||||||
self.log.warn('Failed to parse XML. Trying to recover')
|
|
||||||
parser = etree.XMLParser(no_network=True, huge_tree=True,
|
|
||||||
recover=True)
|
|
||||||
doc = etree.fromstring(xml, parser=parser)
|
|
||||||
|
|
||||||
|
|
||||||
char_button_map = {}
|
|
||||||
for x in doc.xpath('//CharButton[@refobj]'):
|
|
||||||
ro = x.get('refobj')
|
|
||||||
jump_button = doc.xpath('//*[@objid="%s"]'%ro)
|
|
||||||
if jump_button:
|
|
||||||
jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
|
|
||||||
if jump_to:
|
|
||||||
char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
|
|
||||||
jump_to[0].get('refobj'))
|
|
||||||
plot_map = {}
|
|
||||||
for x in doc.xpath('//Plot[@refobj]'):
|
|
||||||
ro = x.get('refobj')
|
|
||||||
image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
|
|
||||||
if image:
|
|
||||||
imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
|
|
||||||
image[0].get('refstream'))
|
|
||||||
if imgstr:
|
|
||||||
plot_map[ro] = imgstr[0].get('file')
|
|
||||||
|
|
||||||
self.log('Converting XML to HTML...')
|
|
||||||
styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
|
|
||||||
media_type = MediaType()
|
|
||||||
styles = Styles()
|
|
||||||
text_block = TextBlock(styles, char_button_map, plot_map, log)
|
|
||||||
canvas = Canvas(doc, styles, text_block, log)
|
|
||||||
image_block = ImageBlock(canvas)
|
|
||||||
ruled_line = RuledLine()
|
|
||||||
extensions = {
|
|
||||||
('calibre', 'media-type') : media_type,
|
|
||||||
('calibre', 'text-block') : text_block,
|
|
||||||
('calibre', 'ruled-line') : ruled_line,
|
|
||||||
('calibre', 'styles') : styles,
|
|
||||||
('calibre', 'canvas') : canvas,
|
|
||||||
('calibre', 'image-block'): image_block,
|
|
||||||
}
|
|
||||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
|
||||||
try:
|
|
||||||
result = transform(doc)
|
|
||||||
except RuntimeError:
|
|
||||||
sys.setrecursionlimit(5000)
|
|
||||||
result = transform(doc)
|
|
||||||
|
|
||||||
with open('content.opf', 'wb') as f:
|
|
||||||
f.write(result)
|
|
||||||
styles.write()
|
|
||||||
return os.path.abspath('content.opf')
|
|
||||||
|
@ -12,7 +12,6 @@ from lxml import etree
|
|||||||
from odf.odf2xhtml import ODF2XHTML
|
from odf.odf2xhtml import ODF2XHTML
|
||||||
|
|
||||||
from calibre import CurrentDir, walk
|
from calibre import CurrentDir, walk
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
|
||||||
|
|
||||||
class Extract(ODF2XHTML):
|
class Extract(ODF2XHTML):
|
||||||
|
|
||||||
@ -178,16 +177,4 @@ class Extract(ODF2XHTML):
|
|||||||
return os.path.abspath('metadata.opf')
|
return os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
|
|
||||||
class ODTInput(InputFormatPlugin):
|
|
||||||
|
|
||||||
name = 'ODT Input'
|
|
||||||
author = 'Kovid Goyal'
|
|
||||||
description = 'Convert ODT (OpenOffice) files to HTML'
|
|
||||||
file_types = set(['odt'])
|
|
||||||
|
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
|
||||||
accelerators):
|
|
||||||
return Extract()(stream, '.', log)
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,42 +2,9 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os, glob, re, textwrap
|
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
|
||||||
|
|
||||||
border_style_map = {
|
|
||||||
'single' : 'solid',
|
|
||||||
'double-thickness-border' : 'double',
|
|
||||||
'shadowed-border': 'outset',
|
|
||||||
'double-border': 'double',
|
|
||||||
'dotted-border': 'dotted',
|
|
||||||
'dashed': 'dashed',
|
|
||||||
'hairline': 'solid',
|
|
||||||
'inset': 'inset',
|
|
||||||
'dash-small': 'dashed',
|
|
||||||
'dot-dash': 'dotted',
|
|
||||||
'dot-dot-dash': 'dotted',
|
|
||||||
'outset': 'outset',
|
|
||||||
'tripple': 'double',
|
|
||||||
'triple': 'double',
|
|
||||||
'thick-thin-small': 'solid',
|
|
||||||
'thin-thick-small': 'solid',
|
|
||||||
'thin-thick-thin-small': 'solid',
|
|
||||||
'thick-thin-medium': 'solid',
|
|
||||||
'thin-thick-medium': 'solid',
|
|
||||||
'thin-thick-thin-medium': 'solid',
|
|
||||||
'thick-thin-large': 'solid',
|
|
||||||
'thin-thick-thin-large': 'solid',
|
|
||||||
'wavy': 'ridge',
|
|
||||||
'double-wavy': 'ridge',
|
|
||||||
'striped': 'ridge',
|
|
||||||
'emboss': 'inset',
|
|
||||||
'engrave': 'inset',
|
|
||||||
'frame': 'ridge',
|
|
||||||
}
|
|
||||||
|
|
||||||
class InlineClass(etree.XSLTExtension):
|
class InlineClass(etree.XSLTExtension):
|
||||||
|
|
||||||
@ -71,261 +38,3 @@ class InlineClass(etree.XSLTExtension):
|
|||||||
output_parent.text = ' '.join(classes)
|
output_parent.text = ' '.join(classes)
|
||||||
|
|
||||||
|
|
||||||
class RTFInput(InputFormatPlugin):
|
|
||||||
|
|
||||||
name = 'RTF Input'
|
|
||||||
author = 'Kovid Goyal'
|
|
||||||
description = 'Convert RTF files to HTML'
|
|
||||||
file_types = set(['rtf'])
|
|
||||||
|
|
||||||
def generate_xml(self, stream):
|
|
||||||
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
|
||||||
ofile = 'dataxml.xml'
|
|
||||||
run_lev, debug_dir, indent_out = 1, None, 0
|
|
||||||
if getattr(self.opts, 'debug_pipeline', None) is not None:
|
|
||||||
try:
|
|
||||||
os.mkdir('rtfdebug')
|
|
||||||
debug_dir = 'rtfdebug'
|
|
||||||
run_lev = 4
|
|
||||||
indent_out = 1
|
|
||||||
self.log('Running RTFParser in debug mode')
|
|
||||||
except:
|
|
||||||
self.log.warn('Impossible to run RTFParser in debug mode')
|
|
||||||
parser = ParseRtf(
|
|
||||||
in_file = stream,
|
|
||||||
out_file = ofile,
|
|
||||||
# Convert symbol fonts to unicode equivalents. Default
|
|
||||||
# is 1
|
|
||||||
convert_symbol = 1,
|
|
||||||
|
|
||||||
# Convert Zapf fonts to unicode equivalents. Default
|
|
||||||
# is 1.
|
|
||||||
convert_zapf = 1,
|
|
||||||
|
|
||||||
# Convert Wingding fonts to unicode equivalents.
|
|
||||||
# Default is 1.
|
|
||||||
convert_wingdings = 1,
|
|
||||||
|
|
||||||
# Convert RTF caps to real caps.
|
|
||||||
# Default is 1.
|
|
||||||
convert_caps = 1,
|
|
||||||
|
|
||||||
# Indent resulting XML.
|
|
||||||
# Default is 0 (no indent).
|
|
||||||
indent = indent_out,
|
|
||||||
|
|
||||||
# Form lists from RTF. Default is 1.
|
|
||||||
form_lists = 1,
|
|
||||||
|
|
||||||
# Convert headings to sections. Default is 0.
|
|
||||||
headings_to_sections = 1,
|
|
||||||
|
|
||||||
# Group paragraphs with the same style name. Default is 1.
|
|
||||||
group_styles = 1,
|
|
||||||
|
|
||||||
# Group borders. Default is 1.
|
|
||||||
group_borders = 1,
|
|
||||||
|
|
||||||
# Write or do not write paragraphs. Default is 0.
|
|
||||||
empty_paragraphs = 1,
|
|
||||||
|
|
||||||
#debug
|
|
||||||
deb_dir = debug_dir,
|
|
||||||
run_level = run_lev,
|
|
||||||
)
|
|
||||||
parser.parse_rtf()
|
|
||||||
with open(ofile, 'rb') as f:
|
|
||||||
return f.read()
|
|
||||||
|
|
||||||
def extract_images(self, picts):
|
|
||||||
import imghdr
|
|
||||||
self.log('Extracting images...')
|
|
||||||
|
|
||||||
with open(picts, 'rb') as f:
|
|
||||||
raw = f.read()
|
|
||||||
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
|
|
||||||
hex = re.compile(r'[^a-fA-F0-9]')
|
|
||||||
encs = [hex.sub('', pict) for pict in picts]
|
|
||||||
|
|
||||||
count = 0
|
|
||||||
imap = {}
|
|
||||||
for enc in encs:
|
|
||||||
if len(enc) % 2 == 1:
|
|
||||||
enc = enc[:-1]
|
|
||||||
data = enc.decode('hex')
|
|
||||||
fmt = imghdr.what(None, data)
|
|
||||||
if fmt is None:
|
|
||||||
fmt = 'wmf'
|
|
||||||
count += 1
|
|
||||||
name = '%04d.%s' % (count, fmt)
|
|
||||||
with open(name, 'wb') as f:
|
|
||||||
f.write(data)
|
|
||||||
imap[count] = name
|
|
||||||
# with open(name+'.hex', 'wb') as f:
|
|
||||||
# f.write(enc)
|
|
||||||
return self.convert_images(imap)
|
|
||||||
|
|
||||||
def convert_images(self, imap):
|
|
||||||
self.default_img = None
|
|
||||||
for count, val in imap.iteritems():
|
|
||||||
try:
|
|
||||||
imap[count] = self.convert_image(val)
|
|
||||||
except:
|
|
||||||
self.log.exception('Failed to convert', val)
|
|
||||||
return imap
|
|
||||||
|
|
||||||
def convert_image(self, name):
|
|
||||||
if not name.endswith('.wmf'):
|
|
||||||
return name
|
|
||||||
try:
|
|
||||||
return self.rasterize_wmf(name)
|
|
||||||
except:
|
|
||||||
self.log.exception('Failed to convert WMF image %r'%name)
|
|
||||||
return self.replace_wmf(name)
|
|
||||||
|
|
||||||
def replace_wmf(self, name):
|
|
||||||
from calibre.ebooks import calibre_cover
|
|
||||||
if self.default_img is None:
|
|
||||||
self.default_img = calibre_cover('Conversion of WMF images is not supported',
|
|
||||||
'Use Microsoft Word or OpenOffice to save this RTF file'
|
|
||||||
' as HTML and convert that in calibre.', title_size=36,
|
|
||||||
author_size=20)
|
|
||||||
name = name.replace('.wmf', '.jpg')
|
|
||||||
with open(name, 'wb') as f:
|
|
||||||
f.write(self.default_img)
|
|
||||||
return name
|
|
||||||
|
|
||||||
def rasterize_wmf(self, name):
|
|
||||||
from calibre.utils.wmf.parse import wmf_unwrap
|
|
||||||
with open(name, 'rb') as f:
|
|
||||||
data = f.read()
|
|
||||||
data = wmf_unwrap(data)
|
|
||||||
name = name.replace('.wmf', '.png')
|
|
||||||
with open(name, 'wb') as f:
|
|
||||||
f.write(data)
|
|
||||||
return name
|
|
||||||
|
|
||||||
|
|
||||||
def write_inline_css(self, ic, border_styles):
|
|
||||||
font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
|
|
||||||
enumerate(ic.font_sizes)]
|
|
||||||
color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
|
|
||||||
enumerate(ic.colors)]
|
|
||||||
css = textwrap.dedent('''
|
|
||||||
span.none {
|
|
||||||
text-decoration: none; font-weight: normal;
|
|
||||||
font-style: normal; font-variant: normal
|
|
||||||
}
|
|
||||||
|
|
||||||
span.italics { font-style: italic }
|
|
||||||
|
|
||||||
span.bold { font-weight: bold }
|
|
||||||
|
|
||||||
span.small-caps { font-variant: small-caps }
|
|
||||||
|
|
||||||
span.underlined { text-decoration: underline }
|
|
||||||
|
|
||||||
span.strike-through { text-decoration: line-through }
|
|
||||||
|
|
||||||
''')
|
|
||||||
css += '\n'+'\n'.join(font_size_classes)
|
|
||||||
css += '\n' +'\n'.join(color_classes)
|
|
||||||
|
|
||||||
for cls, val in border_styles.iteritems():
|
|
||||||
css += '\n\n.%s {\n%s\n}'%(cls, val)
|
|
||||||
|
|
||||||
with open('styles.css', 'ab') as f:
|
|
||||||
f.write(css)
|
|
||||||
|
|
||||||
def convert_borders(self, doc):
|
|
||||||
border_styles = []
|
|
||||||
style_map = {}
|
|
||||||
for elem in doc.xpath(r'//*[local-name()="cell"]'):
|
|
||||||
style = ['border-style: hidden', 'border-width: 1px',
|
|
||||||
'border-color: black']
|
|
||||||
for x in ('bottom', 'top', 'left', 'right'):
|
|
||||||
bs = elem.get('border-cell-%s-style'%x, None)
|
|
||||||
if bs:
|
|
||||||
cbs = border_style_map.get(bs, 'solid')
|
|
||||||
style.append('border-%s-style: %s'%(x, cbs))
|
|
||||||
bw = elem.get('border-cell-%s-line-width'%x, None)
|
|
||||||
if bw:
|
|
||||||
style.append('border-%s-width: %spt'%(x, bw))
|
|
||||||
bc = elem.get('border-cell-%s-color'%x, None)
|
|
||||||
if bc:
|
|
||||||
style.append('border-%s-color: %s'%(x, bc))
|
|
||||||
style = ';\n'.join(style)
|
|
||||||
if style not in border_styles:
|
|
||||||
border_styles.append(style)
|
|
||||||
idx = border_styles.index(style)
|
|
||||||
cls = 'border_style%d'%idx
|
|
||||||
style_map[cls] = style
|
|
||||||
elem.set('class', cls)
|
|
||||||
return style_map
|
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
|
||||||
accelerators):
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
|
||||||
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
|
||||||
self.opts = options
|
|
||||||
self.log = log
|
|
||||||
self.log('Converting RTF to XML...')
|
|
||||||
try:
|
|
||||||
xml = self.generate_xml(stream.name)
|
|
||||||
except RtfInvalidCodeException as e:
|
|
||||||
raise ValueError(_('This RTF file has a feature calibre does not '
|
|
||||||
'support. Convert it to HTML first and then try it.\n%s')%e)
|
|
||||||
|
|
||||||
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
|
|
||||||
if d:
|
|
||||||
imap = {}
|
|
||||||
try:
|
|
||||||
imap = self.extract_images(d[0])
|
|
||||||
except:
|
|
||||||
self.log.exception('Failed to extract images...')
|
|
||||||
|
|
||||||
self.log('Parsing XML...')
|
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
|
||||||
doc = etree.fromstring(xml, parser=parser)
|
|
||||||
border_styles = self.convert_borders(doc)
|
|
||||||
for pict in doc.xpath('//rtf:pict[@num]',
|
|
||||||
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
|
|
||||||
num = int(pict.get('num'))
|
|
||||||
name = imap.get(num, None)
|
|
||||||
if name is not None:
|
|
||||||
pict.set('num', name)
|
|
||||||
|
|
||||||
self.log('Converting XML to HTML...')
|
|
||||||
inline_class = InlineClass(self.log)
|
|
||||||
styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
|
|
||||||
extensions = { ('calibre', 'inline-class') : inline_class }
|
|
||||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
|
||||||
result = transform(doc)
|
|
||||||
html = 'index.xhtml'
|
|
||||||
with open(html, 'wb') as f:
|
|
||||||
res = transform.tostring(result)
|
|
||||||
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
|
||||||
#clean multiple \n
|
|
||||||
res = re.sub('\n+', '\n', res)
|
|
||||||
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
|
||||||
# res = re.sub('\s*<body>', '<body>', res)
|
|
||||||
# res = re.sub('(?<=\n)\n{2}',
|
|
||||||
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
|
||||||
f.write(res)
|
|
||||||
self.write_inline_css(inline_class, border_styles)
|
|
||||||
stream.seek(0)
|
|
||||||
mi = get_metadata(stream, 'rtf')
|
|
||||||
if not mi.title:
|
|
||||||
mi.title = _('Unknown')
|
|
||||||
if not mi.authors:
|
|
||||||
mi.authors = [_('Unknown')]
|
|
||||||
opf = OPFCreator(os.getcwd(), mi)
|
|
||||||
opf.create_manifest([('index.xhtml', None)])
|
|
||||||
opf.create_spine(['index.xhtml'])
|
|
||||||
opf.render(open('metadata.opf', 'wb'))
|
|
||||||
return os.path.abspath('metadata.opf')
|
|
||||||
|
|
||||||
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
|
|
||||||
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
|
|
||||||
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user