mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 07:20:44 -04:00
Working initial HTML->LRF converter with CSS support. Next on list is support for <style>, <link> and <img> tags.
This commit is contained in:
parent
15014f74fe
commit
d69fad53f4
@ -5,5 +5,6 @@
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
|
||||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
||||
<path>/libprs500/src</path>
|
||||
<path>/libprs500/libprs500.lrf.txt</path>
|
||||
</pydev_pathproperty>
|
||||
</pydev_project>
|
||||
|
8
setup.py
8
setup.py
@ -33,7 +33,10 @@ if sys.argv[1] == 'py2exe':
|
||||
f.close()
|
||||
try:
|
||||
import py2exe
|
||||
console = [{'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'}]
|
||||
# py2exe console targets: one dict per executable to build.
# Each script needs its own dict. Repeating the 'script'/'dest_base' keys
# inside a single dict literal silently keeps only the last pair, which
# would drop the prs500 command line tool from the build.
console = [
    {'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'},
    {'script' : 'src/libprs500/lrf/html/convert_from.py', 'dest_base':'html2lrf'},
]
|
||||
windows = [{'script' : 'src/libprs500/gui/main.py', 'dest_base':'prs500-gui',
|
||||
'icon_resources':[(1,'icons/library.ico')]}]
|
||||
excludes = ["Tkconstants", "Tkinter", "tcl", "_imagingtk",
|
||||
@ -94,9 +97,8 @@ setup(
|
||||
'prs500 = libprs500.cli.main:main', \
|
||||
'lrf-meta = libprs500.lrf.meta:main', \
|
||||
'rtf-meta = libprs500.metadata.rtf:main', \
|
||||
'makelrf = libprs500.lrf.makelrf:main', \
|
||||
'txt2lrf = libprs500.lrf.makelrf:txt', \
|
||||
'html2lrf = libprs500.lrf.makelrf:html',\
|
||||
'html2lrf = libprs500.lrf.html.convert_from:main',\
|
||||
],
|
||||
'gui_scripts' : [ 'prs500-gui = libprs500.gui.main:main']
|
||||
},
|
||||
|
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 1.8 KiB |
@ -19,10 +19,10 @@ Code to convert HTML ebooks into LRF ebooks.
|
||||
"""
|
||||
import os, re, sys
|
||||
from htmlentitydefs import name2codepoint
|
||||
|
||||
from optparse import OptionParser
|
||||
|
||||
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString
|
||||
from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR
|
||||
from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR, Italic
|
||||
from libprs500.lrf.pylrs.pylrs import Span as _Span
|
||||
from libprs500.lrf import ConversionError
|
||||
|
||||
@ -40,7 +40,7 @@ class Span(_Span):
|
||||
(an int) if successful. Otherwise, returns None.
|
||||
Assumes: 1 pixel is 1/4 mm. One em is 10pts
|
||||
"""
|
||||
m = re.match("\s*([0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
|
||||
m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
|
||||
if m is not None:
|
||||
unit = float(m.group(1))
|
||||
if m.group(2) == '%':
|
||||
@ -160,6 +160,10 @@ class Span(_Span):
|
||||
src = pat.sub(repl, src)
|
||||
if not src:
|
||||
raise ConversionError('No point in adding an empty string')
|
||||
if 'font-style' in css.keys():
|
||||
fs = css.pop('font-style')
|
||||
if fs.lower() == 'italic':
|
||||
src = Italic(src)
|
||||
attrs = Span.translate_attrs(css)
|
||||
_Span.__init__(self, text=src, **attrs)
|
||||
|
||||
@ -227,6 +231,13 @@ class HTMLConvertor(object):
|
||||
"""
|
||||
Return a dictionary of style properties applicable to Tag tag.
|
||||
"""
|
||||
def merge_parent_css(prop, pcss):
    """
    Copy the font related entries of pcss into prop.

    Only keys whose lower-cased name starts with 'font' are taken over;
    they overwrite entries of the same name already present in prop.
    prop is modified in place and nothing is returned.
    """
    inherited = dict((name, value) for name, value in pcss.items()
                     if name.lower().startswith('font'))
    prop.update(inherited)
|
||||
|
||||
prop = dict()
|
||||
if tag.has_key("align"):
|
||||
prop["text-align"] = tag["align"]
|
||||
@ -238,7 +249,7 @@ class HTMLConvertor(object):
|
||||
if self.css.has_key(classname):
|
||||
prop.update(self.css[classname])
|
||||
if parent_css:
|
||||
prop.update(parent_css)
|
||||
merge_parent_css(prop, parent_css)
|
||||
if tag.has_key("style"):
|
||||
prop.update(self.parse_style_properties(tag["style"]))
|
||||
return prop
|
||||
@ -257,21 +268,51 @@ class HTMLConvertor(object):
|
||||
if self.current_page:
|
||||
self.book.append(self.current_page)
|
||||
|
||||
def end_page(self):
    """
    Close the page being built and start a new, empty one.

    The current paragraph is appended to the current text block, the
    block to the current page and the page to the book. Afterwards
    current_para, current_block and current_page refer to fresh objects.
    """
    finished_para = self.current_para
    finished_block = self.current_block
    finished_page = self.current_page
    # Chain the finished objects together, innermost first.
    finished_block.append(finished_para)
    finished_page.append(finished_block)
    self.book.append(finished_page)
    # Replace them with empty ones (created in the same order as before).
    self.current_para, self.current_block, self.current_page = \
        Paragraph(), TextBlock(), Page()
|
||||
|
||||
|
||||
def parse_tag(self, tag, parent_css):
|
||||
def sanctify_css(css):
    """
    Remove from css the properties that must not be handed to a Span.

    Dropped are all keys that, compared case-insensitively, start with
    'margin', contain 'indent', 'padding' or 'border', or are one of
    'color', 'display', 'text-decoration' and 'letter-spacing'.

    @param css: Dictionary of CSS property name -> value. Modified in place.
    @return: The same dictionary object that was passed in.
    """
    blocked = ('color', 'display', 'text-decoration', 'letter-spacing')
    # Loop over a snapshot of the keys: entries are deleted inside the loop
    # and a dictionary must not change size while it is being iterated.
    for key in list(css.keys()):
        test = key.lower()
        if test.startswith('margin') or 'indent' in test or \
           'padding' in test or 'border' in test or test in blocked:
            del css[key]
    return css
|
||||
|
||||
def add_text(tag, css):
|
||||
try:
|
||||
self.current_para.append(Span(tag, css))
|
||||
self.current_para.append(Span(tag, sanctify_css(css)))
|
||||
except ConversionError, err:
|
||||
if self.verbose:
|
||||
print >>sys.stderr, err
|
||||
|
||||
|
||||
|
||||
def process_text_tag(tag, pcss):
    """
    Add the children of tag to the book, honouring CSS page breaks.

    A 'page-break-before' or 'page-break-after' entry whose value is not
    'avoid' ends the current page before, respectively after, the
    children are processed. Both entries are removed from pcss.

    @param tag: The tag whose contents are to be processed.
    @param pcss: Dictionary of CSS properties applying to tag. The
                 page-break entries are popped from it in place.
    """
    if 'page-break-before' in pcss:
        if pcss.pop('page-break-before').lower() != 'avoid':
            self.end_page()
    break_after = False
    if 'page-break-after' in pcss:
        # 'avoid' asks for no break, same rule as for page-break-before.
        break_after = pcss.pop('page-break-after').lower() != 'avoid'
    for c in tag.contents:
        # The type checks must look at the child c, not at the enclosing tag.
        if isinstance(c, Comment):
            # Skipped like in the generic branch of parse_tag. Presumably
            # Comment is a NavigableString subclass, hence tested first.
            continue
        if isinstance(c, NavigableString):
            add_text(c, pcss)
        else:
            self.parse_tag(c, pcss)
    if break_after:
        self.end_page()
|
||||
|
||||
try:
|
||||
tagname = tag.name.lower()
|
||||
@ -280,8 +321,17 @@ class HTMLConvertor(object):
|
||||
return
|
||||
if tagname in ["title", "script", "meta"]:
|
||||
pass
|
||||
elif tagname in ['style', 'link']:
|
||||
# TODO: Append CSS to self.css
|
||||
pass
|
||||
elif tagname == 'p':
|
||||
css = self.tag_css(tag, parent_css=parent_css)
|
||||
indent = css.pop('text-indent', '')
|
||||
if indent:
|
||||
# TODO: If indent is different from current textblock's parindent
|
||||
# start a new TextBlock
|
||||
pass
|
||||
self.current_para.CR() # Put a paragraph end
|
||||
self.current_block.append(self.current_para)
|
||||
self.current_para = Paragraph()
|
||||
process_text_tag(tag, css)
|
||||
@ -302,13 +352,14 @@ class HTMLConvertor(object):
|
||||
self.current_para = Paragraph()
|
||||
self.current_page = Page()
|
||||
else:
|
||||
css = self.tag_css(tag, parent_css=parent_css)
|
||||
for c in tag.contents:
|
||||
if isinstance(c, Comment):
|
||||
continue
|
||||
elif isinstance(c, Tag):
|
||||
self.parse_tag(c)
|
||||
self.parse_tag(c, css)
|
||||
elif isinstance(c, NavigableString):
|
||||
add_text(c, parent_css)
|
||||
add_text(c, css)
|
||||
|
||||
def writeto(self, path):
|
||||
if path.lower().endswith('lrs'):
|
||||
@ -327,8 +378,33 @@ def process_file(path, options):
|
||||
book = Book(title=options.title, author=options.author, \
|
||||
sourceencoding='utf8')
|
||||
conv = HTMLConvertor(book, soup)
|
||||
name = os.path.splitext(os.path.basename(path))[0]+'.lrs'
|
||||
name = os.path.splitext(os.path.basename(path))[0]+'.lrf'
|
||||
os.chdir(cwd)
|
||||
conv.writeto(name)
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
|
||||
def main():
    """
    CLI for html -> lrf conversions.

    Parses the command line, defaults the title to the base name of the
    source file (without extension) and passes the source path and the
    parsed options to process_file().

    Prints the help text and exits with status 1 unless exactly one
    positional argument is given.
    """
    # The usage text names an HTML input; it used to say mybook.txt, a
    # leftover from the txt converter.
    parser = OptionParser(usage=\
"""usage: %prog [options] mybook.html

%prog converts mybook.html to mybook.lrf
"""\
        )
    parser.add_option("-t", "--title", action="store", type="string", \
                      dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string", \
                      dest="author", help="Set the author", default='Unknown')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    if options.title is None:
        options.title = os.path.splitext(os.path.basename(src))[0]
    process_file(src, options)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -1,266 +0,0 @@
|
||||
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
"""
|
||||
Thin ctypes based wrapper around libtidy. Example usage:
|
||||
>>> from libtidy import parseString
|
||||
>>> print parseString('<h1>fowehfow</h2>', \
|
||||
output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0)
|
||||
<?xml version="1.0" encoding="us-ascii"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>
|
||||
fowehfow
|
||||
</h1>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
import ctypes
|
||||
from cStringIO import StringIO
|
||||
import weakref
|
||||
|
||||
class TidyLibError(Exception):
|
||||
def __init__(self, arg):
|
||||
self.arg=arg
|
||||
|
||||
class InvalidOptionError(TidyLibError):
|
||||
def __str__(self):
|
||||
return "%s was not a valid Tidy option." % (self.arg)
|
||||
__repr__=__str__
|
||||
|
||||
class OptionArgError(TidyLibError):
|
||||
def __init__(self, arg):
|
||||
self.arg=arg
|
||||
def __str__(self):
|
||||
return self.arg
|
||||
|
||||
# search the path for libtidy using the known names;
|
||||
thelib=None
|
||||
for libname in ('cygtidy-0-99-0', 'libtidy', 'libtidy.so', 'tidylib'):
|
||||
try:
|
||||
thelib = getattr(ctypes.cdll, libname)
|
||||
break
|
||||
except OSError:
|
||||
pass
|
||||
if not thelib:
|
||||
raise OSError("Couldn't find libtidy, please make sure it is installed.")
|
||||
|
||||
class Loader:
|
||||
"""
|
||||
I am a trivial wrapper that eliminates the need for tidy.tidyFoo,
|
||||
so you can just access tidy.Foo
|
||||
"""
|
||||
def __init__(self):
|
||||
self.lib = thelib
|
||||
def __getattr__(self, name):
|
||||
try:
|
||||
return getattr(self.lib, "tidy%s" % name)
|
||||
# current ctypes uses ValueError, future will use AttributeError
|
||||
except (ValueError, AttributeError):
|
||||
return getattr(self.lib, name)
|
||||
|
||||
_tidy=Loader()
|
||||
|
||||
# define a callback to pass to Tidylib
|
||||
def _putByte(handle, c):
|
||||
"""Lookup sink by handle and call its putByte method"""
|
||||
sinkfactory[handle].putByte(c)
|
||||
return 0
|
||||
|
||||
PUTBYTEFUNC = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char)
|
||||
putByte = PUTBYTEFUNC(_putByte)
|
||||
|
||||
class _OutputSink(ctypes.Structure):
|
||||
_fields_ = [("sinkData", ctypes.c_int),
|
||||
("putByte", PUTBYTEFUNC),
|
||||
]
|
||||
|
||||
class _Sink:
|
||||
def __init__(self):
|
||||
self._data = StringIO()
|
||||
self.struct = _OutputSink()
|
||||
self.struct.putByte = putByte
|
||||
|
||||
def putByte(self, c):
|
||||
self._data.write(c)
|
||||
|
||||
def __str__(self):
|
||||
return self._data.getvalue()
|
||||
|
||||
class ReportItem:
|
||||
def __init__(self, err):
|
||||
self.err = err
|
||||
if err.startswith('line'):
|
||||
tokens = err.split(' ',6)
|
||||
self.severity = tokens[5][0] # W or E
|
||||
self.line = int(tokens[1])
|
||||
self.col = int(tokens[3])
|
||||
self.message = tokens[6]
|
||||
else:
|
||||
tokens = err.split(' ',1)
|
||||
self.severity = tokens[0][0]
|
||||
self.message = tokens[1]
|
||||
self.line = None
|
||||
self.col = None
|
||||
# TODO - parse emacs mode
|
||||
|
||||
def __str__(self):
|
||||
severities = dict(W='Warning', E='Error', C='Config')
|
||||
try:
|
||||
if self.line:
|
||||
return "line %d col %d - %s: %s" % (self.line, self.col,
|
||||
severities[self.severity],
|
||||
self.message)
|
||||
|
||||
else:
|
||||
return "%s: %s" % (severities[self.severity], self.message)
|
||||
except KeyError:
|
||||
return self.err
|
||||
|
||||
def __repr__(self):
|
||||
return "%s('%s')" % (self.__class__.__name__,
|
||||
str(self).replace("'", "\\'"))
|
||||
|
||||
class FactoryDict(dict):
|
||||
"""I am a dict with a create method and no __setitem__. This allows
|
||||
me to control my own keys.
|
||||
"""
|
||||
def create(self):
|
||||
"""Subclasses should implement me to generate a new item"""
|
||||
|
||||
def _setitem(self, name, value):
|
||||
dict.__setitem__(self, name, value)
|
||||
|
||||
def __setitem__(self, name, value):
|
||||
raise TypeError, "Use create() to get a new object"
|
||||
|
||||
|
||||
class SinkFactory(FactoryDict):
|
||||
"""Mapping for lookup of sinks by handle"""
|
||||
def __init__(self):
|
||||
FactoryDict.__init__(self)
|
||||
self.lastsink = 0
|
||||
|
||||
def create(self):
|
||||
sink = _Sink()
|
||||
sink.struct.sinkData = self.lastsink
|
||||
FactoryDict._setitem(self, self.lastsink, sink)
|
||||
self.lastsink = self.lastsink+1
|
||||
return sink
|
||||
|
||||
sinkfactory = SinkFactory()
|
||||
|
||||
class _Document(object):
|
||||
def __init__(self):
|
||||
self.cdoc = _tidy.Create()
|
||||
self.errsink = sinkfactory.create()
|
||||
_tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct))
|
||||
|
||||
def write(self, stream):
|
||||
stream.write(str(self))
|
||||
|
||||
def get_errors(self):
|
||||
ret = []
|
||||
for line in str(self.errsink).split('\n'):
|
||||
line = line.strip(' \n\r')
|
||||
if line: ret.append(ReportItem(line))
|
||||
return ret
|
||||
|
||||
errors=property(get_errors)
|
||||
|
||||
def __str__(self):
|
||||
stlen = ctypes.c_int(8192)
|
||||
st = ctypes.c_buffer(stlen.value)
|
||||
rc = _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen))
|
||||
if rc==-12: # buffer too small
|
||||
st = ctypes.c_buffer(stlen.value)
|
||||
_tidy.SaveString(self.cdoc, st, ctypes.byref(stlen))
|
||||
return st.value
|
||||
|
||||
errors = {'missing or malformed argument for option: ': OptionArgError,
|
||||
'unknown option: ': InvalidOptionError,
|
||||
}
|
||||
|
||||
|
||||
class DocumentFactory(FactoryDict):
|
||||
def _setOptions(self, doc, **options):
|
||||
for k in options.keys():
|
||||
|
||||
# this will flush out most argument type errors...
|
||||
if options[k] is None: options[k] = ''
|
||||
|
||||
_tidy.OptParseValue(doc.cdoc,
|
||||
k.replace('_', '-'),
|
||||
str(options[k]))
|
||||
if doc.errors:
|
||||
match=filter(doc.errors[-1].message.startswith, errors.keys())
|
||||
if match:
|
||||
raise errors[match[0]](doc.errors[-1].message)
|
||||
|
||||
def load(self, doc, arg, loader):
|
||||
loader(doc.cdoc, arg)
|
||||
_tidy.CleanAndRepair(doc.cdoc)
|
||||
|
||||
def loadFile(self, doc, filename):
|
||||
self.load(doc, filename, _tidy.ParseFile)
|
||||
|
||||
def loadString(self, doc, st):
|
||||
self.load(doc, st, _tidy.ParseString)
|
||||
|
||||
def _create(self, *args, **kwargs):
|
||||
doc = _Document()
|
||||
self._setOptions(doc, **kwargs)
|
||||
ref = weakref.ref(doc, self.releaseDoc)
|
||||
FactoryDict._setitem(self, ref, doc.cdoc)
|
||||
return doc
|
||||
|
||||
def parse(self, filename, *args, **kwargs):
|
||||
"""
|
||||
Open and process filename as an HTML file, returning a
|
||||
processed document object.
|
||||
@param kwargs: named options to pass to TidyLib for processing
|
||||
the input file.
|
||||
@param filename: the name of a file to process
|
||||
@return: a document object
|
||||
"""
|
||||
doc = self._create(**kwargs)
|
||||
self.loadFile(doc, filename)
|
||||
return doc
|
||||
|
||||
def parseString(self, st, *args, **kwargs):
|
||||
"""
|
||||
Use st as an HTML file, and process it, returning a
|
||||
document object.
|
||||
@param kwargs: named options to pass to TidyLib for processing
|
||||
the input file.
|
||||
@param st: the string to parse
|
||||
@return: a document object
|
||||
"""
|
||||
doc = self._create(**kwargs)
|
||||
self.loadString(doc, st)
|
||||
return doc
|
||||
|
||||
def releaseDoc(self, ref):
|
||||
_tidy.Release(self[ref])
|
||||
|
||||
docfactory = DocumentFactory()
|
||||
parse = docfactory.parse
|
||||
parseString = docfactory.parseString
|
@ -17,19 +17,14 @@ import shutil
|
||||
import sys
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import pkg_resources
|
||||
import subprocess
|
||||
from tempfile import mkdtemp
|
||||
from optparse import OptionParser
|
||||
import xml.dom.minidom as dom
|
||||
|
||||
from libprs500.lrf import ConversionError
|
||||
from libprs500.lrf.meta import LRFException, LRFMetaFile
|
||||
from libprs500.ptempfile import PersistentTemporaryFile
|
||||
|
||||
_bbebook = 'BBeBook-0.2.jar'
|
||||
|
||||
def generate_thumbnail(path):
|
||||
""" Generate a JPEG thumbnail of size ~ 128x128 (aspect ratio preserved)"""
|
||||
try:
|
||||
@ -45,30 +40,6 @@ def generate_thumbnail(path):
|
||||
im.save(thumb.name)
|
||||
return thumb
|
||||
|
||||
def create_xml(cfg):
|
||||
doc = dom.getDOMImplementation().createDocument(None, None, None)
|
||||
def add_field(parent, tag, value):
|
||||
elem = doc.createElement(tag)
|
||||
elem.appendChild(doc.createTextNode(value))
|
||||
parent.appendChild(elem)
|
||||
|
||||
info = doc.createElement('Info')
|
||||
info.setAttribute('version', '1.0')
|
||||
book_info = doc.createElement('BookInfo')
|
||||
doc_info = doc.createElement('DocInfo')
|
||||
info.appendChild(book_info)
|
||||
info.appendChild(doc_info)
|
||||
add_field(book_info, 'File', cfg['File'])
|
||||
add_field(doc_info, 'Output', cfg['Output'])
|
||||
for field in ['Title', 'Author', 'BookID', 'Publisher', 'Label', \
|
||||
'Category', 'Classification', 'Icon', 'Cover', 'FreeText']:
|
||||
if cfg.has_key(field):
|
||||
add_field(book_info, field, cfg[field])
|
||||
add_field(doc_info, 'Language', 'en')
|
||||
add_field(doc_info, 'Creator', _bbebook)
|
||||
add_field(doc_info, 'CreationDate', time.strftime('%Y-%m-%d', time.gmtime()))
|
||||
doc.appendChild(info)
|
||||
return doc.toxml()
|
||||
|
||||
def makelrf(author=None, title=None, \
|
||||
thumbnail=None, src=None, odir=".",\
|
||||
@ -150,127 +121,3 @@ def makelrf(author=None, title=None, \
|
||||
if dirpath:
|
||||
shutil.rmtree(dirpath, True)
|
||||
|
||||
def txt():
|
||||
""" CLI for txt -> lrf conversions """
|
||||
parser = OptionParser(usage=\
|
||||
"""usage: %prog [options] mybook.txt
|
||||
|
||||
%prog converts mybook.txt to mybook.lrf
|
||||
"""\
|
||||
)
|
||||
parser.add_option("-t", "--title", action="store", type="string", \
|
||||
dest="title", help="Set the title")
|
||||
parser.add_option("-a", "--author", action="store", type="string", \
|
||||
dest="author", help="Set the author", default='Unknown')
|
||||
defenc = 'cp1252'
|
||||
enchelp = 'Set the encoding used to decode ' + \
|
||||
'the text in mybook.txt. Default encoding is ' + defenc
|
||||
parser.add_option('-e', '--encoding', action='store', type='string', \
|
||||
dest='encoding', help=enchelp, default=defenc)
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
src = args[0]
|
||||
if options.title == None:
|
||||
options.title = os.path.splitext(os.path.basename(src))[0]
|
||||
try:
|
||||
convert_txt(src, options)
|
||||
except ConversionError, err:
|
||||
print >>sys.stderr, err
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def convert_txt(path, options):
|
||||
"""
|
||||
Convert the text file at C{path} into an lrf file.
|
||||
@param options: Object with the following attributes:
|
||||
C{author}, C{title}, C{encoding} (the assumed encoding of
|
||||
the text in C{path}.)
|
||||
"""
|
||||
import fileinput
|
||||
from libprs500.lrf.pylrs.pylrs import Book
|
||||
book = Book(title=options.title, author=options.author, \
|
||||
sourceencoding=options.encoding)
|
||||
buffer = ''
|
||||
block = book.Page().TextBlock()
|
||||
for line in fileinput.input(path):
|
||||
line = line.strip()
|
||||
if line:
|
||||
buffer += line
|
||||
else:
|
||||
block.Paragraph(buffer)
|
||||
buffer = ''
|
||||
basename = os.path.basename(path)
|
||||
name = os.path.splitext(basename)[0]+'.lrf'
|
||||
try:
|
||||
book.renderLrf(name)
|
||||
except UnicodeDecodeError:
|
||||
raise ConversionError(path + ' is not encoded in ' + \
|
||||
options.encoding +'. Specify the '+ \
|
||||
'correct encoding with the -e option.')
|
||||
return os.path.abspath(name)
|
||||
|
||||
|
||||
def html():
|
||||
""" CLI for html -> lrf conversions """
|
||||
parser = OptionParser(usage=\
|
||||
"""usage: %prog [options] mybook.txt
|
||||
|
||||
%prog converts mybook.txt to mybook.lrf
|
||||
"""\
|
||||
)
|
||||
parser.add_option("-t", "--title", action="store", type="string", \
|
||||
dest="title", help="Set the title")
|
||||
parser.add_option("-a", "--author", action="store", type="string", \
|
||||
dest="author", help="Set the author", default='Unknown')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
src = args[0]
|
||||
if options.title == None:
|
||||
options.title = os.path.splitext(os.path.basename(src))[0]
|
||||
from libprs500.lrf.html.convert import process_file
|
||||
process_file(src, options)
|
||||
|
||||
def main(cargs=None):
|
||||
parser = OptionParser(usage=\
|
||||
"""usage: %prog [options] mybook.[html|pdf|rar]
|
||||
|
||||
%prog converts mybook to mybook.lrf
|
||||
If you specify a rar file you must have the unrar command line client
|
||||
installed. makelrf assumes the rar file is an archive containing the
|
||||
html file you want converted."""\
|
||||
)
|
||||
|
||||
parser.add_option("-t", "--title", action="store", type="string", \
|
||||
dest="title", help="Set the book title")
|
||||
parser.add_option("-a", "--author", action="store", type="string", \
|
||||
dest="author", help="Set the author")
|
||||
parser.add_option('-r', '--rasterize', action='store_false', \
|
||||
dest="rasterize",
|
||||
help="Convert pdfs into image files.")
|
||||
parser.add_option('-c', '--cover', action='store', dest='cover',\
|
||||
help="Path to a graphic that will be set as the cover. "\
|
||||
"If it is specified the thumbnail is automatically "\
|
||||
"generated from it")
|
||||
parser.add_option("--thumbnail", action="store", type="string", \
|
||||
dest="thumbnail", \
|
||||
help="Path to a graphic that will be set as the thumbnail")
|
||||
if not cargs:
|
||||
cargs = sys.argv
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
src = args[0]
|
||||
root, ext = os.path.splitext(src)
|
||||
if ext not in ['.html', '.pdf', '.rar']:
|
||||
print >> sys.stderr, "Can only convert files ending in .html|.pdf|.rar"
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
name = makelrf(author=options.author, title=options.title, \
|
||||
thumbnail=options.thumbnail, src=src, cover=options.cover, \
|
||||
rasterize=options.rasterize)
|
||||
print "LRF generated:", name
|
||||
|
14
src/libprs500/lrf/txt/__init__.py
Normal file
14
src/libprs500/lrf/txt/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
86
src/libprs500/lrf/txt/convert_from.py
Normal file
86
src/libprs500/lrf/txt/convert_from.py
Normal file
@ -0,0 +1,86 @@
|
||||
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Convert .txt files to .lrf
|
||||
"""
|
||||
import os, sys
|
||||
from optparse import OptionParser
|
||||
|
||||
from libprs500.lrf import ConversionError
|
||||
|
||||
def main():
|
||||
""" CLI for txt -> lrf conversions """
|
||||
parser = OptionParser(usage=\
|
||||
"""usage: %prog [options] mybook.txt
|
||||
|
||||
%prog converts mybook.txt to mybook.lrf
|
||||
"""\
|
||||
)
|
||||
parser.add_option("-t", "--title", action="store", type="string", \
|
||||
dest="title", help="Set the title")
|
||||
parser.add_option("-a", "--author", action="store", type="string", \
|
||||
dest="author", help="Set the author", default='Unknown')
|
||||
defenc = 'cp1252'
|
||||
enchelp = 'Set the encoding used to decode ' + \
|
||||
'the text in mybook.txt. Default encoding is ' + defenc
|
||||
parser.add_option('-e', '--encoding', action='store', type='string', \
|
||||
dest='encoding', help=enchelp, default=defenc)
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
src = args[0]
|
||||
if options.title == None:
|
||||
options.title = os.path.splitext(os.path.basename(src))[0]
|
||||
try:
|
||||
convert_txt(src, options)
|
||||
except ConversionError, err:
|
||||
print >>sys.stderr, err
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.

    Blank lines separate paragraphs. The stripped lines of one paragraph
    are joined with single spaces. The lrf file is named after the base
    name of C{path} with the extension C{.lrf} and is rendered to a
    relative path, i.e. into the current working directory.

    @param path: Path to the text file to convert.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of
                    the text in C{path}.)
    @return: Absolute path of the generated lrf file.
    @raise ConversionError: If rendering the book raises a
                            UnicodeDecodeError.
    """
    import fileinput
    from libprs500.lrf.pylrs.pylrs import Book
    book = Book(title=options.title, author=options.author, \
                sourceencoding=options.encoding)
    block = book.Page().TextBlock()
    pending = []  # stripped, non-empty lines of the paragraph being read
    # A private FileInput instance (instead of the module level
    # fileinput.input()) leaves no global state behind and can be closed
    # reliably even if reading fails half way through.
    lines = fileinput.FileInput(path)
    try:
        for line in lines:
            line = line.strip()
            if line:
                pending.append(line)
            else:
                # Join with a space so that the last word of one line and
                # the first word of the next are not fused together.
                block.Paragraph(' '.join(pending))
                pending = []
    finally:
        lines.close()
    if pending:
        # The file did not end with a blank line: flush the last paragraph
        # instead of silently dropping it.
        block.Paragraph(' '.join(pending))
    basename = os.path.basename(path)
    name = os.path.splitext(basename)[0]+'.lrf'
    try:
        book.renderLrf(name)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' + \
                              options.encoding +'. Specify the '+ \
                              'correct encoding with the -e option.')
    return os.path.abspath(name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user