mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Various improvements to html2lrf. Added a demo for html2lrf
This commit is contained in:
parent
f25cc305a1
commit
42c4acd360
@ -5,6 +5,5 @@
|
|||||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
|
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
|
||||||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
||||||
<path>/libprs500/src</path>
|
<path>/libprs500/src</path>
|
||||||
<path>/libprs500/libprs500.lrf.txt</path>
|
|
||||||
</pydev_pathproperty>
|
</pydev_pathproperty>
|
||||||
</pydev_project>
|
</pydev_project>
|
||||||
|
@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
|
|||||||
suit your distribution.
|
suit your distribution.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "0.3.13"
|
__version__ = "0.3.14"
|
||||||
__docformat__ = "epytext"
|
__docformat__ = "epytext"
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
|
@ -29,6 +29,16 @@ __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
|||||||
class ConversionError(Exception):
    ''' Raised when a source document cannot be converted to LRF. '''
|
||||||
|
|
||||||
|
def get_text(elem):
    '''
    Return the textual content of a pylrs element.

    The element's own C{text} attribute (if present) comes first, followed by
    the text of every item in its C{contents} attribute (if present),
    collected recursively and in order.

    @param elem: The object to extract text from. Objects that have neither a
                 C{text} nor a C{contents} attribute contribute nothing.
    @return: The concatenated text, or the empty string if there is none.
    '''
    pieces = []
    # A missing or None/empty text attribute simply contributes nothing
    # instead of raising a TypeError on concatenation.
    own_text = getattr(elem, 'text', None)
    if own_text:
        pieces.append(own_text)
    # Likewise tolerate a missing or None contents attribute.
    for child in getattr(elem, 'contents', None) or ():
        pieces.append(get_text(child))
    # Join once rather than growing a string with repeated +=
    return ''.join(pieces)
|
||||||
|
|
||||||
def option_parser(usage):
|
def option_parser(usage):
|
||||||
parser = OptionParser(usage=usage, version='libprs500 '+VERSION)
|
parser = OptionParser(usage=usage, version='libprs500 '+VERSION)
|
||||||
parser.add_option('--header', action='store_true', default=False, dest='header',
|
parser.add_option('--header', action='store_true', default=False, dest='header',
|
||||||
|
@ -14,14 +14,13 @@
|
|||||||
## You should have received a copy of the GNU General Public License along
|
## You should have received a copy of the GNU General Public License along
|
||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Code to convert HTML ebooks into LRF ebooks.
|
Code to convert HTML ebooks into LRF ebooks.
|
||||||
|
|
||||||
I am indebted to esperanc for the CSS->Xylog Style conversion routines
|
I am indebted to esperanc for the CSS->Xylog Style conversion routines
|
||||||
and to Falstaff for pylrs.
|
and to Falstaff for pylrs.
|
||||||
"""
|
"""
|
||||||
import os, re, sys, shutil
|
import os, re, sys, shutil, traceback
|
||||||
from htmlentitydefs import name2codepoint
|
from htmlentitydefs import name2codepoint
|
||||||
from urllib import urlopen
|
from urllib import urlopen
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
@ -31,9 +30,10 @@ from operator import itemgetter
|
|||||||
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
|
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
|
||||||
NavigableString, Declaration
|
NavigableString, Declaration
|
||||||
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
|
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
|
||||||
ImageBlock, JumpButton, CharButton, Page, Bold
|
ImageBlock, JumpButton, CharButton, \
|
||||||
|
Page, Bold, Space, Plot, TextStyle, Image
|
||||||
from libprs500.lrf.pylrs.pylrs import Span as _Span
|
from libprs500.lrf.pylrs.pylrs import Span as _Span
|
||||||
from libprs500.lrf import ConversionError, option_parser, Book
|
from libprs500.lrf import ConversionError, option_parser, Book, get_text
|
||||||
from libprs500 import extract
|
from libprs500 import extract
|
||||||
|
|
||||||
def ImagePage():
|
def ImagePage():
|
||||||
@ -155,6 +155,8 @@ class Span(_Span):
|
|||||||
ans = font_weight(val)
|
ans = font_weight(val)
|
||||||
if ans:
|
if ans:
|
||||||
t['fontweight'] = ans
|
t['fontweight'] = ans
|
||||||
|
if ans > 140:
|
||||||
|
t['wordspace'] = '50'
|
||||||
elif key.startswith("margin"):
|
elif key.startswith("margin"):
|
||||||
if key == "margin":
|
if key == "margin":
|
||||||
u = []
|
u = []
|
||||||
@ -181,19 +183,12 @@ class Span(_Span):
|
|||||||
t["topskip"] = str(u[0])
|
t["topskip"] = str(u[0])
|
||||||
if u[1] is not None:
|
if u[1] is not None:
|
||||||
t["sidemargin"] = str(u[1])
|
t["sidemargin"] = str(u[1])
|
||||||
elif key == "text-align" or key == "align":
|
|
||||||
if val in ["right", "foot"]:
|
|
||||||
t["align"] = "foot"
|
|
||||||
elif val == "center":
|
|
||||||
t["align"] = "center"
|
|
||||||
else:
|
|
||||||
t["align"] = "head"
|
|
||||||
else:
|
else:
|
||||||
print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key]
|
print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key]
|
||||||
return t
|
return t
|
||||||
|
|
||||||
def __init__(self, ns, css, font_delta=0):
|
def __init__(self, ns, css, font_delta=0):
|
||||||
src = ns.string
|
src = ns.string if hasattr(ns, 'string') else str(ns)
|
||||||
src = re.sub(r'\s{2,}', ' ', src) # Remove multiple spaces
|
src = re.sub(r'\s{2,}', ' ', src) # Remove multiple spaces
|
||||||
for pat, repl in Span.rules:
|
for pat, repl in Span.rules:
|
||||||
src = pat.sub(repl, src)
|
src = pat.sub(repl, src)
|
||||||
@ -228,18 +223,43 @@ class HTMLConverter(object):
|
|||||||
i = {"font-style" :"italic"},
|
i = {"font-style" :"italic"},
|
||||||
em = {"font-style" :"italic"},
|
em = {"font-style" :"italic"},
|
||||||
small = {'font-size' :'small'},
|
small = {'font-size' :'small'},
|
||||||
|
pre = {'font-family' :'monospace' },
|
||||||
center = {'text-align' : 'center'}
|
center = {'text-align' : 'center'}
|
||||||
)
|
)
|
||||||
processed_files = {} #: Files that have been processed
|
processed_files = {} #: Files that have been processed
|
||||||
|
|
||||||
def __init__(self, book, path, font_delta=0, verbose=False, cover=None):
|
def __init__(self, book, path, width=575, height=747,
|
||||||
self.images = {} #: Images referenced in the HTML document
|
font_delta=0, verbose=False, cover=None):
|
||||||
self.targets = {} #: <a name=...> elements
|
'''
|
||||||
self.links = [] #: <a href=...> elements
|
Convert HTML file at C{path} and add it to C{book}. After creating
|
||||||
self.files = {} #: links that point to other files
|
the object, you must call L{self.process_links} on it to create the links and
|
||||||
|
then L{self.writeto} to output the LRF/S file.
|
||||||
|
|
||||||
|
@param book: The LRF book
|
||||||
|
@type book: L{libprs500.lrf.pylrs.Book}
|
||||||
|
@param path: path to the HTML file to process
|
||||||
|
@type path: C{str}
|
||||||
|
@param width: Width of the device on which the LRF file is to be read
|
||||||
|
@type width: C{int}
|
||||||
|
@param height: Height of the device on which the LRF file is to be read
|
||||||
|
@type height: C{int}
|
||||||
|
@param font_delta: The amount in pts by which all fonts should be changed
|
||||||
|
@type font_delta: C{int}
|
||||||
|
@param verbose: Whether processing should be verbose or not
|
||||||
|
@type verbose: C{bool}
|
||||||
|
@param cover: Path to an image to use as the cover of this book
|
||||||
|
@type cover: C{str}
|
||||||
|
'''
|
||||||
|
self.page_width = width #: The width of the page
|
||||||
|
self.page_height = height #: The height of the page
|
||||||
|
self.images = {} #: Images referenced in the HTML document
|
||||||
|
self.targets = {} #: <a name=...> elements
|
||||||
|
self.links = [] #: <a href=...> elements
|
||||||
|
self.files = {} #: links that point to other files
|
||||||
self.links_processed = False #: Whether links_processed has been called on this object
|
self.links_processed = False #: Whether links_processed has been called on this object
|
||||||
self.font_delta = font_delta
|
self.font_delta = font_delta
|
||||||
self.cover = cover
|
self.cover = cover
|
||||||
|
self.in_ol = False #: Flag indicating we're in an <ol> element
|
||||||
self.book = book #: The Book object representing a BBeB book
|
self.book = book #: The Book object representing a BBeB book
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
os.chdir(os.path.dirname(path))
|
os.chdir(os.path.dirname(path))
|
||||||
@ -301,7 +321,8 @@ class HTMLConverter(object):
|
|||||||
def merge_parent_css(prop, pcss):
|
def merge_parent_css(prop, pcss):
|
||||||
temp = {}
|
temp = {}
|
||||||
for key in pcss.keys():
|
for key in pcss.keys():
|
||||||
if key.lower().startswith('font'):
|
chk = key.lower()
|
||||||
|
if chk.startswith('font') or chk == 'text-align':
|
||||||
temp[key] = pcss[key]
|
temp[key] = pcss[key]
|
||||||
prop.update(temp)
|
prop.update(temp)
|
||||||
|
|
||||||
@ -330,11 +351,11 @@ class HTMLConverter(object):
|
|||||||
self.top = self.current_block
|
self.top = self.current_block
|
||||||
|
|
||||||
self.process_children(self.soup, {})
|
self.process_children(self.soup, {})
|
||||||
if self.current_para:
|
if self.current_para and get_text(self.current_para).strip():
|
||||||
self.current_block.append(self.current_para)
|
self.current_block.append(self.current_para)
|
||||||
if self.current_block:
|
if self.current_block and get_text(self.current_block).strip():
|
||||||
self.current_page.append(self.current_block)
|
self.current_page.append(self.current_block)
|
||||||
if self.current_page:
|
if self.current_page and get_text(self.current_page).strip():
|
||||||
self.book.append(self.current_page)
|
self.book.append(self.current_page)
|
||||||
|
|
||||||
|
|
||||||
@ -356,7 +377,7 @@ class HTMLConverter(object):
|
|||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
for link in self.links:
|
for link in self.links:
|
||||||
purl = urlparse(link.tag['href'])
|
purl = urlparse(link.tag['href'])
|
||||||
if purl[1]: # Not a local link
|
if purl[1]: # Not a link to a file on the local filesystem
|
||||||
continue
|
continue
|
||||||
path, fragment = purl[2], purl[5]
|
path, fragment = purl[2], purl[5]
|
||||||
para, tag = link.para, link.tag
|
para, tag = link.para, link.tag
|
||||||
@ -366,6 +387,7 @@ class HTMLConverter(object):
|
|||||||
jb = JumpButton(tb)
|
jb = JumpButton(tb)
|
||||||
self.book.append(jb)
|
self.book.append(jb)
|
||||||
cb = CharButton(jb, text=self.get_text(tag))
|
cb = CharButton(jb, text=self.get_text(tag))
|
||||||
|
para.contents = []
|
||||||
para.append(cb)
|
para.append(cb)
|
||||||
else:
|
else:
|
||||||
if not os.access(path, os.R_OK):
|
if not os.access(path, os.R_OK):
|
||||||
@ -378,7 +400,9 @@ class HTMLConverter(object):
|
|||||||
self.files[path] = HTMLConverter(self.book, path, \
|
self.files[path] = HTMLConverter(self.book, path, \
|
||||||
font_delta=self.font_delta, verbose=self.verbose)
|
font_delta=self.font_delta, verbose=self.verbose)
|
||||||
HTMLConverter.processed_files[path] = self.files[path]
|
HTMLConverter.processed_files[path] = self.files[path]
|
||||||
except:
|
except Exception, e:
|
||||||
|
print >>sys.stderr, 'Unable to process', path
|
||||||
|
traceback.print_exc()
|
||||||
continue
|
continue
|
||||||
finally:
|
finally:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
@ -392,6 +416,7 @@ class HTMLConverter(object):
|
|||||||
jb = JumpButton(tb)
|
jb = JumpButton(tb)
|
||||||
self.book.append(jb)
|
self.book.append(jb)
|
||||||
cb = CharButton(jb, text=self.get_text(tag))
|
cb = CharButton(jb, text=self.get_text(tag))
|
||||||
|
para.contents = []
|
||||||
para.append(cb)
|
para.append(cb)
|
||||||
|
|
||||||
self.links_processed = True
|
self.links_processed = True
|
||||||
@ -411,13 +436,13 @@ class HTMLConverter(object):
|
|||||||
End the current page, ensuring that any further content is displayed
|
End the current page, ensuring that any further content is displayed
|
||||||
on a new page.
|
on a new page.
|
||||||
"""
|
"""
|
||||||
if self.current_para.contents:
|
if get_text(self.current_para).strip():
|
||||||
self.current_block.append(self.current_para)
|
self.current_block.append(self.current_para)
|
||||||
self.current_para = Paragraph()
|
self.current_para = Paragraph()
|
||||||
if self.current_block.contents:
|
if get_text(self.current_block).strip():
|
||||||
self.current_page.append(self.current_block)
|
self.current_page.append(self.current_block)
|
||||||
self.current_block = TextBlock()
|
self.current_block = TextBlock()
|
||||||
if self.current_page.contents:
|
if get_text(self.current_page).strip():
|
||||||
self.book.append(self.current_page)
|
self.book.append(self.current_page)
|
||||||
self.current_page = Page()
|
self.current_page = Page()
|
||||||
|
|
||||||
@ -442,12 +467,37 @@ class HTMLConverter(object):
|
|||||||
self.add_text(c, pcss)
|
self.add_text(c, pcss)
|
||||||
|
|
||||||
def add_text(self, tag, css):
|
def add_text(self, tag, css):
|
||||||
try:
|
'''
|
||||||
self.current_para.append(Span(tag, self.sanctify_css(css), \
|
Add text to the current paragraph taking CSS into account.
|
||||||
font_delta=self.font_delta))
|
@param tag: Either a BeautifulSoup tag or a string
|
||||||
except ConversionError, err:
|
@param css:
|
||||||
if self.verbose:
|
@type css:
|
||||||
print >>sys.stderr, err
|
'''
|
||||||
|
src = tag.string if hasattr(tag, 'string') else str(tag)
|
||||||
|
if not src.strip():
|
||||||
|
self.current_para.append(' ')
|
||||||
|
else:
|
||||||
|
align = 'head'
|
||||||
|
if css.has_key('text-align'):
|
||||||
|
val = css['text-align']
|
||||||
|
if val in ["right", "foot"]:
|
||||||
|
align = "foot"
|
||||||
|
elif val == "center":
|
||||||
|
align = "center"
|
||||||
|
css.pop('text-align')
|
||||||
|
if align != self.current_block.textStyle.attrs['align']:
|
||||||
|
if get_text(self.current_para).strip():
|
||||||
|
self.current_block.append(self.current_para)
|
||||||
|
if get_text(self.current_block).strip():
|
||||||
|
self.current_page.append(self.current_block)
|
||||||
|
self.current_block = TextBlock(TextStyle(align=align))
|
||||||
|
self.current_para = Paragraph()
|
||||||
|
try:
|
||||||
|
self.current_para.append(Span(src, self.sanctify_css(css), \
|
||||||
|
font_delta=self.font_delta))
|
||||||
|
except ConversionError, err:
|
||||||
|
if self.verbose:
|
||||||
|
print >>sys.stderr, err
|
||||||
|
|
||||||
def sanctify_css(self, css):
|
def sanctify_css(self, css):
|
||||||
""" Make css safe for use in a SPAM Xylog tag """
|
""" Make css safe for use in a SPAM Xylog tag """
|
||||||
@ -461,22 +511,21 @@ class HTMLConverter(object):
|
|||||||
css.pop(key)
|
css.pop(key)
|
||||||
return css
|
return css
|
||||||
|
|
||||||
|
def end_current_para(self):
    '''
    Close the current paragraph and put a paragraph break after it.

    Nothing happens when the current paragraph holds no non-whitespace
    text. Otherwise the paragraph (if it has contents) is appended to the
    current block and replaced by a fresh one, and a C{CR} is appended to
    the block unless the block is empty or already ends with a C{CR}.
    '''
    para = self.current_para
    if get_text(para).strip():
        if para.contents:
            self.current_block.append(para)
            self.current_para = Paragraph()
        # Inspect the block only after the paragraph may have been added
        block_items = self.current_block.contents
        needs_break = bool(block_items) and \
                      not isinstance(block_items[-1], CR)
        if needs_break:
            self.current_block.append(CR())
|
||||||
|
|
||||||
def parse_tag(self, tag, parent_css):
|
def parse_tag(self, tag, parent_css):
|
||||||
|
|
||||||
def process_text_tag(tag, tag_css):
|
|
||||||
if 'page-break-before' in tag_css.keys():
|
|
||||||
if tag_css['page-break-before'].lower() != 'avoid':
|
|
||||||
self.end_page()
|
|
||||||
tag_css.pop('page-break-before')
|
|
||||||
end_page = False
|
|
||||||
if 'page-break-after' in tag_css.keys():
|
|
||||||
end_page = True
|
|
||||||
tag_css.pop('page-break-after')
|
|
||||||
self.process_children(tag, tag_css)
|
|
||||||
if end_page:
|
|
||||||
self.end_page()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tagname = tag.name.lower()
|
tagname = tag.name.lower()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
@ -488,17 +537,47 @@ class HTMLConverter(object):
|
|||||||
return
|
return
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
|
if 'page-break-before' in tag_css.keys():
|
||||||
|
if tag_css['page-break-before'].lower() != 'avoid':
|
||||||
|
self.end_page()
|
||||||
|
tag_css.pop('page-break-before')
|
||||||
|
end_page = False
|
||||||
|
if 'page-break-after' in tag_css.keys() and \
|
||||||
|
tag_css['page-break-after'].lower() != 'avoid':
|
||||||
|
end_page = True
|
||||||
|
tag_css.pop('page-break-after')
|
||||||
|
|
||||||
if tagname in ["title", "script", "meta", 'del']:
|
if tagname in ["title", "script", "meta", 'del']:
|
||||||
pass
|
pass
|
||||||
elif tagname == 'a':
|
elif tagname == 'a':
|
||||||
if tag.has_key('name'):
|
if tag.has_key('name'):
|
||||||
self.current_block.append(self.current_para)
|
if get_text(self.current_para).strip():
|
||||||
self.current_page.append(self.current_block)
|
self.current_block.append(self.current_para)
|
||||||
|
if get_text(self.current_block).strip():
|
||||||
|
self.current_page.append(self.current_block)
|
||||||
|
previous = self.current_block
|
||||||
tb = TextBlock()
|
tb = TextBlock()
|
||||||
self.current_block = tb
|
self.current_block = tb
|
||||||
self.current_para = Paragraph()
|
self.current_para = Paragraph()
|
||||||
self.targets[tag['name']] = tb
|
self.targets[tag['name']] = tb
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
|
if tb.parent == None:
|
||||||
|
if self.current_block == tb:
|
||||||
|
if get_text(self.current_para):
|
||||||
|
self.current_block.append(self.current_para)
|
||||||
|
self.current_para = Paragraph()
|
||||||
|
self.current_page.append(self.current_block)
|
||||||
|
self.current_block = TextBlock()
|
||||||
|
else:
|
||||||
|
found, marked = False, False
|
||||||
|
for item in self.current_page.contents:
|
||||||
|
if item == previous:
|
||||||
|
found = True
|
||||||
|
if found and isinstance(item, TextBlock):
|
||||||
|
self.targets[tag['name']] = item
|
||||||
|
marked = True
|
||||||
|
if not marked:
|
||||||
|
self.current_page.append(tb)
|
||||||
elif tag.has_key('href'):
|
elif tag.has_key('href'):
|
||||||
purl = urlparse(tag['href'])
|
purl = urlparse(tag['href'])
|
||||||
path = purl[2]
|
path = purl[2]
|
||||||
@ -506,19 +585,18 @@ class HTMLConverter(object):
|
|||||||
['png', 'jpg', 'bmp', 'jpeg']:
|
['png', 'jpg', 'bmp', 'jpeg']:
|
||||||
self.add_image_page(path)
|
self.add_image_page(path)
|
||||||
else:
|
else:
|
||||||
span = _Span()
|
self.add_text('Link: '+tag['href'], tag_css)
|
||||||
self.current_para.append(span)
|
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
||||||
self.links.append(HTMLConverter.Link(span, tag))
|
|
||||||
elif tagname == 'img':
|
elif tagname == 'img':
|
||||||
if tag.has_key('src') and os.access(tag['src'], os.R_OK):
|
if tag.has_key('src') and os.access(tag['src'], os.R_OK):
|
||||||
width, height = 600, 800
|
width, height = self.page_width, self.page_height
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
from PIL import Image
|
from PIL import Image as PILImage
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
im = Image.open(tag['src'])
|
im = PILImage.open(tag['src'])
|
||||||
width, height = im.size
|
width, height = im.size
|
||||||
if tag.has_key('width'):
|
if tag.has_key('width'):
|
||||||
width = int(tag['width'])
|
width = int(tag['width'])
|
||||||
@ -526,16 +604,26 @@ class HTMLConverter(object):
|
|||||||
height = int(tag['height'])
|
height = int(tag['height'])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
self.current_block.append(self.current_para)
|
|
||||||
self.current_page.append(self.current_block)
|
|
||||||
self.current_para = Paragraph()
|
|
||||||
self.current_block = TextBlock()
|
|
||||||
path = os.path.abspath(tag['src'])
|
path = os.path.abspath(tag['src'])
|
||||||
if not self.images.has_key(path):
|
if not self.images.has_key(path):
|
||||||
self.images[path] = ImageStream(path)
|
self.images[path] = ImageStream(path)
|
||||||
im = ImageBlock(self.images[path], x1=width, y1=height,
|
if max(width, height) <= min(self.page_width, self.page_height)/5.:
|
||||||
xsize=width, ysize=height)
|
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
|
||||||
self.current_page.append(im)
|
xsize=width, ysize=height)
|
||||||
|
self.current_para.append(Plot(im, xsize=width*10, ysize=width*10))
|
||||||
|
elif max(width, height) <= min(self.page_width, self.page_height)/2.:
|
||||||
|
self.end_current_para()
|
||||||
|
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
|
||||||
|
xsize=width, ysize=height)
|
||||||
|
self.current_para.append(Plot(im, xsize=width*10, ysize=width*10))
|
||||||
|
else:
|
||||||
|
self.current_block.append(self.current_para)
|
||||||
|
self.current_page.append(self.current_block)
|
||||||
|
self.current_para = Paragraph()
|
||||||
|
self.current_block = TextBlock()
|
||||||
|
im = ImageBlock(self.images[path], x1=width, y1=height,
|
||||||
|
xsize=width, ysize=height)
|
||||||
|
self.current_page.append(im)
|
||||||
else:
|
else:
|
||||||
print >>sys.stderr, "Failed to process", tag
|
print >>sys.stderr, "Failed to process", tag
|
||||||
|
|
||||||
@ -557,31 +645,73 @@ class HTMLConverter(object):
|
|||||||
f.close()
|
f.close()
|
||||||
except IOError:
|
except IOError:
|
||||||
pass
|
pass
|
||||||
elif tagname in ['p', 'div', 'ul', 'ol', 'tr', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
|
elif tagname == 'pre':
|
||||||
|
self.end_current_para()
|
||||||
|
src = ''.join([str(i) for i in tag.contents])
|
||||||
|
lines = src.split('\n')
|
||||||
|
for line in lines:
|
||||||
|
try:
|
||||||
|
self.current_para.append(Span(line, tag_css))
|
||||||
|
except ConversionError:
|
||||||
|
pass
|
||||||
|
self.current_para.CR()
|
||||||
|
elif tagname in ['ul', 'ol']:
|
||||||
|
self.in_ol = 1 if tagname == 'ol' else 0
|
||||||
|
self.end_current_para()
|
||||||
|
self.process_children(tag, tag_css)
|
||||||
|
self.in_ol = 0
|
||||||
|
self.end_current_para()
|
||||||
|
elif tagname == 'li':
|
||||||
|
prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
|
||||||
|
if get_text(self.current_para).strip():
|
||||||
|
self.current_para.append(CR())
|
||||||
|
self.current_block.append(self.current_para)
|
||||||
|
self.current_para = Paragraph()
|
||||||
|
self.current_para.append(Space(xsize=100))
|
||||||
|
self.current_para.append(prepend)
|
||||||
|
self.process_children(tag, tag_css)
|
||||||
|
if self.in_ol:
|
||||||
|
self.in_ol += 1
|
||||||
|
elif tagname in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
|
||||||
|
self.end_current_para()
|
||||||
|
if self.current_block.contents:
|
||||||
|
self.current_block.append(CR())
|
||||||
|
self.process_children(tag, tag_css)
|
||||||
|
self.end_current_para()
|
||||||
|
self.current_block.append(CR())
|
||||||
|
elif tagname in ['p', 'div']:
|
||||||
# TODO: Implement ol
|
# TODO: Implement ol
|
||||||
indent = tag_css.pop('text-indent', '')
|
#indent = tag_css.pop('text-indent', '')
|
||||||
if indent:
|
#if indent:
|
||||||
# TODO: If indent is different from current textblock's parindent
|
# TODO: If indent is different from current textblock's parindent
|
||||||
# start a new TextBlock
|
# start a new TextBlock
|
||||||
pass
|
#pass
|
||||||
self.current_para.CR() # Put a paragraph end
|
self.end_current_para()
|
||||||
self.current_block.append(self.current_para)
|
self.process_children(tag, tag_css)
|
||||||
self.current_para = Paragraph()
|
self.end_current_para()
|
||||||
process_text_tag(tag, tag_css)
|
|
||||||
elif tagname in ['b', 'strong', 'i', 'em', 'span']:
|
elif tagname in ['b', 'strong', 'i', 'em', 'span']:
|
||||||
process_text_tag(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
elif tagname == 'font':
|
elif tagname == 'font':
|
||||||
if tag.has_key('face'):
|
if tag.has_key('face'):
|
||||||
tag_css['font-family'] = tag['face']
|
tag_css['font-family'] = tag['face']
|
||||||
process_text_tag(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
elif tagname == 'br':
|
elif tagname in ['br', 'tr']:
|
||||||
self.current_para.append(CR())
|
self.current_para.append(CR())
|
||||||
|
self.process_children(tag, tag_css)
|
||||||
elif tagname == 'hr':
|
elif tagname == 'hr':
|
||||||
self.current_para.append(CR())
|
if self.current_para.contents:
|
||||||
# TODO: Horizontal line?
|
self.current_block.append(self.current_para)
|
||||||
|
self.current_para = Paragraph()
|
||||||
|
self.current_block.append(CR())
|
||||||
|
self.current_page.append(self.current_block)
|
||||||
|
self.current_block = TextBlock()
|
||||||
|
self.current_page.RuledLine(linelength=self.page_width)
|
||||||
else:
|
else:
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
|
|
||||||
|
if end_page:
|
||||||
|
self.end_page()
|
||||||
|
|
||||||
def writeto(self, path, lrs=False):
|
def writeto(self, path, lrs=False):
|
||||||
self.book.renderLrs(path) if lrs else self.book.renderLrf(path)
|
self.book.renderLrs(path) if lrs else self.book.renderLrf(path)
|
||||||
|
|
||||||
@ -632,6 +762,7 @@ def process_file(path, options):
|
|||||||
suffix = '.lrs' if options.lrs else '.lrf'
|
suffix = '.lrs' if options.lrs else '.lrf'
|
||||||
name = os.path.splitext(os.path.basename(path))[0] + suffix
|
name = os.path.splitext(os.path.basename(path))[0] + suffix
|
||||||
oname = os.path.join(cwd,name)
|
oname = os.path.join(cwd,name)
|
||||||
|
oname = os.path.abspath(os.path.expanduser(oname))
|
||||||
conv.writeto(oname, lrs=options.lrs)
|
conv.writeto(oname, lrs=options.lrs)
|
||||||
print 'Output written to', oname
|
print 'Output written to', oname
|
||||||
finally:
|
finally:
|
||||||
@ -692,7 +823,7 @@ def console_query(dirpath, candidate, docs):
|
|||||||
|
|
||||||
|
|
||||||
def get_path(path, query=console_query):
|
def get_path(path, query=console_query):
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(os.path.expanduser(path))
|
||||||
ext = os.path.splitext(path)[1][1:].lower()
|
ext = os.path.splitext(path)[1][1:].lower()
|
||||||
if ext in ['htm', 'html', 'xhtml']:
|
if ext in ['htm', 'html', 'xhtml']:
|
||||||
return None, path
|
return None, path
|
||||||
|
73
src/libprs500/lrf/html/demo/demo.html
Normal file
73
src/libprs500/lrf/html/demo/demo.html
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
<html>
|
||||||
|
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
|
||||||
|
<p>
|
||||||
|
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit <span style='font-family:sans-serif'>https://libprs500.kovidgoyal.net</span>
|
||||||
|
</p>
|
||||||
|
<h2><a name='toc'>Table of Contents</a></h2>
|
||||||
|
<ul style='page-break-after:always'>
|
||||||
|
<li><a href='#lists'>Demonstration of Lists</a></li>
|
||||||
|
<li><a href='#text'>Text formatting and ruled lines</a></li>
|
||||||
|
<li><a href='#images'>Inline images</a></li>
|
||||||
|
<li><a href='#recursive'>Recursive link following</a></li>
|
||||||
|
<li><a href='demo_ext.html'>The HTML used to create this file</a></li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<h2><a name='lists'>Lists</a></h2>
|
||||||
|
<p><h3>Unordered lists</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Item 1</li>
|
||||||
|
<li>Item 2</li>
|
||||||
|
</ul>
|
||||||
|
</p>
|
||||||
|
<p><h3>Ordered lists</h3>
|
||||||
|
<ol>
|
||||||
|
<li>Item 1</li>
|
||||||
|
<li>Item 2</li>
|
||||||
|
</ol>
|
||||||
|
</p>
|
||||||
|
<br/>
|
||||||
|
<p>
|
||||||
|
Note that nested lists are not supported.
|
||||||
|
</p>
|
||||||
|
<p style='page-break-after:always'>
|
||||||
|
<hr />
|
||||||
|
<a href='#toc'>Table of Contents</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2><a name='text'>Text formatting</a></h2>
|
||||||
|
<p>
|
||||||
|
A simple <i>paragraph</i> of <b>formatted
|
||||||
|
<i>text</i></b> with a ruled line following it.
|
||||||
|
</p>
|
||||||
|
<hr/>
|
||||||
|
<p> A
|
||||||
|
<span style='font-style:italic'>similar</span>
|
||||||
|
paragraph, but now using
|
||||||
|
<span style='font-weight:bold'>CSS</span>
|
||||||
|
to perform the text formatting.</p>
|
||||||
|
<hr/>
|
||||||
|
<center>A centered phrase</center>
|
||||||
|
<span style='text-align:right'>A right aligned phrase</span>
|
||||||
|
A normal phrase
|
||||||
|
<p style='page-break-after:always'>
|
||||||
|
<hr />
|
||||||
|
<a href='#toc'>Table of Contents</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2 style='page-break-before:always'><a name='images'>Inline images</a></h2>
|
||||||
|
<p>
|
||||||
|
Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img src='medium.jpg' /> and finally we have a large image which is automatically placed on a page by itself and prevented from being autoscaled when the user changes from S to M to L. Try changing sizes and see how the different embedding styles behave. <img src='large.jpg' />
|
||||||
|
</p>
|
||||||
|
<p style='page-break-after:always'>
|
||||||
|
<hr />
|
||||||
|
<a href='#toc'>Table of Contents</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2 style='page-break-before:always'><a name='recursive'>Recursive link following</a></h2>
|
||||||
|
<span style='font-family:monospace'>html2lrf</span> follows links in HTML files that point to other files, recursively. Thus it can be used to convert a whole tree of HTML files into a single LRF file.
|
||||||
|
<p style='page-break-after:always'>
|
||||||
|
<hr />
|
||||||
|
<a href='#toc'>Table of Contents</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
</html>
|
BIN
src/libprs500/lrf/html/demo/large.jpg
Normal file
BIN
src/libprs500/lrf/html/demo/large.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 61 KiB |
BIN
src/libprs500/lrf/html/demo/medium.jpg
Normal file
BIN
src/libprs500/lrf/html/demo/medium.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.0 KiB |
BIN
src/libprs500/lrf/html/demo/small.jpg
Normal file
BIN
src/libprs500/lrf/html/demo/small.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.0 KiB |
@ -40,7 +40,7 @@ def main():
|
|||||||
if len(args) != 1:
|
if len(args) != 1:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
src = args[0]
|
src = os.path.abspath(os.path.expanduser(args[0]))
|
||||||
if options.title == None:
|
if options.title == None:
|
||||||
options.title = os.path.splitext(os.path.basename(src))[0]
|
options.title = os.path.splitext(os.path.basename(src))[0]
|
||||||
try:
|
try:
|
||||||
@ -78,6 +78,7 @@ def convert_txt(path, options):
|
|||||||
buffer = ''
|
buffer = ''
|
||||||
basename = os.path.basename(path)
|
basename = os.path.basename(path)
|
||||||
oname = options.output
|
oname = options.output
|
||||||
|
oname = os.path.abspath(os.path.expanduser(oname))
|
||||||
if not oname:
|
if not oname:
|
||||||
oname = os.path.splitext(basename)[0]+'.lrf'
|
oname = os.path.splitext(basename)[0]+'.lrf'
|
||||||
try:
|
try:
|
||||||
|
7
upload
7
upload
@ -7,6 +7,13 @@ DOWNLOADS=$PREFIX/httpdocs/downloads
|
|||||||
DOCS=$PREFIX/httpdocs/apidocs
|
DOCS=$PREFIX/httpdocs/apidocs
|
||||||
exe=`cd dist && ls -1 libprs500-*.exe | tail -n1 && cd ..`
|
exe=`cd dist && ls -1 libprs500-*.exe | tail -n1 && cd ..`
|
||||||
|
|
||||||
|
|
||||||
|
echo "<h2>The HTML</h2><pre>" > src/libprs500/lrf/html/demo/demo_ext.html
|
||||||
|
cat src/libprs500/lrf/html/demo/demo.html >> src/libprs500/lrf/html/demo/demo_ext.html
|
||||||
|
echo '</pre>' >> src/libprs500/lrf/html/demo/demo_ext.html
|
||||||
|
html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf src/libprs500/lrf/html/demo/demo.html
|
||||||
|
scp /tmp/html2lrf.lrf castalia:$DOWNLOADS/
|
||||||
|
|
||||||
ssh castalia rm -f $DOWNLOADS/libprs500\*.exe
|
ssh castalia rm -f $DOWNLOADS/libprs500\*.exe
|
||||||
scp dist/$exe castalia:$DOWNLOADS/
|
scp dist/$exe castalia:$DOWNLOADS/
|
||||||
ssh castalia chmod a+r $DOWNLOADS/\*
|
ssh castalia chmod a+r $DOWNLOADS/\*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user