This commit is contained in:
Kovid Goyal 2008-09-12 23:56:58 -07:00
parent 51a0ce414a
commit bc115198c7
3 changed files with 27 additions and 4 deletions

View File

@ -5,6 +5,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, sys, re, shutil, cStringIO import os, sys, re, shutil, cStringIO
from lxml.etree import XPath from lxml.etree import XPath
from lxml import etree
from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\ from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
opf_traverse, create_metadata, rebase_toc opf_traverse, create_metadata, rebase_toc
@ -15,7 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
class HTMLProcessor(Parser): class HTMLProcessor(Parser):
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles): def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, toc=None):
Parser.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, Parser.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
name='html2epub') name='html2epub')
if opts.verbose > 2: if opts.verbose > 2:
@ -26,6 +27,9 @@ class HTMLProcessor(Parser):
if opts.verbose > 2: if opts.verbose > 2:
self.debug_tree('nocss') self.debug_tree('nocss')
if toc is not None:
self.populate_toc(toc)
self.collect_font_statistics() self.collect_font_statistics()
self.split() self.split()
@ -37,6 +41,23 @@ class HTMLProcessor(Parser):
style += ';page-break-before: always' style += ';page-break-before: always'
elem.set(style, style) elem.set(style, style)
def save(self):
    '''
    Embed the collected CSS in the document and then save it.

    A ``<style type="text/css">`` element is appended to the first
    ``<head>`` element found; if there is none, to the first ``<body>``;
    and if neither exists, to the root element itself. Its text is
    ``self.css`` preceded by a newline. Saving is then delegated to
    :meth:`Parser.save`.
    '''
    # ``or`` short-circuits, so //body is only queried when no <head> exists.
    candidates = self.root.xpath('//head') or self.root.xpath('//body')
    container = candidates[0] if candidates else self.root

    css_node = etree.SubElement(container, 'style', attrib={'type':'text/css'})
    css_node.text = '\n' + self.css
    css_node.tail = '\n\n'

    Parser.save(self)
def populate_toc(self, toc):
    '''
    Placeholder for adding this file's entries to ``toc``.

    Currently only guards against excessive nesting: returns immediately
    when ``self.level`` has reached ``self.opts.max_toc_recursion``.
    Nothing is added to ``toc`` by the code visible here.
    '''
    depth_exhausted = self.level >= self.opts.max_toc_recursion
    if depth_exhausted:
        return
def collect_font_statistics(self): def collect_font_statistics(self):
''' '''
Collect font statistics to figure out the base font size used in this Collect font statistics to figure out the base font size used in this
@ -50,8 +71,9 @@ class HTMLProcessor(Parser):
def split(self): def split(self):
''' Split into individual flows to accommodate Adobe's incompetence ''' ''' Split into individual flows to accommodate Adobe's incompetence '''
# TODO: Split on page breaks, keeping track of anchors (a.name and id) # TODO: Only split file larger than 300K (as specified in profile)
# and preserving tree structure so that CSS continues to apply # Split on page breaks first and then on <h1-6> tags and then on
# <div> and finally on <p>.
pass pass

View File

@ -447,6 +447,7 @@ class Parser(PreProcessor, LoggingInterface):
css.append('#%s {%s}'%(id, 'page-break-before:always')) css.append('#%s {%s}'%(id, 'page-break-before:always'))
self.raw_css = '\n\n'.join(css) self.raw_css = '\n\n'.join(css)
self.css = unicode(self.raw_css)
# TODO: Figure out what to do about CSS imports from linked stylesheets # TODO: Figure out what to do about CSS imports from linked stylesheets
def config(defaults=None, config_name='html', def config(defaults=None, config_name='html',

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, glob, sys import os, glob
from urlparse import urlparse from urlparse import urlparse
from urllib import unquote from urllib import unquote