mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:...
This commit is contained in:
parent
51a0ce414a
commit
bc115198c7
@ -5,6 +5,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
import os, sys, re, shutil, cStringIO
|
import os, sys, re, shutil, cStringIO
|
||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
|
from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
|
||||||
opf_traverse, create_metadata, rebase_toc
|
opf_traverse, create_metadata, rebase_toc
|
||||||
@ -15,7 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
|
|||||||
|
|
||||||
class HTMLProcessor(Parser):
|
class HTMLProcessor(Parser):
|
||||||
|
|
||||||
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles):
|
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, toc=None):
|
||||||
Parser.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
|
Parser.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
|
||||||
name='html2epub')
|
name='html2epub')
|
||||||
if opts.verbose > 2:
|
if opts.verbose > 2:
|
||||||
@ -26,6 +27,9 @@ class HTMLProcessor(Parser):
|
|||||||
if opts.verbose > 2:
|
if opts.verbose > 2:
|
||||||
self.debug_tree('nocss')
|
self.debug_tree('nocss')
|
||||||
|
|
||||||
|
if toc is not None:
|
||||||
|
self.populate_toc(toc)
|
||||||
|
|
||||||
self.collect_font_statistics()
|
self.collect_font_statistics()
|
||||||
|
|
||||||
self.split()
|
self.split()
|
||||||
@ -37,6 +41,23 @@ class HTMLProcessor(Parser):
|
|||||||
style += ';page-break-before: always'
|
style += ';page-break-before: always'
|
||||||
elem.set(style, style)
|
elem.set(style, style)
|
||||||
|
|
||||||
|
def save(self):
|
||||||
|
head = self.root.xpath('//head')
|
||||||
|
if head:
|
||||||
|
head = head[0]
|
||||||
|
else:
|
||||||
|
head = self.root.xpath('//body')
|
||||||
|
head = head[0] if head else self.root
|
||||||
|
style = etree.SubElement(head, 'style', attrib={'type':'text/css'})
|
||||||
|
style.text='\n'+self.css
|
||||||
|
style.tail = '\n\n'
|
||||||
|
Parser.save(self)
|
||||||
|
|
||||||
|
def populate_toc(self, toc):
|
||||||
|
if self.level >= self.opts.max_toc_recursion:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
def collect_font_statistics(self):
|
def collect_font_statistics(self):
|
||||||
'''
|
'''
|
||||||
Collect font statistics to figure out the base font size used in this
|
Collect font statistics to figure out the base font size used in this
|
||||||
@ -50,8 +71,9 @@ class HTMLProcessor(Parser):
|
|||||||
|
|
||||||
def split(self):
|
def split(self):
|
||||||
''' Split into individual flows to accommodate Adobe's incompetence '''
|
''' Split into individual flows to accommodate Adobe's incompetence '''
|
||||||
# TODO: Split on page breaks, keeping track of anchors (a.name and id)
|
# TODO: Only split file larger than 300K (as specified in profile)
|
||||||
# and preserving tree structure so that CSS continues to apply
|
# Split on page breaks first and then on <h1-6> tags and then on
|
||||||
|
# <div> and finally on <p>.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -447,6 +447,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
css.append('#%s {%s}'%(id, 'page-break-before:always'))
|
css.append('#%s {%s}'%(id, 'page-break-before:always'))
|
||||||
|
|
||||||
self.raw_css = '\n\n'.join(css)
|
self.raw_css = '\n\n'.join(css)
|
||||||
|
self.css = unicode(self.raw_css)
|
||||||
# TODO: Figure out what to do about CSS imports from linked stylesheets
|
# TODO: Figure out what to do about CSS imports from linked stylesheets
|
||||||
|
|
||||||
def config(defaults=None, config_name='html',
|
def config(defaults=None, config_name='html',
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
import os, glob, sys
|
import os, glob
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user