This commit is contained in:
Kovid Goyal 2015-03-19 10:27:33 +05:30
parent ca26df381a
commit c981e4b50d

View File

@ -18,8 +18,8 @@
#
# Contributor(s):
#
#import pdb
#pdb.set_trace()
# import pdb
# pdb.set_trace()
from xml.sax import handler
from xml.sax.saxutils import escape, quoteattr
from xml.dom import Node
@ -55,6 +55,7 @@ if False: # Added by Kovid
# In ODF a style can have a parent, these parents can be chained.
class StyleToCSS:
""" The purpose of the StyleToCSS class is to contain the rules to convert
ODF styles to CSS2. Since it needs the generic fonts, it would probably
make sense to also contain the Styles in a dict as well..
@ -76,7 +77,7 @@ class StyleToCSS:
(FONS,u"border-right"): self.c_fo,
(FONS,u"border-top"): self.c_fo,
(FONS,u"break-after"): self.c_break, # Added by Kovid
(FONS,u"break-before"): self.c_break,# Added by Kovid
(FONS,u"break-before"): self.c_break, # Added by Kovid
(FONS,u"color"): self.c_fo,
(FONS,u"font-family"): self.c_fo,
(FONS,u"font-size"): self.c_fo,
@ -119,12 +120,18 @@ class StyleToCSS:
This method puts the font and its generic fallback into a dictionary
"""
htmlgeneric = "sans-serif"
if generic == "roman": htmlgeneric = "serif"
elif generic == "swiss": htmlgeneric = "sans-serif"
elif generic == "modern": htmlgeneric = "monospace"
elif generic == "decorative": htmlgeneric = "sans-serif"
elif generic == "script": htmlgeneric = "monospace"
elif generic == "system": htmlgeneric = "serif"
if generic == "roman":
htmlgeneric = "serif"
elif generic == "swiss":
htmlgeneric = "sans-serif"
elif generic == "modern":
htmlgeneric = "monospace"
elif generic == "decorative":
htmlgeneric = "sans-serif"
elif generic == "script":
htmlgeneric = "monospace"
elif generic == "system":
htmlgeneric = "serif"
self.fontdict[name] = (family, htmlgeneric)
def c_drawfillimage(self, ruleset, sdict, rule, val):
@ -159,8 +166,10 @@ class StyleToCSS:
def c_text_align(self, ruleset, sdict, rule, align):
    """ Text align

    Store the CSS ``text-align`` value for an ODF alignment in ``sdict``.
    The logical values "start" and "end" are written as "left" and
    "right"; any other value is copied through unchanged.

    ``ruleset`` and ``rule`` are not used here; they are accepted so the
    method can be invoked as ``method(ruleset, sdict, rule, val)`` like
    the other rule converters.
    """
    if align == "start":
        align = "left"
    elif align == "end":
        align = "right"
    sdict['text-align'] = align
def c_fn(self, ruleset, sdict, rule, fontstyle):
@ -168,7 +177,7 @@ class StyleToCSS:
A generic font can be found in two ways. In a <style:font-face>
element or as a font-family-generic attribute in text-properties.
"""
generic = ruleset.get((STYLENS,'font-family-generic') )
generic = ruleset.get((STYLENS,'font-family-generic'))
if generic is not None:
self.save_font(fontstyle, fontstyle, generic)
family, htmlgeneric = self.fontdict.get(fontstyle, (fontstyle, 'serif'))
@ -205,7 +214,7 @@ class StyleToCSS:
sdict['vertical-align'] = textpos[0]
def c_hp(self, ruleset, sdict, rule, hpos):
#FIXME: Frames wrap-style defaults to 'parallel', graphics to 'none'.
# FIXME: Frames wrap-style defaults to 'parallel', graphics to 'none'.
# It is properly set in the parent-styles, but the program doesn't
# collect the information.
wrap = ruleset.get((STYLENS,'wrap'),'parallel')
@ -213,23 +222,23 @@ class StyleToCSS:
if hpos == "center":
sdict['margin-left'] = "auto"
sdict['margin-right'] = "auto"
# else:
# # force it to be *something* then delete it
# sdict['margin-left'] = sdict['margin-right'] = ''
# del sdict['margin-left'], sdict['margin-right']
# else:
# # force it to be *something* then delete it
# sdict['margin-left'] = sdict['margin-right'] = ''
# del sdict['margin-left'], sdict['margin-right']
if hpos in ("right","outside"):
if wrap in ( "left", "parallel","dynamic"):
if wrap in ("left", "parallel","dynamic"):
sdict['float'] = "right"
elif wrap == "run-through":
sdict['position'] = "absolute" # Simulate run-through
sdict['top'] = "0"
sdict['right'] = "0";
sdict['right'] = "0"
else: # No wrapping
sdict['margin-left'] = "auto"
sdict['margin-right'] = "0px"
elif hpos in ("left", "inside"):
if wrap in ( "right", "parallel","dynamic"):
if wrap in ("right", "parallel","dynamic"):
sdict['float'] = "left"
elif wrap == "run-through":
sdict['position'] = "absolute" # Simulate run-through
@ -239,11 +248,11 @@ class StyleToCSS:
sdict['margin-left'] = "0px"
sdict['margin-right'] = "auto"
elif hpos in ("from-left", "from-inside"):
if wrap in ( "right", "parallel"):
if wrap in ("right", "parallel"):
sdict['float'] = "left"
else:
sdict['position'] = "relative" # No wrapping
if ruleset.has_key( (SVGNS,'x') ):
if (SVGNS,'x') in ruleset:
sdict['left'] = ruleset[(SVGNS,'x')]
def c_page_width(self, ruleset, sdict, rule, val):
@ -279,18 +288,19 @@ class StyleToCSS:
if rule[0] == '':
sdict[rule[1]] = val
continue
method = self.ruleconversions.get(rule, None )
method = self.ruleconversions.get(rule, None)
if method:
method(ruleset, sdict, rule, val)
return sdict
class TagStack:
def __init__(self):
self.stack = []
def push(self, tag, attrs):
self.stack.append( (tag, attrs) )
self.stack.append((tag, attrs))
def pop(self):
item = self.stack.pop()
@ -303,13 +313,14 @@ class TagStack:
def rfindattr(self, attr):
""" Find a tag with the given attribute """
for tag, attrs in self.stack:
if attrs.has_key(attr):
if attr in attrs:
return attrs[attr]
return None
def count_tags(self, tag):
c = 0
for ttag, tattrs in self.stack:
if ttag == tag: c = c + 1
if ttag == tag:
c = c + 1
return c
special_styles = {
@ -335,12 +346,13 @@ special_styles = {
# 'P-Text_20_body':'p'
}
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
#
# ODFCONTENTHANDLER
#
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
class ODF2XHTML(handler.ContentHandler):
""" The ODF2XHTML parses an ODF file and produces XHTML"""
def __init__(self, generate_css=True, embedable=False):
@ -459,7 +471,6 @@ class ODF2XHTML(handler.ContentHandler):
self.elements[(OFFICENS, u"presentation")] = (None,None)
self.elements[(OFFICENS, u"document-content")] = (None,None)
def add_style_file(self, stylefilename, media=None):
""" Add a link to an external style file.
Also turns off the embedding of styles in the HTML
@ -506,7 +517,6 @@ class ODF2XHTML(handler.ContentHandler):
# Tags from meta.xml
self.metatags = []
def writeout(self, s):
if s != '':
self._wfunc(s)
@ -526,14 +536,14 @@ class ODF2XHTML(handler.ContentHandler):
self.writeout("<%s>" % tag)
else:
self.writeout("<%s %s>" % (tag, " ".join(a)))
if block == True:
if block:
self.writeout("\n")
def closetag(self, tag, block=True):
""" Close an open HTML tag """
self.htmlstack.pop()
self.writeout("</%s>" % tag)
if block == True:
if block:
self.writeout("\n")
def emptytag(self, tag, attrs={}):
@ -542,34 +552,34 @@ class ODF2XHTML(handler.ContentHandler):
a.append('''%s=%s''' % (key, quoteattr(val)))
self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
#--------------------------------------------------
# --------------------------------------------------
# Interface to parser
#--------------------------------------------------
# --------------------------------------------------
def characters(self, data):
if self.processelem and self.processcont:
self.data.append(data)
def startElementNS(self, tag, qname, attrs):
self.pstack.append( (self.processelem, self.processcont) )
self.pstack.append((self.processelem, self.processcont))
if self.processelem:
method = self.elements.get(tag, (None, None) )[0]
method = self.elements.get(tag, (None, None))[0]
if method:
self.handle_starttag(tag, method, attrs)
else:
self.unknown_starttag(tag,attrs)
self.tagstack.push( tag, attrs )
self.tagstack.push(tag, attrs)
def endElementNS(self, tag, qname):
stag, attrs = self.tagstack.pop()
if self.processelem:
method = self.elements.get(tag, (None, None) )[1]
method = self.elements.get(tag, (None, None))[1]
if method:
self.handle_endtag(tag, attrs, method)
else:
self.unknown_endtag(tag, attrs)
self.processelem, self.processcont = self.pstack.pop()
#--------------------------------------------------
# --------------------------------------------------
def handle_starttag(self, tag, method, attrs):
method(tag,attrs)
@ -604,27 +614,27 @@ class ODF2XHTML(handler.ContentHandler):
def get_anchor(self, name):
""" Create a unique anchor id for a href name """
if not self.anchors.has_key(name):
if name not in self.anchors:
# Changed by Kovid
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
return self.anchors.get(name)
#--------------------------------------------------
# --------------------------------------------------
def purgedata(self):
self.data = []
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
#
# Handle meta data
#
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def e_dc_title(self, tag, attrs):
""" Get the title from the meta data and create a HTML <title>
"""
self.title = ''.join(self.data)
#self.metatags.append('<title>%s</title>\n' % escape(self.title))
# self.metatags.append('<title>%s</title>\n' % escape(self.title))
self.data = []
def e_dc_metatag(self, tag, attrs):
@ -652,9 +662,9 @@ class ODF2XHTML(handler.ContentHandler):
"""
anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
htmltag = 'div'
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
name = "G-" + attrs.get((DRAWNS,'style-name'), "")
if name == 'G-':
name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
name = "PR-" + attrs.get((PRESENTATIONNS,'style-name'), "")
name = name.replace(".","_")
if anchor_type == "paragraph":
style = 'position:absolute;'
@ -665,13 +675,13 @@ class ODF2XHTML(handler.ContentHandler):
style = ''
else:
style = "position: absolute;"
if attrs.has_key( (SVGNS,"width") ):
if (SVGNS,"width") in attrs:
style = style + "width:" + attrs[(SVGNS,"width")] + ";"
if attrs.has_key( (SVGNS,"height") ):
if (SVGNS,"height") in attrs:
style = style + "height:" + attrs[(SVGNS,"height")] + ";"
if attrs.has_key( (SVGNS,"x") ):
if (SVGNS,"x") in attrs:
style = style + "left:" + attrs[(SVGNS,"x")] + ";"
if attrs.has_key( (SVGNS,"y") ):
if (SVGNS,"y") in attrs:
style = style + "top:" + attrs[(SVGNS,"y")] + ";"
if self.generate_css:
self.opentag(htmltag, {'class': name, 'style': style})
@ -688,9 +698,9 @@ class ODF2XHTML(handler.ContentHandler):
"""
anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
htmltag = 'div'
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
name = "G-" + attrs.get((DRAWNS,'style-name'), "")
if name == 'G-':
name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
name = "PR-" + attrs.get((PRESENTATIONNS,'style-name'), "")
name = name.replace(".","_")
if anchor_type == "paragraph":
style = 'position:relative;'
@ -701,13 +711,13 @@ class ODF2XHTML(handler.ContentHandler):
style = ''
else:
style = "position:absolute;"
if attrs.has_key( (SVGNS,"width") ):
if (SVGNS,"width") in attrs:
style = style + "width:" + attrs[(SVGNS,"width")] + ";"
if attrs.has_key( (SVGNS,"height") ):
if (SVGNS,"height") in attrs:
style = style + "height:" + attrs[(SVGNS,"height")] + ";"
if attrs.has_key( (SVGNS,"x") ):
if (SVGNS,"x") in attrs:
style = style + "left:" + attrs[(SVGNS,"x")] + ";"
if attrs.has_key( (SVGNS,"y") ):
if (SVGNS,"y") in attrs:
style = style + "top:" + attrs[(SVGNS,"y")] + ";"
if self.generate_css:
self.opentag(htmltag, {'class': name, 'style': style})
@ -720,7 +730,7 @@ class ODF2XHTML(handler.ContentHandler):
self.closetag('div')
def s_draw_fill_image(self, tag, attrs):
name = attrs.get( (DRAWNS,'name'), "NoName")
name = attrs.get((DRAWNS,'name'), "NoName")
imghref = attrs[(XLINKNS,"href")]
imghref = self.rewritelink(imghref)
self.cs.fillimages[name] = imghref
@ -738,7 +748,7 @@ class ODF2XHTML(handler.ContentHandler):
anchor_type = parent.get((TEXTNS,'anchor-type'))
imghref = attrs[(XLINKNS,"href")]
imghref = self.rewritelink(imghref)
htmlattrs = {'alt':"", 'src':imghref }
htmlattrs = {'alt':"", 'src':imghref}
if self.generate_css:
if anchor_type != "char":
htmlattrs['style'] = "display: block;"
@ -766,8 +776,8 @@ class ODF2XHTML(handler.ContentHandler):
class_id = attrs[(DRAWNS,"class-id")]
except KeyError: # Added by Kovid to ignore <draw> without the right
return # attributes
if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart
tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": # Microsoft Graph 97 Chart
tagattrs = {'name':'object_ole_graph', 'class':'ole-graph'}
self.opentag('a', tagattrs)
self.closetag('a', tagattrs)
@ -776,13 +786,13 @@ class ODF2XHTML(handler.ContentHandler):
Therefore if you convert a ODP file, you get a series of <fieldset>s.
Override this for your own purpose.
"""
name = attrs.get( (DRAWNS,'name'), "NoName")
stylename = attrs.get( (DRAWNS,'style-name'), "")
name = attrs.get((DRAWNS,'name'), "NoName")
stylename = attrs.get((DRAWNS,'style-name'), "")
stylename = stylename.replace(".","_")
masterpage = attrs.get( (DRAWNS,'master-page-name'),"")
masterpage = attrs.get((DRAWNS,'master-page-name'),"")
masterpage = masterpage.replace(".","_")
if self.generate_css:
self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage) })
self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage)})
else:
self.opentag('fieldset')
self.opentag('legend')
@ -794,7 +804,7 @@ class ODF2XHTML(handler.ContentHandler):
def s_draw_textbox(self, tag, attrs):
style = ''
if attrs.has_key( (FONS,"min-height") ):
if (FONS,"min-height") in attrs:
style = style + "min-height:" + attrs[(FONS,"min-height")] + ";"
self.opentag('div')
# self.opentag('div', {'style': style})
@ -837,14 +847,14 @@ dl.notes dd:last-of-type { page-break-after: avoid }
for name in self.stylestack:
styles = self.styledict.get(name)
# Preload with the family's default style
if styles.has_key('__style-family') and self.styledict.has_key(styles['__style-family']):
if '__style-family' in styles and styles['__style-family'] in self.styledict:
familystyle = self.styledict[styles['__style-family']].copy()
del styles['__style-family']
for style, val in styles.items():
familystyle[style] = val
styles = familystyle
# Resolve the remaining parent styles
while styles.has_key('__parent-style-name') and self.styledict.has_key(styles['__parent-style-name']):
while '__parent-style-name' in styles and styles['__parent-style-name'] in self.styledict:
parentstyle = self.styledict[styles['__parent-style-name']].copy()
del styles['__parent-style-name']
for style, val in styles.items():
@ -864,7 +874,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
css_styles[css2] = [name]
def filter_margins(css2):
names = { k for k, v in css2 }
names = {k for k, v in css2}
ignore = set()
if {'margin-left', 'margin-right', 'margin-top',
'margin-bottom'}.issubset(names):
@ -881,7 +891,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
for css2, names in css_styles.iteritems():
self.writeout("%s {\n" % ', '.join(names))
for style, val in filter_margins(css2):
self.writeout("\t%s: %s;\n" % (style, val) )
self.writeout("\t%s: %s;\n" % (style, val))
self.writeout("}\n")
def generate_footnotes(self):
@ -895,12 +905,12 @@ dl.notes dd:last-of-type { page-break-after: avoid }
for key in range(1,self.currentnote+1):
note = self.notedict[key]
# for key,note in self.notedict.items():
self.opentag('dt', { 'id':"footnote-%d" % key })
self.opentag('dt', {'id':"footnote-%d" % key})
# self.opentag('sup')
# self.writeout(escape(note['citation']))
# self.closetag('sup', False)
self.writeout('[')
self.opentag('a', { 'href': "#citation-%d" % key })
self.opentag('a', {'href': "#citation-%d" % key})
self.writeout(u"\u2190%d".encode('utf-8') % key)
self.closetag('a')
self.writeout(u']\xa0'.encode('utf-8'))
@ -922,7 +932,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
self.writeout('"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
self.opentag('html', {'xmlns':"http://www.w3.org/1999/xhtml"}, True)
self.opentag('head', block=True)
self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
self.emptytag('meta', {'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
for metaline in self.metatags:
self.writeout(metaline)
self.writeout('<title>%s</title>\n' % escape(self.title))
@ -939,8 +949,8 @@ dl.notes dd:last-of-type { page-break-after: avoid }
for the 'paragraph'. We therefore force a standard when we see
it is a presentation
"""
self.styledict['p'] = {(FONS,u'font-size'): u"24pt" }
self.styledict['presentation'] = {(FONS,u'font-size'): u"24pt" }
self.styledict['p'] = {(FONS,u'font-size'): u"24pt"}
self.styledict['presentation'] = {(FONS,u'font-size'): u"24pt"}
self.html_body(tag, attrs)
def e_office_presentation(self, tag, attrs):
@ -959,7 +969,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_office_text(self, tag, attrs):
    """ OpenDocument text

    Registers a default 'frame' style whose wrap mode is 'parallel' and
    then hands over to ``html_body``.
    """
    frame_defaults = {(STYLENS, 'wrap'): u'parallel'}
    self.styledict['frame'] = frame_defaults
    self.html_body(tag, attrs)
def e_office_text(self, tag, attrs):
@ -976,11 +986,10 @@ dl.notes dd:last-of-type { page-break-after: avoid }
for key,attr in attrs.items():
self.styledict[self.currentstyle][key] = attr
familymap = {'frame':'frame', 'paragraph':'p', 'presentation':'presentation',
'text':'span','section':'div',
'table':'table','table-cell':'td','table-column':'col',
'table-row':'tr','graphic':'graphic' }
'table-row':'tr','graphic':'graphic'}
def s_style_default_style(self, tag, attrs):
""" A default style is like a style on an HTML tag
@ -1003,11 +1012,11 @@ dl.notes dd:last-of-type { page-break-after: avoid }
"""
name = attrs[(STYLENS,"name")]
family = attrs[(SVGNS,"font-family")]
generic = attrs.get( (STYLENS,'font-family-generic'),"" )
generic = attrs.get((STYLENS,'font-family-generic'),"")
self.cs.save_font(name, family, generic)
def s_style_footer(self, tag, attrs):
self.opentag('div', { 'id':"footer" })
self.opentag('div', {'id':"footer"})
self.purgedata()
def e_style_footer(self, tag, attrs):
@ -1021,7 +1030,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
self.styledict[self.currentstyle] = {}
def s_style_header(self, tag, attrs):
self.opentag('div', { 'id':"header" })
self.opentag('div', {'id':"header"})
self.purgedata()
def e_style_header(self, tag, attrs):
@ -1067,10 +1076,10 @@ dl.notes dd:last-of-type { page-break-after: avoid }
self.stylestack.append(self.currentstyle)
self.styledict[self.currentstyle] = {('','position'):'relative'}
# Then load the pagelayout style if we find it
pagelayout = attrs.get( (STYLENS,'page-layout-name'), None)
pagelayout = attrs.get((STYLENS,'page-layout-name'), None)
if pagelayout:
pagelayout = ".PL-" + pagelayout
if self.styledict.has_key( pagelayout ):
if pagelayout in self.styledict:
styles = self.styledict[pagelayout]
for style, val in styles.items():
self.styledict[self.currentstyle][style] = val
@ -1082,7 +1091,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
_familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
'text':'S', 'section':'D',
'table':'T', 'table-cell':'TD', 'table-column':'TC',
'table-row':'TR', 'graphic':'G' }
'table-row':'TR', 'graphic':'G'}
def s_style_style(self, tag, attrs):
""" Collect the formatting for the style.
@ -1097,10 +1106,10 @@ dl.notes dd:last-of-type { page-break-after: avoid }
htmlfamily = self.familymap.get(family,'unknown')
sfamily = self._familyshort.get(family,'X')
name = "%s%s-%s" % (self.autoprefix, sfamily, name)
parent = attrs.get( (STYLENS,'parent-style-name') )
parent = attrs.get((STYLENS,'parent-style-name'))
self.currentstyle = special_styles.get(name,"."+name)
self.stylestack.append(self.currentstyle)
if not self.styledict.has_key(self.currentstyle):
if self.currentstyle not in self.styledict:
self.styledict[self.currentstyle] = {}
self.styledict[self.currentstyle]['__style-family'] = htmlfamily
@ -1109,7 +1118,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
if parent:
parent = "%s-%s" % (sfamily, parent)
parent = special_styles.get(parent, "."+parent)
if self.styledict.has_key( parent ):
if parent in self.styledict:
styles = self.styledict[parent]
for style, val in styles.items():
self.styledict[self.currentstyle][style] = val
@ -1124,10 +1133,10 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_table_table(self, tag, attrs):
""" Start a table
"""
c = attrs.get( (TABLENS,'style-name'), None)
c = attrs.get((TABLENS,'style-name'), None)
if c and self.generate_css:
c = c.replace(".","_")
self.opentag('table',{ 'class': "T-%s" % c })
self.opentag('table',{'class': "T-%s" % c})
else:
self.opentag('table')
self.purgedata()
@ -1141,17 +1150,17 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_table_table_cell(self, tag, attrs):
""" Start a table cell """
#FIXME: number-columns-repeated § 8.1.3
#repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
# FIXME: number-columns-repeated § 8.1.3
# repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
htmlattrs = {}
rowspan = attrs.get( (TABLENS,'number-rows-spanned') )
rowspan = attrs.get((TABLENS,'number-rows-spanned'))
if rowspan:
htmlattrs['rowspan'] = rowspan
colspan = attrs.get( (TABLENS,'number-columns-spanned') )
colspan = attrs.get((TABLENS,'number-columns-spanned'))
if colspan:
htmlattrs['colspan'] = colspan
c = attrs.get( (TABLENS,'style-name') )
c = attrs.get((TABLENS,'style-name'))
if c:
htmlattrs['class'] = 'TD-%s' % c.replace(".","_")
self.opentag('td', htmlattrs)
@ -1165,8 +1174,8 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_table_table_column(self, tag, attrs):
""" Start a table column """
c = attrs.get( (TABLENS,'style-name'), None)
repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
c = attrs.get((TABLENS,'style-name'), None)
repeated = int(attrs.get((TABLENS,'number-columns-repeated'), 1))
htmlattrs = {}
if c:
htmlattrs['class'] = "TC-%s" % c.replace(".","_")
@ -1176,8 +1185,8 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_table_table_row(self, tag, attrs):
""" Start a table row """
#FIXME: table:number-rows-repeated
c = attrs.get( (TABLENS,'style-name'), None)
# FIXME: table:number-rows-repeated
c = attrs.get((TABLENS,'style-name'), None)
htmlattrs = {}
if c:
htmlattrs['class'] = "TR-%s" % c.replace(".","_")
@ -1225,8 +1234,10 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_text_h(self, tag, attrs):
""" Headings start """
level = int(attrs[(TEXTNS,'outline-level')])
if level > 6: level = 6 # Heading levels go only to 6 in XHTML
if level < 1: level = 1
if level > 6:
level = 6 # Heading levels go only to 6 in XHTML
if level < 1:
level = 1
self.headinglevels[level] = self.headinglevels[level] + 1
name = self.classname(attrs)
for x in range(level + 1,10):
@ -1235,7 +1246,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
if special or not self.generate_css:
self.opentag('h%s' % level)
else:
self.opentag('h%s' % level, {'class':"P-%s" % name })
self.opentag('h%s' % level, {'class':"P-%s" % name})
self.purgedata()
def e_text_h(self, tag, attrs):
@ -1245,19 +1256,22 @@ dl.notes dd:last-of-type { page-break-after: avoid }
"""
self.writedata()
level = int(attrs[(TEXTNS,'outline-level')])
if level > 6: level = 6 # Heading levels go only to 6 in XHTML
if level < 1: level = 1
if level > 6:
level = 6 # Heading levels go only to 6 in XHTML
if level < 1:
level = 1
lev = self.headinglevels[1:level+1]
outline = '.'.join(map(str,lev) )
outline = '.'.join(map(str,lev))
heading = ''.join(self.data)
if self.title == '': self.title = heading
if self.title == '':
self.title = heading
# Changed by Kovid
tail = ''.join(self.data)
anchor = self.get_anchor("%s.%s" % ( outline, tail))
anchor = self.get_anchor("%s.%s" % (outline, tail))
anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506
self.opentag('a', {'id': anchor} )
self.opentag('a', {'id': anchor})
self.closetag('a', False)
self.opentag('a', {'id': anchor2} )
self.opentag('a', {'id': anchor2})
self.closetag('a', False)
self.closetag('h%s' % level)
self.purgedata()
@ -1273,7 +1287,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
To know which level we're at, we have to count the number
of <text:list> elements on the tagstack.
"""
name = attrs.get( (TEXTNS,'style-name') )
name = attrs.get((TEXTNS,'style-name'))
level = self.tagstack.count_tags(tag) + 1
if name:
name = name.replace(".","_")
@ -1281,10 +1295,10 @@ dl.notes dd:last-of-type { page-break-after: avoid }
# FIXME: If a list is contained in a table cell or text box,
# the list level must return to 1, even though the table or
# textbox itself may be nested within another list.
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
name = self.tagstack.rfindattr((TEXTNS,'style-name'))
list_class = "%s_%d" % (name, level)
if self.generate_css:
self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class})
else:
self.opentag('%s' % self.listtypes.get(list_class,'ul'))
self.purgedata()
@ -1292,7 +1306,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def e_text_list(self, tag, attrs):
""" End a list """
self.writedata()
name = attrs.get( (TEXTNS,'style-name') )
name = attrs.get((TEXTNS,'style-name'))
level = self.tagstack.count_tags(tag) + 1
if name:
name = name.replace(".","_")
@ -1300,7 +1314,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
# FIXME: If a list is contained in a table cell or text box,
# the list level must return to 1, even though the table or
# textbox itself may be nested within another list.
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
name = self.tagstack.rfindattr((TEXTNS,'style-name'))
list_class = "%s_%d" % (name, level)
self.closetag(self.listtypes.get(list_class,'ul'))
self.purgedata()
@ -1321,12 +1335,12 @@ dl.notes dd:last-of-type { page-break-after: avoid }
to a particular character, so we just go through
the available glyphs
"""
name = self.tagstack.rfindattr( (STYLENS,'name') )
name = self.tagstack.rfindattr((STYLENS,'name'))
level = attrs[(TEXTNS,'level')]
self.prevstyle = self.currentstyle
list_class = "%s_%s" % (name, level)
self.listtypes[list_class] = 'ul'
self.currentstyle = ".%s_%s" % ( name.replace(".","_"), level)
self.currentstyle = ".%s_%s" % (name.replace(".","_"), level)
self.stylestack.append(self.currentstyle)
self.styledict[self.currentstyle] = {}
@ -1341,19 +1355,25 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def s_text_list_level_style_number(self, tag, attrs):
    """ Start of a numbered list-level style.

    Builds the style key from the enclosing element's style:name and this
    element's text:level, records that key as an ordered list ('ol') in
    ``self.listtypes``, makes the matching CSS selector the current style
    (remembering the previous one in ``self.prevstyle``) and stores the
    CSS ``list-style-type`` that corresponds to the numbering format.
    Unrecognised or missing formats fall back to "decimal".
    """
    name = self.tagstack.stackparent()[(STYLENS, 'name')]
    level = attrs[(TEXTNS, 'level')]
    # The numbering format is carried by style:num-format. style:name is
    # taken from the parent element above, so looking it up on this
    # element always produced the "1" default and every list came out
    # decimal.
    num_format = attrs.get((STYLENS, 'num-format'), "1")
    list_class = "%s_%s" % (name, level)
    self.prevstyle = self.currentstyle
    self.currentstyle = ".%s_%s" % (name.replace(".", "_"), level)
    self.listtypes[list_class] = 'ol'
    self.stylestack.append(self.currentstyle)
    self.styledict[self.currentstyle] = {}
    css_list_types = {
        "1": "decimal",
        "I": "upper-roman",
        "i": "lower-roman",
        "A": "upper-alpha",
        "a": "lower-alpha",
    }
    listtype = css_list_types.get(num_format, "decimal")
    self.styledict[self.currentstyle][('', 'list-style-type')] = listtype
def e_text_list_level_style_number(self, tag, attrs):
@ -1406,7 +1426,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
"""
htmlattrs = {}
specialtag = "p"
c = attrs.get( (TEXTNS,'style-name'), None)
c = attrs.get((TEXTNS,'style-name'), None)
if c:
c = c.replace(".","_")
specialtag = special_styles.get("P-"+c)
@ -1421,7 +1441,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
""" End Paragraph
"""
specialtag = "p"
c = attrs.get( (TEXTNS,'style-name'), None)
c = attrs.get((TEXTNS,'style-name'), None)
if c:
c = c.replace(".","_")
specialtag = special_styles.get("P-"+c)
@ -1454,7 +1474,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
typically used to set properties of the text.
"""
self.writedata()
c = attrs.get( (TEXTNS,'style-name'), None)
c = attrs.get((TEXTNS,'style-name'), None)
htmlattrs = {}
# Changed by Kovid to handle inline special styles defined on <text:span> tags.
# Apparently LibreOffice does this.
@ -1473,7 +1493,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
def e_text_span(self, tag, attrs):
""" End the <text:span> """
self.writedata()
c = attrs.get( (TEXTNS,'style-name'), None)
c = attrs.get((TEXTNS,'style-name'), None)
# Changed by Kovid to handle inline special styles defined on <text:span> tags.
# Apparently LibreOffice does this.
special = 'span'
@ -1507,11 +1527,11 @@ dl.notes dd:last-of-type { page-break-after: avoid }
self.purgedata()
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
#
# Reading the file
#
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def load(self, odffile):
""" Loads a document into the parser and parses it.
@ -1535,7 +1555,6 @@ dl.notes dd:last-of-type { page-break-after: avoid }
if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
self.characters(unicode(node))
def odf2xhtml(self, odffile):
""" Load a file and return the XHTML
"""
@ -1543,7 +1562,8 @@ dl.notes dd:last-of-type { page-break-after: avoid }
return self.xhtml()
def _wlines(self,s):
if s != '': self.lines.append(s)
if s != '':
self.lines.append(s)
def xhtml(self):
""" Returns the xhtml
@ -1551,7 +1571,8 @@ dl.notes dd:last-of-type { page-break-after: avoid }
return ''.join(self.lines)
def _writecss(self, s):
if s != '': self._csslines.append(s)
if s != '':
self._csslines.append(s)
def _writenothing(self, s):
pass
@ -1583,6 +1604,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
class ODF2XHTMLembedded(ODF2XHTML):
""" The ODF2XHTML parses an ODF file and produces XHTML"""
def __init__(self, lines, generate_css=True, embedable=False):
@ -1625,21 +1647,21 @@ class ODF2XHTMLembedded(ODF2XHTML):
# (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
(OFFICENS, "scripts"):(self.s_ignorexml, None),
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
## (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
# (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
# (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
# (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "font-face"):(self.s_style_font_face, None),
## (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
## (STYLENS, "footer-style"):(self.s_style_footer_style, None),
# (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
# (STYLENS, "footer-style"):(self.s_style_footer_style, None),
# (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "handout-master"):(self.s_ignorexml, None),
## (STYLENS, "header"):(self.s_style_header, self.e_style_header),
## (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
## (STYLENS, "header-style"):(self.s_style_header_style, None),
# (STYLENS, "header"):(self.s_style_header, self.e_style_header),
# (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "header-style"):(self.s_style_header_style, None),
# (STYLENS, "master-page"):(self.s_style_master_page, None),
# (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
## (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
# (STYLENS, "page-layout"):(self.s_ignorexml, None),
# (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "style"):(self.s_style_style, self.e_style_style),
@ -1680,4 +1702,3 @@ class ODF2XHTMLembedded(ODF2XHTML):
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "page-number"):(None, None),
}