From 50ad3d91364c3c802fb917619415b7e0910784c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 08:33:06 -0700 Subject: [PATCH 01/19] Fix ESPN soccernet feed --- resources/recipes/espn.recipe | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/resources/recipes/espn.recipe b/resources/recipes/espn.recipe index 178dbf27a8..34c772f767 100644 --- a/resources/recipes/espn.recipe +++ b/resources/recipes/espn.recipe @@ -41,7 +41,8 @@ class ESPN(BasicNewsRecipe): ''' - feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'), + feeds = [ + ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'), 'http://sports.espn.go.com/espn/rss/nfl/news', 'http://sports.espn.go.com/espn/rss/nba/news', 'http://sports.espn.go.com/espn/rss/mlb/news', @@ -107,10 +108,11 @@ class ESPN(BasicNewsRecipe): if match and 'soccernet' not in url and 'bassmaster' not in url: return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story' else: - if match and 'soccernet' in url: - splitlist = url.split("&", 5) - newurl = 'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] ) - return newurl + if 'soccernet' in url: + match = re.search(r'/id/(\d+)/', url) + if match: + return \ + 'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1) #else: # if 'bassmaster' in url: # return url From 2101dcf2b5e0925b42c8af00ddc64243a021e042 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 09:18:10 -0700 Subject: [PATCH 02/19] EPUB Output: Remove unnecessary CSS page breaks as they confuse the latest release of iBooks --- resources/templates/html.css | 5 ----- src/calibre/ebooks/conversion/plumber.py | 4 +++- src/calibre/ebooks/lit/output.py | 3 ++- src/calibre/ebooks/oeb/transforms/flatcss.py | 5 ++++- src/calibre/ebooks/oeb/transforms/split.py | 7 ++++++- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/resources/templates/html.css 
b/resources/templates/html.css index e9b683ca34..79c80583bf 100644 --- a/resources/templates/html.css +++ b/resources/templates/html.css @@ -391,11 +391,6 @@ noembed, param, link { display: none; } -/* Page breaks at body tags, to help out with LIT-generation */ -body { - page-break-before: always; -} - /* Explicit line-breaks are blocks, sure... */ br { display: block; diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 1d263eb762..9a0c3f3c7f 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -984,7 +984,9 @@ OptionRecommendation(name='sr3_replace', flattener = CSSFlattener(fbase=fbase, fkey=fkey, lineh=line_height, untable=self.output_plugin.file_type in ('mobi','lit'), - unfloat=self.output_plugin.file_type in ('mobi', 'lit')) + unfloat=self.output_plugin.file_type in ('mobi', 'lit'), + page_break_on_body=self.output_plugin.file_type in ('mobi', + 'lit')) flattener(self.oeb, self.opts) self.opts.insert_blank_line = oibl self.opts.remove_paragraph_spacing = orps diff --git a/src/calibre/ebooks/lit/output.py b/src/calibre/ebooks/lit/output.py index 423fb9ce7c..0b07bc7705 100644 --- a/src/calibre/ebooks/lit/output.py +++ b/src/calibre/ebooks/lit/output.py @@ -22,7 +22,8 @@ class LITOutput(OutputFormatPlugin): from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder from calibre.ebooks.lit.writer import LitWriter from calibre.ebooks.oeb.transforms.split import Split - split = Split(split_on_page_breaks=True, max_flow_size=0) + split = Split(split_on_page_breaks=True, max_flow_size=0, + remove_css_pagebreaks=False) split(self.oeb, self.opts) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index db6bdf0a7a..368f5eb289 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -100,12 +100,13 @@ def FontMapper(sbase=None, dbase=None, dkey=None): class 
CSSFlattener(object): def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False, - untable=False): + untable=False, page_break_on_body=False): self.fbase = fbase self.fkey = fkey self.lineh = lineh self.unfloat = unfloat self.untable = untable + self.page_break_on_body = page_break_on_body @classmethod def config(cls, cfg): @@ -139,6 +140,8 @@ class CSSFlattener(object): bs.append('margin-right : %fpt'%\ float(self.context.margin_right)) bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) + if self.page_break_on_body: + bs.extend(['page-break-before: always']) if self.context.change_justification != 'original': bs.append('text-align: '+ self.context.change_justification) body.set('style', '; '.join(bs)) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 4633131dc0..69de740ddc 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -38,11 +38,12 @@ class SplitError(ValueError): class Split(object): def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None, - max_flow_size=0): + max_flow_size=0, remove_css_pagebreaks=True): self.split_on_page_breaks = split_on_page_breaks self.page_breaks_xpath = page_breaks_xpath self.max_flow_size = max_flow_size self.page_break_selectors = None + self.remove_css_pagebreaks = remove_css_pagebreaks if self.page_breaks_xpath is not None: self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)] @@ -83,12 +84,16 @@ class Split(object): if before and before != 'avoid': self.page_break_selectors.add((CSSSelector(rule.selectorText), True)) + if self.remove_css_pagebreaks: + rule.style.removeProperty('page-break-before') except: pass try: if after and after != 'avoid': self.page_break_selectors.add((CSSSelector(rule.selectorText), False)) + if self.remove_css_pagebreaks: + rule.style.removeProperty('page-break-after') except: pass page_breaks = set([]) From 019c17973e1907c2e50cbf3ab67e73f008c279e0 
Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 09:21:51 -0700 Subject: [PATCH 03/19] ODT Input: Do not force the background color to white. Fixes #9118 (White background color from OpenOffice to Mobi added) --- src/odf/odf2xhtml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 6e3e753ebb..53a3e87dc2 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -659,7 +659,8 @@ class ODF2XHTML(handler.ContentHandler): self.opentag('style', {'type':"text/css"}, True) self.writeout('/* Date: Thu, 24 Feb 2011 09:26:10 -0700 Subject: [PATCH 04/19] ... --- src/calibre/web/feeds/templates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index eefd897614..225a78be5c 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -136,7 +136,7 @@ class FeedTemplate(Template): head.append(STYLE(style, type='text/css')) if extra_css: head.append(STYLE(extra_css, type='text/css')) - body = BODY(style='page-break-before:always') + body = BODY() body.append(self.get_navbar(f, feeds)) div = DIV( @@ -322,7 +322,7 @@ class TouchscreenFeedTemplate(Template): head.append(STYLE(style, type='text/css')) if extra_css: head.append(STYLE(extra_css, type='text/css')) - body = BODY(style='page-break-before:always') + body = BODY() div = DIV( top_navbar, H2(feed.title, CLASS('feed_title')) From 07d1ca7ec3669ecaa2d202819be3659c5dada94b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 10:38:26 -0700 Subject: [PATCH 05/19] ... 
--- src/calibre/ebooks/mobi/writer.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 0c33dffef2..2be699e525 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -2256,22 +2256,22 @@ class MobiWriter(object): return sectionIndices, sectionParents def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents): - sectionArticles = list(section.iter())[1:] - # Iterate over the section's articles + sectionArticles = list(section.iter())[1:] + # Iterate over the section's articles - for (j, article) in enumerate(sectionArticles): - # Recompute offset and length for each article - offset, length = self._compute_offset_length(i, article, entries) - if self.opts.verbose > 2 : - self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) ) + for (j, article) in enumerate(sectionArticles): + # Recompute offset and length for each article + offset, length = self._compute_offset_length(i, article, entries) + if self.opts.verbose > 2 : + self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) ) - ctoc_map_index = i + j + 1 + ctoc_map_index = i + j + 1 - #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset') - #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset') - mySectionParent = sectionParents[sectionIndices[i-1]] - myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index ) - mySectionParent.addArticle( myNewArticle ) + #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset') + #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset') + mySectionParent = sectionParents[sectionIndices[i-1]] + myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index ) + mySectionParent.addArticle( myNewArticle ) def _add_book_chapters(self, myDoc, 
indxt, indices): chapterCount = myDoc.documentStructure.chapterCount() From 02562da2a96226ee23d36b2c0793753cb4ad85c4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 10:43:22 -0700 Subject: [PATCH 06/19] ODT input: Update odfpy library to latest version, adds support for bookmarks --- src/odf/attrconverters.py | 76 ++++-- src/odf/element.py | 84 +++++-- src/odf/grammar.py | 2 +- src/odf/load.py | 4 +- src/odf/namespaces.py | 17 +- src/odf/odf2xhtml.py | 469 ++++++++++++++++++++++++++++++-------- src/odf/opendocument.py | 99 +++++--- 7 files changed, 576 insertions(+), 175 deletions(-) diff --git a/src/odf/attrconverters.py b/src/odf/attrconverters.py index 0117324bba..b75f80a2dd 100644 --- a/src/odf/attrconverters.py +++ b/src/odf/attrconverters.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2008 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element): # Potentially accept color values def cnv_color(attribute, arg, element): + """ A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where + rr, gg and bb are 8-bit hexadecimal digits. + """ return str(arg) def cnv_configtype(attribute, arg, element): @@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element): # Understand different date formats def cnv_date(attribute, arg, element): + """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime + value. + """ return str(arg) def cnv_dateTime(attribute, arg, element): + """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime + value. 
+ """ return str(arg) def cnv_double(attribute, arg, element): @@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element): return str(arg) def cnv_family(attribute, arg, element): + """ A style family """ if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell", "graphic", "presentation", "drawing-page", "chart"): raise ValueError, "'%s' not allowed" % str(arg) return str(arg) +def __save_prefix(attribute, arg, element): + prefix = arg.split(':',1)[0] + if prefix == arg: + return unicode(arg) + namespace = element.get_knownns(prefix) + if namespace is None: + #raise ValueError, "'%s' is an unknown prefix" % str(prefix) + return unicode(arg) + p = element.get_nsprefix(namespace) + return unicode(arg) + +def cnv_formula(attribute, arg, element): + """ A string containing a formula. Formulas do not have a predefined syntax, but the string should + begin with a namespace prefix, followed by a “:” (COLON, U+003A) separator, followed by the text + of the formula. The namespace bound to the prefix determines the syntax and semantics of the + formula. + """ + return __save_prefix(attribute, arg, element) + def cnv_ID(attribute, arg, element): return str(arg) @@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element): pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))') def cnv_length(attribute, arg, element): + """ A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the + Units of Measure defined in §5.9.13 of [XSL]. 
+ """ global pattern_length if not pattern_length.match(arg): raise ValueError, "'%s' is not a valid length" % arg @@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element): if not pattern_namespacedToken.match(arg): raise ValueError, "'%s' is not a valid namespaced token" % arg - return arg + return __save_prefix(attribute, arg, element) -# Must accept string as argument -# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName -# Essentially an XML name minus ':' def cnv_NCName(attribute, arg, element): + """ NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName + Essentially an XML name minus ':' + """ if type(arg) in types.StringTypes: return make_NCName(arg) else: @@ -226,6 +258,7 @@ attrconverters = { ((ANIMNS,u'name'), None): cnv_string, ((ANIMNS,u'sub-item'), None): cnv_string, ((ANIMNS,u'value'), None): cnv_string, +# ((DBNS,u'type'), None): cnv_namespacedToken, ((CHARTNS,u'attached-axis'), None): cnv_string, ((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor, ((CHARTNS,u'class'), None): cnv_namespacedToken, @@ -288,7 +321,7 @@ attrconverters = { ((CHARTNS,u'values-cell-range-address'), None): cnv_string, ((CHARTNS,u'vertical'), None): cnv_boolean, ((CHARTNS,u'visible'), None): cnv_boolean, - ((CONFIGNS,u'name'), None): cnv_string, + ((CONFIGNS,u'name'), None): cnv_formula, ((CONFIGNS,u'type'), None): cnv_configtype, ((DR3DNS,u'ambient-color'), None): cnv_string, ((DR3DNS,u'back-scale'), None): cnv_string, @@ -369,11 +402,11 @@ attrconverters = { ((DRAWNS,u'decimal-places'), None): cnv_string, ((DRAWNS,u'display'), None): cnv_string, ((DRAWNS,u'display-name'), None): cnv_string, - ((DRAWNS,u'distance'), None): cnv_string, + ((DRAWNS,u'distance'), None): cnv_lengthorpercent, ((DRAWNS,u'dots1'), None): cnv_integer, - ((DRAWNS,u'dots1-length'), None): cnv_length, + ((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent, ((DRAWNS,u'dots2'), None): cnv_integer, - ((DRAWNS,u'dots2-length'), None): cnv_length, + 
((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent, ((DRAWNS,u'end-angle'), None): cnv_double, ((DRAWNS,u'end'), None): cnv_string, ((DRAWNS,u'end-color'), None): cnv_string, @@ -383,7 +416,7 @@ attrconverters = { ((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string, ((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string, ((DRAWNS,u'end-shape'), None): cnv_IDREF, - ((DRAWNS,u'engine'), None): cnv_string, + ((DRAWNS,u'engine'), None): cnv_namespacedToken, ((DRAWNS,u'enhanced-path'), None): cnv_string, ((DRAWNS,u'escape-direction'), None): cnv_string, ((DRAWNS,u'extrusion-allowed'), None): cnv_boolean, @@ -604,7 +637,7 @@ attrconverters = { ((FORMNS,u'button-type'), None): cnv_string, ((FORMNS,u'command'), None): cnv_string, ((FORMNS,u'command-type'), None): cnv_string, - ((FORMNS,u'control-implementation'), None): cnv_string, + ((FORMNS,u'control-implementation'), None): cnv_namespacedToken, ((FORMNS,u'convert-empty-to-null'), None): cnv_boolean, ((FORMNS,u'current-selected'), None): cnv_boolean, ((FORMNS,u'current-state'), None): cnv_string, @@ -800,8 +833,8 @@ attrconverters = { ((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean, ((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger, ((PRESENTATIONNS,u'visibility'), None): cnv_string, - ((SCRIPTNS,u'event-name'), None): cnv_string, - ((SCRIPTNS,u'language'), None): cnv_string, + ((SCRIPTNS,u'event-name'), None): cnv_formula, + ((SCRIPTNS,u'language'), None): cnv_formula, ((SCRIPTNS,u'macro-name'), None): cnv_string, ((SMILNS,u'accelerate'), None): cnv_double, ((SMILNS,u'accumulate'), None): cnv_string, @@ -1087,7 +1120,7 @@ attrconverters = { ((SVGNS,u'y2'), None): cnv_lengthorpercent, ((TABLENS,u'acceptance-state'), None): cnv_string, ((TABLENS,u'add-empty-lines'), None): cnv_boolean, - ((TABLENS,u'algorithm'), None): cnv_string, + ((TABLENS,u'algorithm'), None): cnv_formula, ((TABLENS,u'align'), None): cnv_string, ((TABLENS,u'allow-empty-cell'), None): cnv_boolean, 
((TABLENS,u'application-data'), None): cnv_string, @@ -1106,7 +1139,7 @@ attrconverters = { ((TABLENS,u'cell-range'), None): cnv_string, ((TABLENS,u'column'), None): cnv_integer, ((TABLENS,u'comment'), None): cnv_string, - ((TABLENS,u'condition'), None): cnv_string, + ((TABLENS,u'condition'), None): cnv_formula, ((TABLENS,u'condition-source'), None): cnv_string, ((TABLENS,u'condition-source-range-address'), None): cnv_string, ((TABLENS,u'contains-error'), None): cnv_boolean, @@ -1144,13 +1177,13 @@ attrconverters = { ((TABLENS,u'end-x'), None): cnv_length, ((TABLENS,u'end-y'), None): cnv_length, ((TABLENS,u'execute'), None): cnv_boolean, - ((TABLENS,u'expression'), None): cnv_string, + ((TABLENS,u'expression'), None): cnv_formula, ((TABLENS,u'field-name'), None): cnv_string, ((TABLENS,u'field-number'), None): cnv_nonNegativeInteger, ((TABLENS,u'field-number'), None): cnv_string, ((TABLENS,u'filter-name'), None): cnv_string, ((TABLENS,u'filter-options'), None): cnv_string, - ((TABLENS,u'formula'), None): cnv_string, + ((TABLENS,u'formula'), None): cnv_formula, ((TABLENS,u'function'), None): cnv_string, ((TABLENS,u'function'), None): cnv_string, ((TABLENS,u'grand-total'), None): cnv_string, @@ -1290,7 +1323,7 @@ attrconverters = { ((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean, ((TEXTNS,u'comma-separated'), None): cnv_boolean, ((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef, - ((TEXTNS,u'condition'), None): cnv_string, + ((TEXTNS,u'condition'), None): cnv_formula, ((TEXTNS,u'connection-name'), None): cnv_string, ((TEXTNS,u'consecutive-numbering'), None): cnv_boolean, ((TEXTNS,u'continue-numbering'), None): cnv_boolean, @@ -1321,7 +1354,7 @@ attrconverters = { ((TEXTNS,u'first-row-start-column'), None): cnv_string, ((TEXTNS,u'fixed'), None): cnv_boolean, ((TEXTNS,u'footnotes-position'), None): cnv_string, - ((TEXTNS,u'formula'), None): cnv_string, + ((TEXTNS,u'formula'), None): cnv_formula, ((TEXTNS,u'global'), None): cnv_boolean, 
((TEXTNS,u'howpublished'), None): cnv_string, ((TEXTNS,u'id'), None): cnv_ID, @@ -1437,7 +1470,10 @@ attrconverters = { class AttrConverters: def convert(self, attribute, value, element): - conversion = attrconverters.get((attribute,element), None) + """ Based on the element, figures out how to check/convert the attribute value + All values are converted to string + """ + conversion = attrconverters.get((attribute, element.qname), None) if conversion is not None: return conversion(attribute, value, element) else: diff --git a/src/odf/element.py b/src/odf/element.py index f0938ba53e..aad698045e 100644 --- a/src/odf/element.py +++ b/src/odf/element.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (C) 2007-2008 Søren Roug, European Environment Agency +# Copyright (C) 2007-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -112,6 +112,9 @@ class Node(xml.dom.Node): return self.childNodes[-1] def insertBefore(self, newChild, refChild): + """ Inserts the node newChild before the existing child node refChild. + If refChild is null, insert newChild at the end of the list of children. + """ if newChild.nodeType not in self._child_node_types: raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName) if newChild.parentNode is not None: @@ -135,21 +138,26 @@ class Node(xml.dom.Node): newChild.parentNode = self return newChild - def appendChild(self, node): - if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(node.childNodes): + def appendChild(self, newChild): + """ Adds the node newChild to the end of the list of children of this node. + If the newChild is already in the tree, it is first removed. 
+ """ + if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: + for c in tuple(newChild.childNodes): self.appendChild(c) ### The DOM does not clearly specify what to return in this case - return node - if node.nodeType not in self._child_node_types: - raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName) - if node.parentNode is not None: - node.parentNode.removeChild(node) - _append_child(self, node) - node.nextSibling = None - return node + return newChild + if newChild.nodeType not in self._child_node_types: + raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName) + if newChild.parentNode is not None: + newChild.parentNode.removeChild(newChild) + _append_child(self, newChild) + newChild.nextSibling = None + return newChild def removeChild(self, oldChild): + """ Removes the child node indicated by oldChild from the list of children, and returns it. + """ #FIXME: update ownerDocument.element_dict or find other solution try: self.childNodes.remove(oldChild) @@ -191,8 +199,8 @@ def _append_child(self, node): node.__dict__["parentNode"] = self class Childless: - """Mixin that makes childless-ness easy to implement and avoids - the complexity of the Node methods that deal with children. + """ Mixin that makes childless-ness easy to implement and avoids + the complexity of the Node methods that deal with children. 
""" attributes = None @@ -207,6 +215,7 @@ class Childless: return None def appendChild(self, node): + """ Raises an error """ raise xml.dom.HierarchyRequestErr( self.tagName + " nodes cannot have children") @@ -214,14 +223,17 @@ class Childless: return False def insertBefore(self, newChild, refChild): + """ Raises an error """ raise xml.dom.HierarchyRequestErr( self.tagName + " nodes do not have children") def removeChild(self, oldChild): + """ Raises an error """ raise xml.dom.NotFoundErr( self.tagName + " nodes do not have children") def replaceChild(self, newChild, oldChild): + """ Raises an error """ raise xml.dom.HierarchyRequestErr( self.tagName + " nodes do not have children") @@ -247,8 +259,12 @@ class CDATASection(Childless, Text): nodeType = Node.CDATA_SECTION_NODE def toXml(self,level,f): + """ Generate XML output of the node. If the text contains "]]>", then + escape it by going out of CDATA mode (]]>), then write the string + and then go into CDATA mode again. (' % self.data) + f.write('' % self.data.replace(']]>',']]>]]>" % (r[1].lower().replace('-',''), self.tagName) + def get_knownns(self, prefix): + """ Odfpy maintains a list of known namespaces. In some cases a prefix is used, and + we need to know which namespace it resolves to. + """ + global nsdict + for ns,p in nsdict.items(): + if p == prefix: return ns + return None + def get_nsprefix(self, namespace): + """ Odfpy maintains a list of known namespaces. In some cases we have a namespace URL, + and needs to look up or assign the prefix for it. 
+ """ if namespace is None: namespace = "" prefix = _nsassign(namespace) if not self.namespaces.has_key(namespace): @@ -339,6 +367,9 @@ class Element(Node): self.ownerDocument.rebuild_caches(element) def addText(self, text, check_grammar=True): + """ Adds text to an element + Setting check_grammar=False turns off grammar checking + """ if check_grammar and self.qname not in grammar.allows_text: raise IllegalText, "The <%s> element does not allow text" % self.tagName else: @@ -346,6 +377,9 @@ class Element(Node): self.appendChild(Text(text)) def addCDATA(self, cdata, check_grammar=True): + """ Adds CDATA to an element + Setting check_grammar=False turns off grammar checking + """ if check_grammar and self.qname not in grammar.allows_text: raise IllegalText, "The <%s> element does not allow text" % self.tagName else: @@ -403,17 +437,18 @@ class Element(Node): # if allowed_attrs and (namespace, localpart) not in allowed_attrs: # raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName) c = AttrConverters() - self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname) + self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self) def getAttrNS(self, namespace, localpart): prefix = self.get_nsprefix(namespace) - return self.attributes.get(prefix + ":" + localpart) + return self.attributes.get((namespace, localpart)) def removeAttrNS(self, namespace, localpart): - prefix = self.get_nsprefix(namespace) - del self.attributes[prefix + ":" + localpart] + del self.attributes[(namespace, localpart)] def getAttribute(self, attr): + """ Get an attribute value. 
The method knows which namespace the attribute is in + """ allowed_attrs = self.allowed_attributes() if allowed_attrs is None: if type(attr) == type(()): @@ -432,8 +467,9 @@ class Element(Node): if level == 0: for namespace, prefix in self.namespaces.items(): f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') - for attkey in self.attributes.keys(): - f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8'))) + for qname in self.attributes.keys(): + prefix = self.get_nsprefix(qname[0]) + f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8'))) f.write('>') def write_close_tag(self, level, f): @@ -445,8 +481,9 @@ class Element(Node): if level == 0: for namespace, prefix in self.namespaces.items(): f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') - for attkey in self.attributes.keys(): - f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8'))) + for qname in self.attributes.keys(): + prefix = self.get_nsprefix(qname[0]) + f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8'))) if self.childNodes: f.write('>') for element in self.childNodes: @@ -464,6 +501,7 @@ class Element(Node): return accumulator def getElementsByType(self, element): + """ Gets elements based on the type, which is function from text.py, draw.py etc. 
""" obj = element(check_grammar=False) return self._getElementsByObj(obj,[]) diff --git a/src/odf/grammar.py b/src/odf/grammar.py index 09ec02cbaa..d5d8d5970e 100644 --- a/src/odf/grammar.py +++ b/src/odf/grammar.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2009 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/src/odf/load.py b/src/odf/load.py index 1f0e45ea23..e48fcaa412 100644 --- a/src/odf/load.py +++ b/src/odf/load.py @@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler): self.level = self.level + 1 # Add any accumulated text content - content = ''.join(self.data).strip() - if len(content) > 0: + content = ''.join(self.data) + if len(content.strip()) > 0: self.parent.addText(content, check_grammar=False) self.data = [] # Create the element diff --git a/src/odf/namespaces.py b/src/odf/namespaces.py index 3109210bb5..96ea958e79 100644 --- a/src/odf/namespaces.py +++ b/src/odf/namespaces.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2009 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -17,7 +17,7 @@ # # Contributor(s): # -TOOLSVERSION = u"ODFPY/0.9.2dev" +TOOLSVERSION = u"ODFPY/0.9.4dev" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" @@ -28,19 +28,23 @@ DCNS = u"http://purl.org/dc/elements/1.1/" DOMNS = u"http://www.w3.org/2001/xml-events" DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" +FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" FONS = 
u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0" +GRDDLNS = u"http://www.w3.org/2003/g/data-view#" KOFFICENS = u"http://www.koffice.org/2005/" MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" MATHNS = u"http://www.w3.org/1998/Math/MathML" METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0" NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0" +OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2" OOONS = u"http://openoffice.org/2004/office" OOOWNS = u"http://openoffice.org/2004/writer" OOOCNS = u"http://openoffice.org/2004/calc" PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#" +RPTNS = u"http://openoffice.org/2005/report" SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0" SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0" STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0" @@ -50,7 +54,8 @@ TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0" XFORMSNS = u"http://www.w3.org/2002/xforms" XLINKNS = u"http://www.w3.org/1999/xlink" XMLNS = u"http://www.w3.org/XML/1998/namespace" - +XSDNS = u"http://www.w3.org/2001/XMLSchema" +XSINS = u"http://www.w3.org/2001/XMLSchema-instance" nsdict = { ANIMNS: u'anim', @@ -61,19 +66,23 @@ nsdict = { DOMNS: u'dom', DR3DNS: u'dr3d', DRAWNS: u'draw', + FIELDNS: u'field', FONS: u'fo', FORMNS: u'form', + GRDDLNS: u'grddl', KOFFICENS: u'koffice', MANIFESTNS: u'manifest', MATHNS: u'math', METANS: u'meta', NUMBERNS: u'number', OFFICENS: u'office', + OFNS: u'of', OOONS: u'ooo', OOOWNS: u'ooow', OOOCNS: u'oooc', PRESENTATIONNS: u'presentation', RDFANS: u'rdfa', + RPTNS: u'rpt', SCRIPTNS: u'script', SMILNS: u'smil', STYLENS: u'style', @@ -83,4 +92,6 @@ nsdict = { XFORMSNS: u'xforms', XLINKNS: u'xlink', XMLNS: u'xml', + XSDNS: u'xsd', + 
XSINS: u'xsi', } diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 53a3e87dc2..390d407d16 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (C) 2006-2007 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -20,15 +20,18 @@ # #import pdb #pdb.set_trace() -import zipfile -from xml.sax import handler, expatreader -from xml.sax.xmlreader import InputSource +from xml.sax import handler from xml.sax.saxutils import escape, quoteattr -from cStringIO import StringIO +from xml.dom import Node -from namespaces import DCNS, DRAWNS, FONS, \ - METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \ - STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS +from opendocument import load + +from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \ + FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \ + SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS + +if False: # Added by Kovid + DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS # Handling of styles # @@ -72,8 +75,8 @@ class StyleToCSS: (FONS,u"border-left"): self.c_fo, (FONS,u"border-right"): self.c_fo, (FONS,u"border-top"): self.c_fo, - (FONS,u"break-after"): self.c_break, - (FONS,u"break-before"): self.c_break, + (FONS,u"break-after"): self.c_break, # Added by Kovid + (FONS,u"break-before"): self.c_break,# Added by Kovid (FONS,u"color"): self.c_fo, (FONS,u"font-family"): self.c_fo, (FONS,u"font-size"): self.c_fo, @@ -136,7 +139,7 @@ class StyleToCSS: selector = rule[1] sdict[selector] = val - def c_break(self, ruleset, sdict, rule, val): + def c_break(self, ruleset, sdict, rule, val): # Added by Kovid property = 'page-' + rule[1] values = {'auto': 'auto', 'column': 'always', 'page': 'always', 'even-page': 'left', 
'odd-page': 'right', @@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler): self.elements = { (DCNS, 'title'): (self.s_processcont, self.e_dc_title), (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), - (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag), + (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator), (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag), (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag), + (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape), (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame), (DRAWNS, 'image'): (self.s_draw_image, None), (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), (DRAWNS, "layer-set"):(self.s_ignorexml, None), + (DRAWNS, 'object'): (self.s_draw_object, None), + (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None), (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox), (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), @@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler): (NUMBERNS, "date-style"):(self.s_ignorexml, None), (NUMBERNS, "number-style"):(self.s_ignorexml, None), (NUMBERNS, "text-style"):(self.s_ignorexml, None), + (OFFICENS, "annotation"):(self.s_ignorexml, None), (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None), + (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content), (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content), (OFFICENS, "forms"):(self.s_ignorexml, None), (OFFICENS, "master-styles"):(self.s_office_master_styles, None), @@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler): (OFFICENS, "styles"):(self.s_office_styles, None), (OFFICENS, "text"):(self.s_office_text, self.e_office_text), (OFFICENS, "scripts"):(self.s_ignorexml, None), + (OFFICENS, "settings"):(self.s_ignorexml, None), (PRESENTATIONNS, 
"notes"):(self.s_ignorexml, None), # (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout), (STYLENS, "default-page-layout"):(self.s_ignorexml, None), @@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler): # (STYLENS, "header-style"):(self.s_style_header_style, None), (STYLENS, "master-page"):(self.s_style_master_page, None), (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None), -# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), - (STYLENS, "page-layout"):(self.s_ignorexml, None), + (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), +# (STYLENS, "page-layout"):(self.s_ignorexml, None), (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None), (STYLENS, "style"):(self.s_style_style, self.e_style_style), (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None), @@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler): (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None), (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'bookmark'): (self.s_text_bookmark, None), + (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None), + (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a), + (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None), (TEXTNS, 'h'): (self.s_text_h, self.e_text_h), (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, 'line-break'):(self.s_text_line_break, None), @@ -430,10 +443,66 @@ class ODF2XHTML(handler.ContentHandler): (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), } if embedable: - self.elements[(OFFICENS, u"text")] = (None,None) - self.elements[(OFFICENS, u"spreadsheet")] = (None,None) - self.elements[(OFFICENS, u"presentation")] = (None,None) - 
self.elements[(OFFICENS, u"document-content")] = (None,None) + self.make_embedable() + self._resetobject() + + def set_plain(self): + """ Tell the parser to not generate CSS """ + self.generate_css = False + + def set_embedable(self): + """ Tells the converter to only output the parts inside the """ + self.elements[(OFFICENS, u"text")] = (None,None) + self.elements[(OFFICENS, u"spreadsheet")] = (None,None) + self.elements[(OFFICENS, u"presentation")] = (None,None) + self.elements[(OFFICENS, u"document-content")] = (None,None) + + + def add_style_file(self, stylefilename, media=None): + """ Add a link to an external style file. + Also turns of the embedding of styles in the HTML + """ + self.use_internal_css = False + self.stylefilename = stylefilename + if media: + self.metatags.append('\n' % (stylefilename,media)) + else: + self.metatags.append('\n' % (stylefilename)) + + def _resetfootnotes(self): + # Footnotes and endnotes + self.notedict = {} + self.currentnote = 0 + self.notebody = '' + + def _resetobject(self): + self.lines = [] + self._wfunc = self._wlines + self.xmlfile = '' + self.title = '' + self.language = '' + self.creator = '' + self.data = [] + self.tagstack = TagStack() + self.htmlstack = [] + self.pstack = [] + self.processelem = True + self.processcont = True + self.listtypes = {} + self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10 + self.use_internal_css = True + self.cs = StyleToCSS() + self.anchors = {} + + # Style declarations + self.stylestack = [] + self.styledict = {} + self.currentstyle = None + + self._resetfootnotes() + + # Tags from meta.xml + self.metatags = [] def writeout(self, s): @@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler): def opentag(self, tag, attrs={}, block=False): """ Create an open HTML tag """ + self.htmlstack.append((tag,attrs,block)) a = [] for key,val in attrs.items(): a.append('''%s=%s''' % (key, quoteattr(val))) @@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler): 
self.writeout("\n") def closetag(self, tag, block=True): + """ Close an open HTML tag """ + self.htmlstack.pop() self.writeout("" % tag) if block == True: self.writeout("\n") @@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler): a.append('''%s=%s''' % (key, quoteattr(val))) self.writeout("<%s %s/>\n" % (tag, " ".join(a))) +#-------------------------------------------------- +# Interface to parser #-------------------------------------------------- def characters(self, data): if self.processelem and self.processcont: self.data.append(data) - def handle_starttag(self, tag, method, attrs): - method(tag,attrs) - - def handle_endtag(self, tag, attrs, method): - method(tag, attrs) - def startElementNS(self, tag, qname, attrs): self.pstack.append( (self.processelem, self.processcont) ) if self.processelem: @@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler): self.unknown_endtag(tag, attrs) self.processelem, self.processcont = self.pstack.pop() +#-------------------------------------------------- + def handle_starttag(self, tag, method, attrs): + method(tag,attrs) + + def handle_endtag(self, tag, attrs, method): + method(tag, attrs) + def unknown_starttag(self, tag, attrs): pass @@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler): self.processelem = False def s_ignorecont(self, tag, attrs): + """ Stop processing the text nodes """ self.processcont = False def s_processcont(self, tag, attrs): + """ Start processing the text nodes """ self.processcont = True def classname(self, attrs): """ Generate a class name from a style name """ - c = attrs[(TEXTNS,'style-name')] + c = attrs.get((TEXTNS,'style-name'),'') c = c.replace(".","_") return c def get_anchor(self, name): + """ Create a unique anchor id for a href name """ if not self.anchors.has_key(name): # Changed by Kovid self.anchors[name] = "anchor%d" % (len(self.anchors) + 1) @@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler): def e_dc_title(self, tag, attrs): """ Get the title from the 
meta data and create a HTML """ - self.metatags.append('<title>%s\n' % escape(''.join(self.data))) self.title = ''.join(self.data) + #self.metatags.append('%s\n' % escape(self.title)) self.data = [] def e_dc_metatag(self, tag, attrs): @@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler): def e_dc_contentlanguage(self, tag, attrs): """ Set the content language. Identifies the targeted audience """ - self.metatags.append('\n' % ''.join(self.data)) + self.language = ''.join(self.data) + self.metatags.append('\n' % escape(self.language)) self.data = [] + def e_dc_creator(self, tag, attrs): + """ Set the content creator. Identifies the targeted audience + """ + self.creator = ''.join(self.data) + self.metatags.append('\n' % escape(self.creator)) + self.data = [] + + def s_custom_shape(self, tag, attrs): + """ A is made into a
in HTML which is then styled + """ + anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound') + htmltag = 'div' + name = "G-" + attrs.get( (DRAWNS,'style-name'), "") + if name == 'G-': + name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "") + name = name.replace(".","_") + if anchor_type == "paragraph": + style = 'position:absolute;' + elif anchor_type == 'char': + style = "position:absolute;" + elif anchor_type == 'as-char': + htmltag = 'div' + style = '' + else: + style = "position: absolute;" + if attrs.has_key( (SVGNS,"width") ): + style = style + "width:" + attrs[(SVGNS,"width")] + ";" + if attrs.has_key( (SVGNS,"height") ): + style = style + "height:" + attrs[(SVGNS,"height")] + ";" + if attrs.has_key( (SVGNS,"x") ): + style = style + "left:" + attrs[(SVGNS,"x")] + ";" + if attrs.has_key( (SVGNS,"y") ): + style = style + "top:" + attrs[(SVGNS,"y")] + ";" + if self.generate_css: + self.opentag(htmltag, {'class': name, 'style': style}) + else: + self.opentag(htmltag) + + def e_custom_shape(self, tag, attrs): + """ End the + """ + self.closetag('div') + def s_draw_frame(self, tag, attrs): """ A is made into a
in HTML which is then styled """ - anchor_type = attrs.get((TEXTNS,'anchor-type'),'char') + anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound') htmltag = 'div' name = "G-" + attrs.get( (DRAWNS,'style-name'), "") if name == 'G-': @@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler): htmltag = 'div' style = '' else: - style = "position: absolute;" + style = "position:absolute;" if attrs.has_key( (SVGNS,"width") ): style = style + "width:" + attrs[(SVGNS,"width")] + ";" if attrs.has_key( (SVGNS,"height") ): @@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler): htmlattrs['style'] = "display: block;" self.emptytag('img', htmlattrs) + def s_draw_object(self, tag, attrs): + """ A is embedded object in the document (e.g. spreadsheet in presentation). + """ + return # Added by Kovid + objhref = attrs[(XLINKNS,"href")] + # Remove leading "./": from "./Object 1" to "Object 1" +# objhref = objhref [2:] + + # Not using os.path.join since it fails to find the file on Windows. +# objcontentpath = '/'.join([objhref, 'content.xml']) + + for c in self.document.childnodes: + if c.folder == objhref: + self._walknode(c.topnode) + + def s_draw_object_ole(self, tag, attrs): + """ A is embedded OLE object in the document (e.g. MS Graph). + """ + class_id = attrs[(DRAWNS,"class-id")] + if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart + tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' } + self.opentag('a', tagattrs) + self.closetag('a', tagattrs) + def s_draw_page(self, tag, attrs): """ A is a slide in a presentation. We use a
element in HTML. Therefore if you convert a ODP file, you get a series of
s. @@ -655,14 +801,9 @@ class ODF2XHTML(handler.ContentHandler): def html_body(self, tag, attrs): self.writedata() - if self.generate_css: + if self.generate_css and self.use_internal_css: self.opentag('style', {'type':"text/css"}, True) self.writeout('/**/\n') self.closetag('style') @@ -670,6 +811,16 @@ class ODF2XHTML(handler.ContentHandler): self.closetag('head') self.opentag('body', block=True) + # background-color: white removed by Kovid for #9118 + # Specifying an explicit bg color prevents ebook readers + # from successfully inverting colors + default_styles = """ +img { width: 100%; height: 100%; } +* { padding: 0; margin: 0; } +body { margin: 0 1em; } +ol, ul { padding-left: 2em; } +""" + def generate_stylesheet(self): for name in self.stylestack: styles = self.styledict.get(name) @@ -689,6 +840,7 @@ class ODF2XHTML(handler.ContentHandler): styles = parentstyle self.styledict[name] = styles # Write the styles to HTML + self.writeout(self.default_styles) for name in self.stylestack: styles = self.styledict.get(name) css2 = self.cs.convert_styles(styles) @@ -730,6 +882,7 @@ class ODF2XHTML(handler.ContentHandler): self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"}) for metaline in self.metatags: self.writeout(metaline) + self.writeout('%s\n' % escape(self.title)) def e_office_document_content(self, tag, attrs): """ Last tag """ @@ -774,7 +927,7 @@ class ODF2XHTML(handler.ContentHandler): """ Copy all attributes to a struct. We will later convert them to CSS2 """ - if self.currentstyle is None: + if self.currentstyle is None: # Added by Kovid return for key,attr in attrs.items(): self.styledict[self.currentstyle][key] = attr @@ -800,7 +953,7 @@ class ODF2XHTML(handler.ContentHandler): def s_style_font_face(self, tag, attrs): """ It is possible that the HTML browser doesn't know how to show a particular font. Luckily ODF provides generic fallbacks - Unluckily they are not the same as CSS2. 
+ Unfortunately they are not the same as CSS2. CSS2: serif, sans-serif, cursive, fantasy, monospace ODF: roman, swiss, modern, decorative, script, system """ @@ -851,7 +1004,7 @@ class ODF2XHTML(handler.ContentHandler): """ name = attrs[(STYLENS,'name')] name = name.replace(".","_") - self.currentstyle = "@page " + name + self.currentstyle = ".PL-" + name self.stylestack.append(self.currentstyle) self.styledict[self.currentstyle] = {} @@ -882,7 +1035,7 @@ class ODF2XHTML(handler.ContentHandler): self.s_ignorexml(tag, attrs) # Short prefixes for class selectors - familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR', + _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR', 'text':'S', 'section':'D', 'table':'T', 'table-cell':'TD', 'table-column':'TC', 'table-row':'TR', 'graphic':'G' } @@ -898,7 +1051,7 @@ class ODF2XHTML(handler.ContentHandler): name = name.replace(".","_") family = attrs[(STYLENS,'family')] htmlfamily = self.familymap.get(family,'unknown') - sfamily = self.familyshort.get(family,'X') + sfamily = self._familyshort.get(family,'X') name = "%s%s-%s" % (self.autoprefix, sfamily, name) parent = attrs.get( (STYLENS,'parent-style-name') ) self.currentstyle = special_styles.get(name,"."+name) @@ -943,6 +1096,7 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def s_table_table_cell(self, tag, attrs): + """ Start a table cell """ #FIXME: number-columns-repeated § 8.1.3 #repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) htmlattrs = {} @@ -960,11 +1114,13 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_table_table_cell(self, tag, attrs): + """ End a table cell """ self.writedata() self.closetag('td') self.purgedata() def s_table_table_column(self, tag, attrs): + """ Start a table column """ c = attrs.get( (TABLENS,'style-name'), None) repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) htmlattrs = {} @@ -975,6 +1131,7 @@ class ODF2XHTML(handler.ContentHandler): 
self.purgedata() def s_table_table_row(self, tag, attrs): + """ Start a table row """ #FIXME: table:number-rows-repeated c = attrs.get( (TABLENS,'style-name'), None) htmlattrs = {} @@ -984,6 +1141,7 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_table_table_row(self, tag, attrs): + """ End a table row """ self.writedata() self.closetag('tr') self.purgedata() @@ -998,10 +1156,28 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_text_a(self, tag, attrs): + """ End an anchor or bookmark reference """ self.writedata() self.closetag('a', False) self.purgedata() + def s_text_bookmark(self, tag, attrs): + """ Bookmark definition """ + name = attrs[(TEXTNS,'name')] + html_id = self.get_anchor(name) + self.writedata() + self.opentag('span', {'id':html_id}) + self.closetag('span', False) + self.purgedata() + + def s_text_bookmark_ref(self, tag, attrs): + """ Bookmark reference """ + name = attrs[(TEXTNS,'ref-name')] + html_id = "#" + self.get_anchor(name) + self.writedata() + self.opentag('a', {'href':html_id}) + self.purgedata() + def s_text_h(self, tag, attrs): """ Headings start """ level = int(attrs[(TEXTNS,'outline-level')]) @@ -1019,13 +1195,19 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_text_h(self, tag, attrs): - """ Headings end """ + """ Headings end + Side-effect: If there is no title in the metadata, then it is taken + from the first heading of any level. 
+ """ self.writedata() level = int(attrs[(TEXTNS,'outline-level')]) if level > 6: level = 6 # Heading levels go only to 6 in XHTML if level < 1: level = 1 lev = self.headinglevels[1:level+1] outline = '.'.join(map(str,lev) ) + heading = ''.join(self.data) + if self.title == '': self.title = heading + # Changed by Kovid tail = ''.join(self.data) anchor = self.get_anchor("%s.%s" % ( outline, tail)) anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506 @@ -1037,12 +1219,14 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def s_text_line_break(self, tag, attrs): + """ Force a line break (
) """ self.writedata() self.emptytag('br') self.purgedata() def s_text_list(self, tag, attrs): - """ To know which level we're at, we have to count the number + """ Start a list (
    or
      ) + To know which level we're at, we have to count the number of elements on the tagstack. """ name = attrs.get( (TEXTNS,'style-name') ) @@ -1056,12 +1240,13 @@ class ODF2XHTML(handler.ContentHandler): name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) list_class = "%s_%d" % (name, level) if self.generate_css: - self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class }) + self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class }) else: - self.opentag('%s' % self.listtypes.get(list_class,'UL')) + self.opentag('%s' % self.listtypes.get(list_class,'ul')) self.purgedata() def e_text_list(self, tag, attrs): + """ End a list """ self.writedata() name = attrs.get( (TEXTNS,'style-name') ) level = self.tagstack.count_tags(tag) + 1 @@ -1073,14 +1258,16 @@ class ODF2XHTML(handler.ContentHandler): # textbox itself may be nested within another list. name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) list_class = "%s_%d" % (name, level) - self.closetag(self.listtypes.get(list_class,'UL')) + self.closetag(self.listtypes.get(list_class,'ul')) self.purgedata() def s_text_list_item(self, tag, attrs): + """ Start list item """ self.opentag('li') self.purgedata() def e_text_list_item(self, tag, attrs): + """ End list item """ self.writedata() self.closetag('li') self.purgedata() @@ -1192,7 +1379,7 @@ class ODF2XHTML(handler.ContentHandler): if specialtag is None: specialtag = 'p' self.writedata() - if not self.data: + if not self.data: # Added by Kovid # Give substance to empty paragraphs, as rendered by OOo self.writeout(' ') self.closetag(specialtag) @@ -1255,55 +1442,30 @@ class ODF2XHTML(handler.ContentHandler): #----------------------------------------------------------------------------- def load(self, odffile): - self._odffile = odffile + """ Loads a document into the parser and parses it. + The argument can either be a filename or a document in memory. 
+ """ + self.lines = [] + self._wfunc = self._wlines + if isinstance(odffile, basestring) \ + or hasattr(odffile, 'read'): # Added by Kovid + self.document = load(odffile) + else: + self.document = odffile + self._walknode(self.document.topnode) - def parseodf(self): - self.xmlfile = '' - self.title = '' - self.data = [] - self.tagstack = TagStack() - self.pstack = [] - self.processelem = True - self.processcont = True - self.listtypes = {} - self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10 - self.cs = StyleToCSS() - self.anchors = {} + def _walknode(self, node): + if node.nodeType == Node.ELEMENT_NODE: + self.startElementNS(node.qname, node.tagName, node.attributes) + for c in node.childNodes: + self._walknode(c) + self.endElementNS(node.qname, node.tagName) + if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE: + self.characters(unicode(node)) - # Style declarations - self.stylestack = [] - self.styledict = {} - self.currentstyle = None - - # Footnotes and endnotes - self.notedict = {} - self.currentnote = 0 - self.notebody = '' - - # Tags from meta.xml - self.metatags = [] - - # Extract the interesting files - z = zipfile.ZipFile(self._odffile) - - # For some reason Trac has trouble when xml.sax.make_parser() is used. - # Could it be because PyXML is installed, and therefore a different parser - # might be chosen? 
By calling expatreader directly we avoid this issue - parser = expatreader.create_parser() - parser.setFeature(handler.feature_namespaces, 1) - parser.setContentHandler(self) - parser.setErrorHandler(handler.ErrorHandler()) - inpsrc = InputSource() - - for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'): - self.xmlfile = xmlfile - content = z.read(xmlfile) - inpsrc.setByteStream(StringIO(content)) - parser.parse(inpsrc) - z.close() def odf2xhtml(self, odffile): - """ Load a file and return XHTML + """ Load a file and return the XHTML """ self.load(odffile) return self.xhtml() @@ -1312,9 +1474,8 @@ class ODF2XHTML(handler.ContentHandler): if s != '': self.lines.append(s) def xhtml(self): - self.lines = [] - self._wfunc = self._wlines - self.parseodf() + """ Returns the xhtml + """ return ''.join(self.lines) def _writecss(self, s): @@ -1324,11 +1485,127 @@ class ODF2XHTML(handler.ContentHandler): pass def css(self): - self._wfunc = self._writenothing - self.parseodf() + """ Returns the CSS content """ self._csslines = [] self._wfunc = self._writecss self.generate_stylesheet() res = ''.join(self._csslines) + self._wfunc = self._wlines del self._csslines return res + + def save(self, outputfile, addsuffix=False): + """ Save the HTML under the filename. 
+ If the filename is '-' then save to stdout + We have the last style filename in self.stylefilename + """ + if outputfile == '-': + import sys # Added by Kovid + outputfp = sys.stdout + else: + if addsuffix: + outputfile = outputfile + ".html" + outputfp = file(outputfile, "w") + outputfp.write(self.xhtml().encode('us-ascii','xmlcharrefreplace')) + outputfp.close() + + +class ODF2XHTMLembedded(ODF2XHTML): + """ The ODF2XHTML parses an ODF file and produces XHTML""" + + def __init__(self, lines, generate_css=True, embedable=False): + self._resetobject() + self.lines = lines + + # Tags + self.generate_css = generate_css + self.elements = { +# (DCNS, 'title'): (self.s_processcont, self.e_dc_title), +# (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), +# (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag), +# (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag), +# (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag), + (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame), + (DRAWNS, 'image'): (self.s_draw_image, None), + (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), + (DRAWNS, "layer-set"):(self.s_ignorexml, None), + (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), + (DRAWNS, 'object'): (self.s_draw_object, None), + (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None), + (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox), +# (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), +# (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag), +# (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag), +# (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag), + (NUMBERNS, "boolean-style"):(self.s_ignorexml, None), + (NUMBERNS, "currency-style"):(self.s_ignorexml, None), + (NUMBERNS, "date-style"):(self.s_ignorexml, None), + (NUMBERNS, "number-style"):(self.s_ignorexml, None), + (NUMBERNS, "text-style"):(self.s_ignorexml, None), +# (OFFICENS, 
"automatic-styles"):(self.s_office_automatic_styles, None), +# (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content), + (OFFICENS, "forms"):(self.s_ignorexml, None), +# (OFFICENS, "master-styles"):(self.s_office_master_styles, None), + (OFFICENS, "meta"):(self.s_ignorecont, None), +# (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation), +# (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet), +# (OFFICENS, "styles"):(self.s_office_styles, None), +# (OFFICENS, "text"):(self.s_office_text, self.e_office_text), + (OFFICENS, "scripts"):(self.s_ignorexml, None), + (PRESENTATIONNS, "notes"):(self.s_ignorexml, None), +## (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout), +# (STYLENS, "default-page-layout"):(self.s_ignorexml, None), +# (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style), +# (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "font-face"):(self.s_style_font_face, None), +## (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer), +## (STYLENS, "footer-style"):(self.s_style_footer_style, None), +# (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "handout-master"):(self.s_ignorexml, None), +## (STYLENS, "header"):(self.s_style_header, self.e_style_header), +## (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None), +## (STYLENS, "header-style"):(self.s_style_header_style, None), +# (STYLENS, "master-page"):(self.s_style_master_page, None), +# (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None), +## (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), +# (STYLENS, "page-layout"):(self.s_ignorexml, None), +# (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "style"):(self.s_style_style, 
self.e_style_style), +# (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "table-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "text-properties"):(self.s_style_handle_properties, None), + (SVGNS, 'desc'): (self.s_ignorexml, None), + (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None), + (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell), + (TABLENS, 'table-column'): (self.s_table_table_column, None), + (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row), + (TABLENS, 'table'): (self.s_table_table, self.e_table_table), + (TEXTNS, 'a'): (self.s_text_a, self.e_text_a), + (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None), + (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'h'): (self.s_text_h, self.e_text_h), + (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'line-break'):(self.s_text_line_break, None), + (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None), + (TEXTNS, "list"):(self.s_text_list, self.e_text_list), + (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item), + (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet), + (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number), + (TEXTNS, "list-style"):(None, None), + (TEXTNS, "note"):(self.s_text_note, None), + (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body), + (TEXTNS, "note-citation"):(None, self.e_text_note_citation), + (TEXTNS, "notes-configuration"):(self.s_ignorexml, None), + (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'p'): 
(self.s_text_p, self.e_text_p), + (TEXTNS, 's'): (self.s_text_s, None), + (TEXTNS, 'span'): (self.s_text_span, self.e_text_span), + (TEXTNS, 'tab'): (self.s_text_tab, None), + (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "page-number"):(None, None), + } + diff --git a/src/odf/opendocument.py b/src/odf/opendocument.py index 9fd16229f6..63196382d5 100644 --- a/src/odf/opendocument.py +++ b/src/odf/opendocument.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2009 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -41,7 +41,7 @@ IS_IMAGE = 1 # We need at least Python 2.2 assert sys.version_info[0]>=2 and sys.version_info[1] >= 2 -sys.setrecursionlimit=50 +#sys.setrecursionlimit(100) #The recursion limit is set conservative so mistakes like # s=content() s.addElement(s) won't eat up too much processor time. @@ -128,12 +128,12 @@ class OpenDocument: self.element_dict[element.qname] = [] self.element_dict[element.qname].append(element) if element.qname == (STYLENS, u'style'): - self._register_stylename(element) # Add to style dictionary + self.__register_stylename(element) # Add to style dictionary styleref = element.getAttrNS(TEXTNS,u'style-name') if styleref is not None and self._styles_ooo_fix.has_key(styleref): element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref]) - def _register_stylename(self, element): + def __register_stylename(self, element): ''' Register a style. 
But there are three style dictionaries: office:styles, office:automatic-styles and office:master-styles Chapter 14 @@ -165,7 +165,7 @@ class OpenDocument: """ Generates the full document as an XML file Always written as a bytestream in UTF-8 encoding """ - self._replaceGenerator() + self.__replaceGenerator() xml=StringIO() xml.write(_XMLPROLOGUE) self.topnode.toXml(0, xml) @@ -197,8 +197,10 @@ class OpenDocument: x.write_close_tag(0, xml) return xml.getvalue() - def manifestxml(self): - """ Generates the manifest.xml file """ + def __manifestxml(self): + """ Generates the manifest.xml file + The self.manifest isn't avaible unless the document is being saved + """ xml=StringIO() xml.write(_XMLPROLOGUE) self.manifest.toXml(0,xml) @@ -206,7 +208,7 @@ class OpenDocument: def metaxml(self): """ Generates the meta.xml file """ - self._replaceGenerator() + self.__replaceGenerator() x = DocumentMeta() x.addElement(self.meta) xml=StringIO() @@ -344,7 +346,7 @@ class OpenDocument: self.thumbnail = filecontent def addObject(self, document, objectname=None): - """ Add an object. The object must be an OpenDocument class + """ Adds an object (subdocument). 
The object must be an OpenDocument class The return value will be the folder in the zipfile the object is stored in """ self.childobjects.append(document) @@ -367,15 +369,16 @@ class OpenDocument: zi.compress_type = zipfile.ZIP_STORED zi.external_attr = UNIXPERMS self._z.writestr(zi, fileobj) - if hasPictures: - self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype="")) + # According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry +# if hasPictures: +# self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype="")) # Look in subobjects subobjectnum = 1 for subobject in object.childobjects: self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum)) subobjectnum += 1 - def _replaceGenerator(self): + def __replaceGenerator(self): """ Section 3.1.1: The application MUST NOT export the original identifier belonging to the application that created the document. """ @@ -385,22 +388,29 @@ class OpenDocument: self.meta.addElement(meta.Generator(text=TOOLSVERSION)) def save(self, outputfile, addsuffix=False): - """ Save the document under the filename """ + """ Save the document under the filename. 
+ If the filename is '-' then save to stdout + """ if outputfile == '-': outputfp = zipfile.ZipFile(sys.stdout,"w") else: if addsuffix: outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx') outputfp = zipfile.ZipFile(outputfile, "w") - self._zipwrite(outputfp) + self.__zipwrite(outputfp) outputfp.close() def write(self, outputfp): + """ User API to write the ODF file to an open file descriptor + Writes the ZIP format + """ zipoutputfp = zipfile.ZipFile(outputfp,"w") - self._zipwrite(zipoutputfp) + self.__zipwrite(zipoutputfp) - def _zipwrite(self, outputfp): - """ Write the document to an open file pointer """ + def __zipwrite(self, outputfp): + """ Write the document to an open file pointer + This is where the real work is done + """ self._z = outputfp self._now = time.localtime()[:6] self.manifest = manifest.Manifest() @@ -438,7 +448,7 @@ class OpenDocument: zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now) zi.compress_type = zipfile.ZIP_DEFLATED zi.external_attr = UNIXPERMS - self._z.writestr(zi, self.manifestxml() ) + self._z.writestr(zi, self.__manifestxml() ) del self._z del self._now del self.manifest @@ -464,8 +474,8 @@ class OpenDocument: self._z.writestr(zi, object.contentxml() ) # Write settings - if self == object and self.settings.hasChildNodes(): - self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml")) + if object.settings.hasChildNodes(): + self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml")) zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now) zi.compress_type = zipfile.ZIP_DEFLATED zi.external_attr = UNIXPERMS @@ -473,7 +483,7 @@ class OpenDocument: # Write meta if self == object: - self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml")) + self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml")) zi = zipfile.ZipInfo("meta.xml", self._now) zi.compress_type = zipfile.ZIP_DEFLATED 
zi.external_attr = UNIXPERMS @@ -497,6 +507,7 @@ class OpenDocument: return element.Text(data) def createCDATASection(self, data): + """ Method to create a CDATA section """ return element.CDATASection(cdata) def getMediaType(self): @@ -504,12 +515,14 @@ class OpenDocument: return self.mimetype def getStyleByName(self, name): + """ Finds a style object based on the name """ ncname = make_NCName(name) if self._styles_dict == {}: self.rebuild_caches() return self._styles_dict.get(ncname, None) def getElementsByType(self, element): + """ Gets elements based on the type, which is function from text.py, draw.py etc. """ obj = element(check_grammar=False) if self.element_dict == {}: self.rebuild_caches() @@ -517,53 +530,59 @@ class OpenDocument: # Convenience functions def OpenDocumentChart(): + """ Creates a chart document """ doc = OpenDocument('application/vnd.oasis.opendocument.chart') doc.chart = Chart() doc.body.addElement(doc.chart) return doc def OpenDocumentDrawing(): + """ Creates a drawing document """ doc = OpenDocument('application/vnd.oasis.opendocument.graphics') doc.drawing = Drawing() doc.body.addElement(doc.drawing) return doc def OpenDocumentImage(): + """ Creates an image document """ doc = OpenDocument('application/vnd.oasis.opendocument.image') doc.image = Image() doc.body.addElement(doc.image) return doc def OpenDocumentPresentation(): + """ Creates a presentation document """ doc = OpenDocument('application/vnd.oasis.opendocument.presentation') doc.presentation = Presentation() doc.body.addElement(doc.presentation) return doc def OpenDocumentSpreadsheet(): + """ Creates a spreadsheet document """ doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet') doc.spreadsheet = Spreadsheet() doc.body.addElement(doc.spreadsheet) return doc def OpenDocumentText(): + """ Creates a text document """ doc = OpenDocument('application/vnd.oasis.opendocument.text') doc.text = Text() doc.body.addElement(doc.text) return doc +def 
OpenDocumentTextMaster(): + """ Creates a text master document """ + doc = OpenDocument('application/vnd.oasis.opendocument.text-master') + doc.text = Text() + doc.body.addElement(doc.text) + return doc -def load(odffile): +def __loadxmlparts(z, manifest, doc, objectpath): from load import LoadParser from xml.sax import make_parser, handler - z = zipfile.ZipFile(odffile) - mimetype = z.read('mimetype') - doc = OpenDocument(mimetype, add_generator=False) - # Look in the manifest file to see if which of the four files there are - manifestpart = z.read('META-INF/manifest.xml') - manifest = manifestlist(manifestpart) - for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'): + for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'): if not manifest.has_key(xmlfile): continue try: @@ -580,7 +599,19 @@ def load(odffile): parser.parse(inpsrc) del doc._parsing except KeyError, v: pass - # FIXME: Add subobjects correctly here + +def load(odffile): + """ Load an ODF file into memory + Returns a reference to the structure + """ + z = zipfile.ZipFile(odffile) + mimetype = z.read('mimetype') + doc = OpenDocument(mimetype, add_generator=False) + + # Look in the manifest file to see if which of the four files there are + manifestpart = z.read('META-INF/manifest.xml') + manifest = manifestlist(manifestpart) + __loadxmlparts(z, manifest, doc, '') for mentry,mvalue in manifest.items(): if mentry[:9] == "Pictures/" and len(mentry) > 9: doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry)) @@ -588,6 +619,13 @@ def load(odffile): doc.addThumbnail(z.read(mentry)) elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'): pass + # Load subobjects into structure + elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/": + subdoc = OpenDocument(mvalue['media-type'], add_generator=False) + doc.addObject(subdoc, "/" + mentry[:-1]) + __loadxmlparts(z, manifest, 
subdoc, mentry) + elif mentry[:7] == "Object ": + pass # Don't load subobjects as opaque objects else: if mvalue['full-path'][-1] == '/': doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None)) @@ -612,4 +650,5 @@ def load(odffile): elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula': doc.formula = b[0].firstChild return doc + # vim: set expandtab sw=4 : From 81522f6d8c400ed1a7d27de2214cfc1b5ffa563b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 10:57:40 -0700 Subject: [PATCH 07/19] Fix #9131 (Calibre should honor metadata in epub files when downloading/importing news items items) --- src/calibre/library/database2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index dce0b34aef..0fa25e88fd 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -2451,7 +2451,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): stream.seek(0) mi = get_metadata(stream, format, use_libprs_metadata=False) stream.seek(0) - mi.series_index = 1.0 mi.tags = [_('News')] if arg['add_title_tag']: mi.tags += [arg['title']] From 1e31e39ac565dfae046124178e55052b273f9285 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 10:58:16 -0700 Subject: [PATCH 08/19] ... 
--- src/calibre/library/database2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 0fa25e88fd..c53d938297 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -2451,6 +2451,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): stream.seek(0) mi = get_metadata(stream, format, use_libprs_metadata=False) stream.seek(0) + if not mi.series_index: + mi.series_index = 1.0 mi.tags = [_('News')] if arg['add_title_tag']: mi.tags += [arg['title']] From 4cefe30cd63c51dd254bf3877bc5a1b03521cb5e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 11:07:13 -0700 Subject: [PATCH 09/19] Flickr Blog by Ricardo Jurado --- resources/recipes/flickr.recipe | 48 ++++++++++++++++++++++++++++++ resources/recipes/flickr_es.recipe | 47 +++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 resources/recipes/flickr.recipe create mode 100644 resources/recipes/flickr_es.recipe diff --git a/resources/recipes/flickr.recipe b/resources/recipes/flickr.recipe new file mode 100644 index 0000000000..5b0276d28c --- /dev/null +++ b/resources/recipes/flickr.recipe @@ -0,0 +1,48 @@ +__license__ = 'GPL v3' +__author__ = 'Ricardo Jurado' +__copyright__ = 'Ricardo Jurado' +__version__ = 'v0.1' +__date__ = '22 February 2011' + +''' +http://blog.flickr.net/ +''' + + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1297031650(BasicNewsRecipe): + + title = u'Flickr Blog' + masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif' + cover_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif' + publisher = u'' + + __author__ = 'Ricardo Jurado' + description = 'Pictures Blog' + category = 'Blog,Pictures' + + oldest_article = 120 + max_articles_per_feed = 10 + no_stylesheets = True + use_embedded_content = False + encoding = 'UTF-8' + remove_javascript = True + 
language = 'en' + + extra_css = """ + p{text-align: justify; font-size: 100%} + body{ text-align: left; font-size:100% } + h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; } + .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; } + .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; } + """ + + keep_only_tags = [ + dict(name='div', attrs={'class':'entry'}) + ] + + feeds = [ + (u'BLOG', u'http://feeds.feedburner.com/Flickrblog'), + #(u'BLOG', u'http://blog.flickr.net/es/feed/atom/') + ] diff --git a/resources/recipes/flickr_es.recipe b/resources/recipes/flickr_es.recipe new file mode 100644 index 0000000000..1d9c2062eb --- /dev/null +++ b/resources/recipes/flickr_es.recipe @@ -0,0 +1,47 @@ +__license__ = 'GPL v3' +__author__ = 'Ricardo Jurado' +__copyright__ = 'Ricardo Jurado' +__version__ = 'v0.1' +__date__ = '22 February 2011' + +''' +http://blog.flickr.net/ +''' + + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1297031650(BasicNewsRecipe): + + title = u'Flickr Blog' + masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif' + cover_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif' + publisher = u'' + + __author__ = 'Ricardo Jurado' + description = 'Pictures Blog' + category = 'Blog,Pictures' + + oldest_article = 120 + max_articles_per_feed = 10 + no_stylesheets = True + use_embedded_content = False + encoding = 'UTF-8' + remove_javascript = True + language = 'es' + + extra_css = """ + p{text-align: justify; font-size: 100%} + body{ text-align: left; font-size:100% } + h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; } + .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; } + .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; } + """ + + keep_only_tags = [ + dict(name='div', attrs={'class':'entry'}) + ] + + feeds = [ + (u'BLOG', 
u'http://blog.flickr.net/es/feed/atom/') + ] From 14c64a2e1c58fe2d2abe709fbfb814bf6f9fab6e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 11:29:05 -0700 Subject: [PATCH 10/19] Comic Input: Add option to not add links to individual pages to the Table of Contents when converting CBC files --- src/calibre/ebooks/comic/input.py | 13 ++++++--- src/calibre/gui2/convert/comic_input.py | 3 +- src/calibre/gui2/convert/comic_input.ui | 39 +++++++++++++++---------- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index c9b11e31f2..7710d41fb3 100755 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -304,6 +304,10 @@ class ComicInput(InputFormatPlugin): help=_('Specify the image size as widthxheight pixels. Normally,' ' an image size is automatically calculated from the output ' 'profile, this option overrides it.')), + OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False, + help=_('When converting a CBC do not add links to each page to' + ' the TOC.
Note this only applies if the TOC has more than one' + ' section')), ]) recommendations = set([ @@ -449,10 +453,11 @@ class ComicInput(InputFormatPlugin): wrappers = comic[2] stoc = toc.add_item(href(wrappers[0]), None, comic[0], play_order=po) - for i, x in enumerate(wrappers): - stoc.add_item(href(x), None, - _('Page')+' %d'%(i+1), play_order=po) - po += 1 + if not opts.dont_add_comic_pages_to_toc: + for i, x in enumerate(wrappers): + stoc.add_item(href(x), None, + _('Page')+' %d'%(i+1), play_order=po) + po += 1 opf.set_toc(toc) m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb') opf.render(m, n, 'toc.ncx') diff --git a/src/calibre/gui2/convert/comic_input.py b/src/calibre/gui2/convert/comic_input.py index f7f8023c0e..ed8053b8e6 100644 --- a/src/calibre/gui2/convert/comic_input.py +++ b/src/calibre/gui2/convert/comic_input.py @@ -22,7 +22,8 @@ class PluginWidget(Widget, Ui_Form): ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left', 'despeckle', 'no_sort', 'no_process', 'landscape', 'dont_sharpen', 'disable_trim', 'wide', 'output_format', - 'dont_grayscale', 'comic_image_size'] + 'dont_grayscale', 'comic_image_size', + 'dont_add_comic_pages_to_toc'] ) self.db, self.book_id = db, book_id for x in get_option('output_format').option.choices: diff --git a/src/calibre/gui2/convert/comic_input.ui b/src/calibre/gui2/convert/comic_input.ui index 52c0ad2bb5..676032942f 100644 --- a/src/calibre/gui2/convert/comic_input.ui +++ b/src/calibre/gui2/convert/comic_input.ui @@ -14,7 +14,7 @@ Form - + &Number of Colors: @@ -24,7 +24,7 @@ - + 8 @@ -37,70 +37,70 @@ - + Disable &normalize - + Keep &aspect ratio - + Disable &Sharpening - + Disable &Trimming - + &Wide - + &Landscape - + &Right to left - + Don't so&rt - + De&speckle - + Qt::Vertical @@ -120,7 +120,7 @@ - + &Output format: @@ -130,7 +130,7 @@ - + @@ -140,7 +140,7 @@ - + Override image &size: @@ -150,9 +150,16 @@ - + + + + + Don't add links to &pages to the Table of Contents for CBC files + + + From 
8e0efbbb5fb8a7b5ba066c97fd2294866ea88934 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 12:13:45 -0700 Subject: [PATCH 11/19] EPUB Output: Try to ensure that the cover image always has an id="cover" to workaround Nook cover reading bug. Fixes #8182 (Book cover problem when converting to epub for Nookcolor) --- src/calibre/ebooks/oeb/output.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index 585b56c7b6..6709141a01 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -32,6 +32,12 @@ class OEBOutput(OutputFormatPlugin): for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): href, root = results.pop(key, [None, None]) if root is not None: + if key == OPF_MIME: + try: + self.workaround_nook_cover_bug(root) + except: + self.log.exception('Something went wrong while trying to' + ' workaround Nook cover bug, ignoring') raw = etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True) if key == OPF_MIME: @@ -49,3 +55,24 @@ class OEBOutput(OutputFormatPlugin): with open(path, 'wb') as f: f.write(str(item)) item.unload_data_from_memory(memory=path) + + def workaround_nook_cover_bug(self, root): # {{{ + cov = root.xpath('//*[local-name() = "meta" and @name="cover" and' + ' @content != "cover"]') + if len(cov) == 1: + manpath = ('//*[local-name() = "manifest"]/*[local-name() = "item" ' + ' and @id="%s" and @media-type]') + cov = cov[0] + covid = cov.get('content') + manifest_item = root.xpath(manpath%covid) + has_cover = root.xpath(manpath%'cover') + if len(manifest_item) == 1 and not has_cover and \ + manifest_item[0].get('media-type', + '').startswith('image/'): + self.log.warn('The cover image has an id != "cover". 
Renaming' + ' to work around Nook Color bug') + manifest_item = manifest_item[0] + manifest_item.set('id', 'cover') + cov.set('content', 'cover') + # }}} + From 90ab2881e0868b08814945d22551fe90e2d9513d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 12:29:58 -0700 Subject: [PATCH 12/19] Fix Gizmodo and LifeHacker recipes --- resources/recipes/gizmodo.recipe | 14 ++++++-------- resources/recipes/lifehacker.recipe | 20 +++----------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/resources/recipes/gizmodo.recipe b/resources/recipes/gizmodo.recipe index 4233ef66b7..f6d3fcb782 100644 --- a/resources/recipes/gizmodo.recipe +++ b/resources/recipes/gizmodo.recipe @@ -17,10 +17,9 @@ class Gizmodo(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True encoding = 'utf-8' - use_embedded_content = False + use_embedded_content = True language = 'en' masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png' - extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} ' conversion_options = { 'comment' : description @@ -29,13 +28,12 @@ class Gizmodo(BasicNewsRecipe): , 'language' : language } - remove_attributes = ['width','height'] - keep_only_tags = [dict(attrs={'class':'content permalink'})] - remove_tags_before = dict(name='h1') - remove_tags = [dict(attrs={'class':'contactinfo'})] - remove_tags_after = dict(attrs={'class':'contactinfo'}) + feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')] + + remove_tags = [ + {'class': 'feedflare'}, + ] - feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')] def preprocess_html(self, soup): return self.adeify_images(soup) diff --git a/resources/recipes/lifehacker.recipe b/resources/recipes/lifehacker.recipe index 42e32497be..ff95efc50a 100644 --- a/resources/recipes/lifehacker.recipe +++ b/resources/recipes/lifehacker.recipe @@ -16,15 +16,9 @@ class Lifehacker(BasicNewsRecipe): 
max_articles_per_feed = 100 no_stylesheets = True encoding = 'utf-8' - use_embedded_content = False + use_embedded_content = True language = 'en' masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png' - extra_css = ''' - body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} - img{margin-bottom: 1em} - h1{font-family :Arial,Helvetica,sans-serif; font-size:large} - h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small} - ''' conversion_options = { 'comment' : description , 'tags' : category @@ -32,20 +26,12 @@ class Lifehacker(BasicNewsRecipe): , 'language' : language } - remove_attributes = ['width', 'height', 'style'] - remove_tags_before = dict(name='h1') - keep_only_tags = [dict(id='container')] - remove_tags_after = dict(attrs={'class':'post-body'}) remove_tags = [ - dict(id="sharemenu"), - {'class': 'related'}, + {'class': 'feedflare'}, ] - feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')] + feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')] def preprocess_html(self, soup): return self.adeify_images(soup) - def print_version(self, url): - return url.replace('#!', '?_escaped_fragment_=') - From eb3d1aa424b41cb09411db66f34725c2978a7d05 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 12:58:32 -0700 Subject: [PATCH 13/19] Kobo driver: Handle missing firmware version file --- src/calibre/devices/kobo/driver.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 52f0563c7b..f1c0d3f3d3 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -78,9 +78,13 @@ class KOBO(USBMS): else self._main_prefix # Determine the firmware version - f = open(self.normalize_path(self._main_prefix + '.kobo/version'), 'r') - self.fwversion = f.readline().split(',')[2] - f.close() + try: + with open(self.normalize_path(self._main_prefix + 
'.kobo/version'), + 'rb') as f: + self.fwversion = f.readline().split(',')[2] + except: + self.fwversion = 'unknown' + if self.fwversion != '1.0' and self.fwversion != '1.4': self.has_kepubs = True debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs) @@ -161,7 +165,7 @@ class KOBO(USBMS): return changed connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')) - + # return bytestrings if the content cannot the decoded as unicode connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") @@ -234,7 +238,7 @@ class KOBO(USBMS): debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType) connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')) - + # return bytestrings if the content cannot the decoded as unicode connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") @@ -511,7 +515,7 @@ class KOBO(USBMS): # the last book from the collection the list of books is empty # and the removal of the last book would not occur connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')) - + # return bytestrings if the content cannot the decoded as unicode connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") From e82f0068736eb22437d7da8b635a52aa03b634ea Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 15:20:44 -0700 Subject: [PATCH 14/19] Driver for the Wexler T7001 --- src/calibre/customize/builtins.py | 4 ++-- src/calibre/devices/teclast/driver.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 4f3574559e..cd4c866562 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -571,7 +571,7 @@ from calibre.devices.binatone.driver import README from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from 
calibre.devices.edge.driver import EDGE from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \ - SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH + SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER from calibre.devices.sne.driver import SNE from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \ GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \ @@ -679,7 +679,7 @@ plugins += [ ELONEX, TECLAST_K3, NEWSMY, - PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, + PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, WEXLER, IPAPYRUS, EDGE, SNE, diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py index 2cca0085d7..1bbab8e120 100644 --- a/src/calibre/devices/teclast/driver.py +++ b/src/calibre/devices/teclast/driver.py @@ -104,3 +104,14 @@ class STASH(TECLAST_K3): VENDOR_NAME = 'STASH' WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'W950' +class WEXLER(TECLAST_K3): + + name = 'Wexler device interface' + gui_name = 'Wexler' + description = _('Communicate with the Wexler reader.') + + FORMATS = ['epub', 'fb2', 'pdf', 'txt'] + + VENDOR_NAME = 'WEXLER' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'T7001' + From 954141aa0402d515ec759da2cdb0e349908cc2e2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 20:28:30 -0700 Subject: [PATCH 15/19] ... --- src/calibre/devices/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index d74f727a0e..a2d1817de0 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -268,5 +268,5 @@ class NEXTBOOK(USBMS): EBOOK_DIR_MAIN = '' VENDOR_NAME = 'NEXT2' - WINDOWS_MAIN_MEM = '1.0.14' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14' From 1fc31946d6f0b6b51360b1a5db2fc1a5294b8bab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 20:30:32 -0700 Subject: [PATCH 16/19] ... 
--- src/calibre/devices/misc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index a2d1817de0..124f8e39c4 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -269,4 +269,5 @@ class NEXTBOOK(USBMS): VENDOR_NAME = 'NEXT2' WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14' + SUPPORTS_SUB_DIRS = True From 9fd8303b8990b474061cb692d677f0958f7b4189 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 20:34:55 -0700 Subject: [PATCH 17/19] ... --- src/calibre/devices/misc.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index 124f8e39c4..8cf0fb5a06 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -9,6 +9,8 @@ __docformat__ = 'restructuredtext en' import os from calibre.devices.usbms.driver import USBMS +from calibre import prints +prints class PALMPRE(USBMS): @@ -271,3 +273,33 @@ class NEXTBOOK(USBMS): WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14' SUPPORTS_SUB_DIRS = True + ''' + def upload_cover(self, path, filename, metadata, filepath): + if metadata.thumbnail and metadata.thumbnail[-1]: + path = path.replace('/', os.sep) + is_main = path.startswith(self._main_prefix) + prefix = None + if is_main: + prefix = self._main_prefix + else: + if self._card_a_prefix and \ + path.startswith(self._card_a_prefix): + prefix = self._card_a_prefix + elif self._card_b_prefix and \ + path.startswith(self._card_b_prefix): + prefix = self._card_b_prefix + if prefix is None: + prints('WARNING: Failed to find prefix for:', filepath) + return + thumbnail_dir = os.path.join(prefix, '.Cover') + + relpath = os.path.relpath(filepath, prefix) + if relpath.startswith('..\\'): + relpath = relpath[3:] + thumbnail_dir = os.path.join(thumbnail_dir, relpath) + if not os.path.exists(thumbnail_dir): + os.makedirs(thumbnail_dir) + with open(os.path.join(thumbnail_dir, 
filename+'.jpg'), 'wb') as f: + f.write(metadata.thumbnail[-1]) + ''' + From e16968c38169b9b60f57f0bb87fafec407ef694d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 22:55:27 -0700 Subject: [PATCH 18/19] Linux binary build: Set the MAGICK_HOME environment variable --- setup/installer/linux/freeze2.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 20348fa8b8..be6f3ad6e5 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -19,7 +19,7 @@ SITE_PACKAGES = ['IPython', 'PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize', QTDIR = '/usr/lib/qt4' QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus') - +MAGICK_PREFIX = '/usr' binary_includes = [ '/usr/bin/pdftohtml', '/usr/lib/libwmflite-0.2.so.7', @@ -41,8 +41,8 @@ binary_includes = [ '/usr/lib/libgthread-2.0.so.0', '/usr/lib/libpng14.so.14', '/usr/lib/libexslt.so.0', - '/usr/lib/libMagickWand.so.4', - '/usr/lib/libMagickCore.so.4', + MAGICK_PREFIX+'/lib/libMagickWand.so.4', + MAGICK_PREFIX+'/lib/libMagickCore.so.4', '/usr/lib/libgcrypt.so.11', '/usr/lib/libgpg-error.so.0', '/usr/lib/libphonon.so.4', @@ -116,8 +116,9 @@ class LinuxFreeze(Command): if x not in ('designer', 'sqldrivers', 'codecs'): shutil.copytree(y, self.j(dest, x)) - im = glob.glob('/usr/lib/ImageMagick-*')[0] - dest = self.j(self.lib_dir, 'ImageMagick') + im = glob.glob(MAGICK_PREFIX + '/lib/ImageMagick-*')[-1] + self.magick_base = os.path.basename(im) + dest = self.j(self.lib_dir, self.magick_base) shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a')) def compile_mount_helper(self): @@ -278,9 +279,10 @@ class LinuxFreeze(Command): base=`dirname $path` lib=$base/lib export LD_LIBRARY_PATH=$lib:$LD_LIBRARY_PATH - export MAGICK_CONFIGURE_PATH=$lib/ImageMagick/config - export MAGICK_CODER_MODULE_PATH=$lib/ImageMagick/modules-Q16/coders - export 
MAGICK_CODER_FILTER_PATH=$lib/ImageMagick/modules-Q16/filters + export MAGICK_HOME=$base + export MAGICK_CONFIGURE_PATH=$lib/{1}/config + export MAGICK_CODER_MODULE_PATH=$lib/{1}/modules-Q16/coders + export MAGICK_CODER_FILTER_PATH=$lib/{1}/modules-Q16/filters $base/bin/{0} "$@" ''') @@ -292,7 +294,7 @@ class LinuxFreeze(Command): exe = self.j(self.bin_dir, bname) sh = self.j(self.base, bname) with open(sh, 'wb') as f: - f.write(launcher.format(bname)) + f.write(launcher.format(bname, self.magick_base)) os.chmod(sh, stat.S_IREAD|stat.S_IEXEC|stat.S_IWRITE|stat.S_IRGRP|stat.S_IXGRP|stat.S_IROTH|stat.S_IXOTH) From c626b60a29c51a90c3a1454401cc8989c25444e9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Feb 2011 23:51:50 -0700 Subject: [PATCH 19/19] Linux binary build: Fix ImageMagick trying to load system modules instead of bundled modules --- setup/installer/linux/freeze2.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index be6f3ad6e5..9dd7df04f8 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' import sys, os, shutil, platform, subprocess, stat, py_compile, glob, \ - textwrap, tarfile + textwrap, tarfile, re from setup import Command, modules, basenames, functions, __version__, \ __appname__ @@ -120,6 +120,21 @@ class LinuxFreeze(Command): self.magick_base = os.path.basename(im) dest = self.j(self.lib_dir, self.magick_base) shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a')) + from calibre import walk + for x in walk(dest): + if x.endswith('.la'): + raw = open(x).read() + raw = re.sub('libdir=.*', '', raw) + open(x, 'wb').write(raw) + + dest = self.j(dest, 'config') + src = self.j(MAGICK_PREFIX, 'share', self.magick_base, 'config') + for x in glob.glob(src+'/*'): + d = self.j(dest, os.path.basename(x)) + if 
os.path.isdir(x): + shutil.copytree(x, d) + else: + shutil.copyfile(x, d) def compile_mount_helper(self): self.info('Compiling mount helper...')