Sync to trunk.

This commit is contained in:
John Schember 2011-02-24 18:46:46 -05:00
commit 6d568aee69
56 changed files with 1830 additions and 483 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 800 B

View File

@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.20minutos.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class t20Minutos(BasicNewsRecipe):
    # Feed-driven recipe for www.20minutos.es.
    title = '20 Minutos'
    __author__ = 'Darko Miletic'
    description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'
    publisher = '20 Minutos Online SL'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = True
    language = 'es'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    # Metadata handed to the conversion step; the values are the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [dict(attrs={'class': 'mf-viral'})]
    remove_attributes = ['border']

    # (section title, feed URL) pairs.
    feeds = [
        (u'Principal', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
        (u'Cine', u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
        (u'Internacional', u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
        (u'Deportes', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
        (u'Nacional', u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
        (u'Economia', u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
        (u'Tecnologia', u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss'),
    ]

    def preprocess_html(self, soup):
        '''
        Clean up the article markup before conversion.

        Removes every inline ``style`` attribute, replaces hyperlinks with
        their text (or turns them into plain ``div`` containers when they
        wrap an image) and gives every image an ``alt`` attribute.

        :param soup: parsed article markup
        :return: the same ``soup`` object, modified in place
        '''
        for styled in soup.findAll(style=True):
            del styled['style']

        for link in soup.findAll('a'):
            if link.string is not None:
                # The link exposes its content as one string: keep only it.
                link.replaceWith(link.string)
            elif link.find('img'):
                # The link wraps an image: keep the tag as an attribute-less
                # div so the image survives but the hyperlink does not.
                link.name = 'div'
                link.attrs = []
            else:
                link.replaceWith(self.tag_to_string(link))

        for image in soup.findAll('img'):
            if not image.has_key('alt'):
                image['alt'] = 'image'
        return soup

View File

@ -32,16 +32,25 @@ class Adevarul(BasicNewsRecipe):
} }
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'}) keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
,dict(name='div', attrs={'class':'bd'}) ,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'})
] ]
remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'}) remove_tags = [
dict(name='li', attrs={'class':'author'})
,dict(name='li', attrs={'class':'date'})
,dict(name='li', attrs={'class':'comments'})
,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'}) ,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'}) ,dict(name='form', attrs={'id':'bb-comment-create-form'})
,dict(name='div', attrs={'id':'mediatag'})
,dict(name='div', attrs={'id':'ft'})
,dict(name='div', attrs={'id':'comment_wrapper'})
] ]
remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ] remove_tags_after = [
dict(name='div', attrs={'id':'comment_wrapper'}),
]
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ] feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]

View File

@ -41,7 +41,8 @@ class ESPN(BasicNewsRecipe):
''' '''
feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'), feeds = [
('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
'http://sports.espn.go.com/espn/rss/nfl/news', 'http://sports.espn.go.com/espn/rss/nfl/news',
'http://sports.espn.go.com/espn/rss/nba/news', 'http://sports.espn.go.com/espn/rss/nba/news',
'http://sports.espn.go.com/espn/rss/mlb/news', 'http://sports.espn.go.com/espn/rss/mlb/news',
@ -107,10 +108,11 @@ class ESPN(BasicNewsRecipe):
if match and 'soccernet' not in url and 'bassmaster' not in url: if match and 'soccernet' not in url and 'bassmaster' not in url:
return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story' return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
else: else:
if match and 'soccernet' in url: if 'soccernet' in url:
splitlist = url.split("&", 5) match = re.search(r'/id/(\d+)/', url)
newurl = 'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] ) if match:
return newurl return \
'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1)
#else: #else:
# if 'bassmaster' in url: # if 'bassmaster' in url:
# return url # return url

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.1'
__date__ = '22 February 2011'
'''
http://blog.flickr.net/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1297031650(BasicNewsRecipe):
    # Recipe for the English edition of the Flickr blog.

    # -- identity -----------------------------------------------------------
    title = u'Flickr Blog'
    __author__ = 'Ricardo Jurado'
    publisher = u''
    description = 'Pictures Blog'
    category = 'Blog,Pictures'
    language = 'en'

    # -- artwork (the same logo serves as masthead and cover) ---------------
    masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
    cover_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'

    # -- download settings --------------------------------------------------
    oldest_article = 120
    max_articles_per_feed = 10
    use_embedded_content = False
    encoding = 'UTF-8'

    # -- clean-up and presentation ------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'class': 'entry'})]
    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
        .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
        .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
    """

    # -- sources ------------------------------------------------------------
    feeds = [
        (u'BLOG', u'http://feeds.feedburner.com/Flickrblog'),
        #(u'BLOG', u'http://blog.flickr.net/es/feed/atom/')
    ]

View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.1'
__date__ = '22 February 2011'
'''
http://blog.flickr.net/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1297031650(BasicNewsRecipe):
    # Recipe for the Spanish edition of the Flickr blog.

    # -- identity -----------------------------------------------------------
    title = u'Flickr Blog'
    __author__ = 'Ricardo Jurado'
    publisher = u''
    description = 'Pictures Blog'
    category = 'Blog,Pictures'
    language = 'es'

    # -- artwork (the same logo serves as masthead and cover) ---------------
    masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
    cover_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'

    # -- download settings --------------------------------------------------
    oldest_article = 120
    max_articles_per_feed = 10
    use_embedded_content = False
    encoding = 'UTF-8'

    # -- clean-up and presentation ------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'class': 'entry'})]
    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
        .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
        .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
    """

    # -- sources ------------------------------------------------------------
    feeds = [
        (u'BLOG', u'http://blog.flickr.net/es/feed/atom/'),
    ]

View File

@ -17,10 +17,9 @@ class Gizmodo(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = True
language = 'en' language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png' masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -29,13 +28,12 @@ class Gizmodo(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
remove_attributes = ['width','height'] feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')]
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1') remove_tags = [
remove_tags = [dict(attrs={'class':'contactinfo'})] {'class': 'feedflare'},
remove_tags_after = dict(attrs={'class':'contactinfo'}) ]
feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -1,20 +1,43 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gsp.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1286351181(BasicNewsRecipe): class GSP(BasicNewsRecipe):
title = u'gsp.ro' title = u'Gazeta Sporturilor'
__author__ = 'bucsie' language = 'ro'
oldest_article = 2 __author__ = u'Silviu Cotoar\u0103'
description = u'Gazeta Sporturilor'
publisher = u'Gazeta Sporturilor'
category = 'Ziare,Sport,Stiri,Romania'
oldest_article = 5
max_articles_per_feed = 100 max_articles_per_feed = 100
language='ro' no_stylesheets = True
cover_url ='http://www.gsp.ro/images/sigla_rosu.jpg' use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://www.gsp.ro/images/logo.jpg'
remove_tags = [ conversion_options = {
dict(name='div', attrs={'class':['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii','mai_multe_articole']}), 'comments' : description
dict(name='div', attrs={'id':'icons'}) ,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='h1', attrs={'class':'serif title_2'})
,dict(name='div', attrs={'id':'only_text'})
,dict(name='span', attrs={'class':'block poza_principala'})
] ]
remove_tags_after = dict(name='div', attrs={'id':'adoceanintactrovccmgpmnyt'})
feeds = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')] feeds = [ (u'\u0218tiri', u'http://www.gsp.ro/rss.xml') ]
def preprocess_html(self, soup):
    '''Return the article markup after running it through ``adeify_images``.'''
    adjusted = self.adeify_images(soup)
    return adjusted
def print_version(self, url):
    '''
    Build the print-friendly URL for an article.

    Everything after the last ``/`` in ``url`` is appended to the print
    base URL.  ``ValueError`` is raised when ``url`` contains no ``/``.
    '''
    last_slash = url.rindex('/')
    article_part = url[last_slash + 1:]
    return 'http://www1.gsp.ro/print/' + article_part

View File

@ -16,15 +16,9 @@ class Lifehacker(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = True
language = 'en' language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png' masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
'''
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
@ -32,20 +26,12 @@ class Lifehacker(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
remove_attributes = ['width', 'height', 'style']
remove_tags_before = dict(name='h1')
keep_only_tags = [dict(id='container')]
remove_tags_after = dict(attrs={'class':'post-body'})
remove_tags = [ remove_tags = [
dict(id="sharemenu"), {'class': 'feedflare'},
{'class': 'related'},
] ]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')] feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)
def print_version(self, url):
return url.replace('#!', '?_escaped_fragment_=')

View File

@ -88,8 +88,8 @@ class NYTimes(BasicNewsRecipe):
if headlinesOnly: if headlinesOnly:
title='New York Times Headlines' title='New York Times Headlines'
description = 'Headlines from the New York Times' description = 'Headlines from the New York Times. Needs a subscription from http://www.nytimes.com'
needs_subscription = False needs_subscription = 'optional'
elif webEdition: elif webEdition:
title='New York Times (Web)' title='New York Times (Web)'
description = 'New York Times on the Web' description = 'New York Times on the Web'

View File

@ -96,7 +96,7 @@ class NYTimes(BasicNewsRecipe):
if headlinesOnly: if headlinesOnly:
title='New York Times Headlines' title='New York Times Headlines'
description = 'Headlines from the New York Times' description = 'Headlines from the New York Times'
needs_subscription = False needs_subscription = True
elif webEdition: elif webEdition:
title='New York Times (Web)' title='New York Times (Web)'
description = 'New York Times on the Web' description = 'New York Times on the Web'
@ -107,7 +107,7 @@ class NYTimes(BasicNewsRecipe):
needs_subscription = True needs_subscription = True
else: else:
title='New York Times' title='New York Times'
description = 'Today\'s New York Times' description = 'Today\'s New York Times. Needs subscription from http://www.nytimes.com'
needs_subscription = True needs_subscription = True

View File

@ -15,6 +15,7 @@
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n", "template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n",
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n", "print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n", "titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
"sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n",
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n", "test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n", "eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n", "multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",

View File

@ -391,11 +391,6 @@ noembed, param, link {
display: none; display: none;
} }
/* Page breaks at body tags, to help out with LIT-generation */
body {
page-break-before: always;
}
/* Explicit line-breaks are blocks, sure... */ /* Explicit line-breaks are blocks, sure... */
br { br {
display: block; display: block;

View File

@ -571,7 +571,7 @@ from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER
from calibre.devices.sne.driver import SNE from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \ GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
@ -679,7 +679,7 @@ plugins += [
ELONEX, ELONEX,
TECLAST_K3, TECLAST_K3,
NEWSMY, NEWSMY,
PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, WEXLER,
IPAPYRUS, IPAPYRUS,
EDGE, EDGE,
SNE, SNE,

View File

@ -594,7 +594,7 @@ def main(args=sys.argv):
if remove_plugin(opts.remove_plugin): if remove_plugin(opts.remove_plugin):
print 'Plugin removed' print 'Plugin removed'
else: else:
print 'No custom pluginnamed', opts.remove_plugin print 'No custom plugin named', opts.remove_plugin
if opts.customize_plugin is not None: if opts.customize_plugin is not None:
name, custom = opts.customize_plugin.split(',') name, custom = opts.customize_plugin.split(',')
plugin = find_plugin(name.strip()) plugin = find_plugin(name.strip())

View File

@ -74,6 +74,9 @@ class ANDROID(USBMS):
# T-Mobile # T-Mobile
0x0408 : { 0x03ba : [0x0109], }, 0x0408 : { 0x03ba : [0x0109], },
# Xperia
0x13d3 : { 0x3304 : [0x0001, 0x0002] },
} }
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books'] EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -83,7 +86,7 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', ] 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',

View File

@ -78,9 +78,13 @@ class KOBO(USBMS):
else self._main_prefix else self._main_prefix
# Determine the firmware version # Determine the firmware version
f = open(self.normalize_path(self._main_prefix + '.kobo/version'), 'r') try:
with open(self.normalize_path(self._main_prefix + '.kobo/version'),
'rb') as f:
self.fwversion = f.readline().split(',')[2] self.fwversion = f.readline().split(',')[2]
f.close() except:
self.fwversion = 'unknown'
if self.fwversion != '1.0' and self.fwversion != '1.4': if self.fwversion != '1.0' and self.fwversion != '1.4':
self.has_kepubs = True self.has_kepubs = True
debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs) debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)

View File

@ -104,3 +104,14 @@ class STASH(TECLAST_K3):
VENDOR_NAME = 'STASH' VENDOR_NAME = 'STASH'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'W950' WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'W950'
class WEXLER(TECLAST_K3):

    # Driver description for the Wexler reader; anything not overridden
    # here comes from TECLAST_K3.
    name = 'Wexler device interface'
    gui_name = 'Wexler'
    description = _('Communicate with the Wexler reader.')

    # Ebook formats listed for this device.
    FORMATS = ['epub', 'fb2', 'pdf', 'txt']

    VENDOR_NAME = 'WEXLER'
    # Main memory and card A share one Windows identifier string.
    WINDOWS_CARD_A_MEM = 'T7001'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM

View File

@ -304,6 +304,10 @@ class ComicInput(InputFormatPlugin):
help=_('Specify the image size as widthxheight pixels. Normally,' help=_('Specify the image size as widthxheight pixels. Normally,'
' an image size is automatically calculated from the output ' ' an image size is automatically calculated from the output '
'profile, this option overrides it.')), 'profile, this option overrides it.')),
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
help=_('When converting a CBC do not add links to each page to'
' the TOC. Note this only applies if the TOC has more than one'
' section')),
]) ])
recommendations = set([ recommendations = set([
@ -449,6 +453,7 @@ class ComicInput(InputFormatPlugin):
wrappers = comic[2] wrappers = comic[2]
stoc = toc.add_item(href(wrappers[0]), stoc = toc.add_item(href(wrappers[0]),
None, comic[0], play_order=po) None, comic[0], play_order=po)
if not opts.dont_add_comic_pages_to_toc:
for i, x in enumerate(wrappers): for i, x in enumerate(wrappers):
stoc.add_item(href(x), None, stoc.add_item(href(x), None,
_('Page')+' %d'%(i+1), play_order=po) _('Page')+' %d'%(i+1), play_order=po)

View File

@ -984,7 +984,9 @@ OptionRecommendation(name='sr3_replace',
flattener = CSSFlattener(fbase=fbase, fkey=fkey, flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=line_height, lineh=line_height,
untable=self.output_plugin.file_type in ('mobi','lit'), untable=self.output_plugin.file_type in ('mobi','lit'),
unfloat=self.output_plugin.file_type in ('mobi', 'lit')) unfloat=self.output_plugin.file_type in ('mobi', 'lit'),
page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit'))
flattener(self.oeb, self.opts) flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl self.opts.insert_blank_line = oibl
self.opts.remove_paragraph_spacing = orps self.opts.remove_paragraph_spacing = orps

View File

@ -22,7 +22,8 @@ class LITOutput(OutputFormatPlugin):
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.lit.writer import LitWriter from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.oeb.transforms.split import Split from calibre.ebooks.oeb.transforms.split import Split
split = Split(split_on_page_breaks=True, max_flow_size=0) split = Split(split_on_page_breaks=True, max_flow_size=0,
remove_css_pagebreaks=False)
split(self.oeb, self.opts) split(self.oeb, self.opts)

View File

@ -83,6 +83,10 @@ CALIBRE_METADATA_FIELDS = frozenset([
'application_id', # An application id, currently set to the db_id. 'application_id', # An application id, currently set to the db_id.
'db_id', # the calibre primary key of the item. 'db_id', # the calibre primary key of the item.
'formats', # list of formats (extensions) for this book 'formats', # list of formats (extensions) for this book
# a dict of user category names, where the value is a list of item names
# from the book that are in that category
'user_categories',
] ]
) )

View File

@ -30,6 +30,7 @@ NULL_VALUES = {
'author_sort_map': {}, 'author_sort_map': {},
'authors' : [_('Unknown')], 'authors' : [_('Unknown')],
'title' : _('Unknown'), 'title' : _('Unknown'),
'user_categories' : {},
'language' : 'und' 'language' : 'und'
} }

View File

@ -470,6 +470,13 @@ def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8))
metadata_elem.append(meta) metadata_elem.append(meta)
def dump_user_categories(cats):
    '''
    Serialize a user-categories mapping to a JSON string.

    A falsy ``cats`` (``None``, an empty dict, ...) is serialized as an
    empty JSON object.  Non-ASCII characters are emitted as-is and keys
    that JSON cannot represent are skipped.
    '''
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode
    payload = object_to_unicode(cats if cats else {})
    return json.dumps(payload, ensure_ascii=False, skipkeys=True)
class OPF(object): # {{{ class OPF(object): # {{{
MIMETYPE = 'application/oebps-package+xml' MIMETYPE = 'application/oebps-package+xml'
@ -524,6 +531,9 @@ class OPF(object): # {{{
publication_type = MetadataField('publication_type', is_dc=False) publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False, timestamp = MetadataField('timestamp', is_dc=False,
formatter=parse_date, renderer=isoformat) formatter=parse_date, renderer=isoformat)
user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads,
renderer=dump_user_categories)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True, def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
@ -994,7 +1004,7 @@ class OPF(object): # {{{
for attr in ('title', 'authors', 'author_sort', 'title_sort', for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'category', 'comments', 'isbn', 'tags', 'category', 'comments',
'pubdate'): 'pubdate', 'user_categories'):
val = getattr(mi, attr, None) val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None): if val is not None and val != [] and val != (None, None):
setattr(self, attr, val) setattr(self, attr, val)
@ -1175,6 +1185,10 @@ class OPFCreator(Metadata):
a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat())) a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
if self.publication_type is not None: if self.publication_type is not None:
a(CAL_ELEM('calibre:publication_type', self.publication_type)) a(CAL_ELEM('calibre:publication_type', self.publication_type))
if self.user_categories:
from calibre.ebooks.metadata.book.json_codec import object_to_unicode
a(CAL_ELEM('calibre:user_categories',
json.dumps(object_to_unicode(self.user_categories))))
manifest = E.manifest() manifest = E.manifest()
if self.manifest is not None: if self.manifest is not None:
for ref in self.manifest: for ref in self.manifest:
@ -1299,6 +1313,8 @@ def metadata_to_opf(mi, as_string=True):
meta('publication_type', mi.publication_type) meta('publication_type', mi.publication_type)
if mi.title_sort: if mi.title_sort:
meta('title_sort', mi.title_sort) meta('title_sort', mi.title_sort)
if mi.user_categories:
meta('user_categories', dump_user_categories(mi.user_categories))
serialize_user_metadata(metadata, mi.get_all_user_metadata(False)) serialize_user_metadata(metadata, mi.get_all_user_metadata(False))

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import re, threading
from calibre.customize import Plugin from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream from calibre.utils.logging import ThreadSafeLog, FileStream
@ -30,7 +30,21 @@ class Source(Plugin):
touched_fields = frozenset() touched_fields = frozenset()
def __init__(self, *args, **kwargs):
    '''Initialise the plugin base class, then the ISBN -> identifier cache.'''
    Plugin.__init__(self, *args, **kwargs)
    # Re-entrant lock used by the cache accessors of this class.
    self.cache_lock = threading.RLock()
    self._isbn_to_identifier_cache = {}
# Utility functions {{{ # Utility functions {{{
def cache_isbn_to_identifier(self, isbn, identifier):
    '''Record ``identifier`` as the cached value for ``isbn`` (under the cache lock).'''
    with self.cache_lock:
        cache = self._isbn_to_identifier_cache
        cache[isbn] = identifier
def cached_isbn_to_identifier(self, isbn):
    '''Return the identifier cached for ``isbn``, or ``None`` if there is none.'''
    with self.cache_lock:
        cache = self._isbn_to_identifier_cache
        return cache.get(isbn)
def get_author_tokens(self, authors, only_first_author=True): def get_author_tokens(self, authors, only_first_author=True):
''' '''
Take a list of authors and return a list of tokens useful for an Take a list of authors and return a list of tokens useful for an

View File

@ -13,6 +13,7 @@ from functools import partial
from lxml import etree from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
@ -69,6 +70,7 @@ def to_metadata(browser, log, entry_, timeout):
id_url = entry_id(entry_)[0].text id_url = entry_id(entry_)[0].text
google_id = id_url.split('/')[-1]
title_ = ': '.join([x.text for x in title(entry_)]).strip() title_ = ': '.join([x.text for x in title(entry_)]).strip()
authors = [x.text.strip() for x in creator(entry_) if x.text] authors = [x.text.strip() for x in creator(entry_) if x.text]
if not authors: if not authors:
@ -78,6 +80,7 @@ def to_metadata(browser, log, entry_, timeout):
return None return None
mi = Metadata(title_, authors) mi = Metadata(title_, authors)
mi.identifiers = {'google':google_id}
try: try:
raw = get_details(browser, id_url, timeout) raw = get_details(browser, id_url, timeout)
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw), feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
@ -103,9 +106,12 @@ def to_metadata(browser, log, entry_, timeout):
t = str(x.text).strip() t = str(x.text).strip()
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'): if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
if t[:5].upper() == 'ISBN:': if t[:5].upper() == 'ISBN:':
isbns.append(t[5:]) t = check_isbn(t[5:])
if t:
isbns.append(t)
if isbns: if isbns:
mi.isbn = sorted(isbns, key=len)[-1] mi.isbn = sorted(isbns, key=len)[-1]
mi.all_isbns = isbns
# Tags # Tags
try: try:
@ -133,20 +139,6 @@ def to_metadata(browser, log, entry_, timeout):
return mi return mi
def get_all_details(br, log, entries, abort, result_queue, timeout):
for i in entries:
try:
ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata):
result_queue.put(ans)
except:
log.exception(
'Failed to get metadata for identify entry:',
etree.tostring(i))
if abort.is_set():
break
class GoogleBooks(Source): class GoogleBooks(Source):
name = 'Google Books' name = 'Google Books'
@ -185,6 +177,36 @@ class GoogleBooks(Source):
'min-viewability':'none', 'min-viewability':'none',
}) })
def cover_url_from_identifiers(self, identifiers):
    '''
    Build the Google Books cover URL for a set of identifiers.

    The ``google`` identifier is used when present; otherwise the ``isbn``
    identifier is looked up in the ISBN -> identifier cache.  Returns
    ``None`` when no Google identifier can be determined.
    '''
    google_id = identifiers.get('google')
    if google_id is None:
        google_id = self.cached_isbn_to_identifier(identifiers.get('isbn'))
    if google_id is None:
        return None
    return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
            google_id)
def is_cover_image_valid(self, raw):
    '''
    Decide whether downloaded cover data looks like a real cover.

    When no cover is present, a PNG saying "image not available" is
    returned instead (try for example google identifier llNqPwAACAAJ);
    no actual cover in PNG format has been seen so far.  Data is therefore
    accepted only if it is longer than 17000 bytes and is not a PNG.

    A falsy ``raw`` is returned unchanged.
    '''
    if not raw:
        return raw
    if len(raw) <= 17000:
        return False
    return raw[1:4] != 'PNG'
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
    '''
    Convert feed entries to metadata, one entry at a time.

    Every entry for which ``to_metadata`` yields a ``Metadata`` object is
    put on ``result_queue`` and each of its ISBNs is cached against its
    google identifier.  A failure on one entry is logged and does not stop
    the remaining entries.  ``abort`` is checked after each entry and ends
    the loop once it is set.
    '''
    for entry in entries:
        try:
            mi = to_metadata(br, log, entry, timeout)
            if isinstance(mi, Metadata):
                result_queue.put(mi)
                for isbn in mi.all_isbns:
                    self.cache_isbn_to_identifier(
                        isbn, mi.identifiers['google'])
        except:
            log.exception(
                'Failed to get metadata for identify entry:',
                etree.tostring(entry))
        if abort.is_set():
            break
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5): identifiers={}, timeout=5):
@ -207,8 +229,8 @@ class GoogleBooks(Source):
return as_unicode(e) return as_unicode(e)
# There is no point running these queries in threads as google # There is no point running these queries in threads as google
# throttles requests returning Forbidden errors # throttles requests returning 403 Forbidden errors
get_all_details(br, log, entries, abort, result_queue, timeout) self.get_all_details(br, log, entries, abort, result_queue, timeout)
return None return None
@ -218,8 +240,14 @@ if __name__ == '__main__':
title_test) title_test)
test_identify_plugin(GoogleBooks.name, test_identify_plugin(GoogleBooks.name,
[ [
( (
{'title': 'Great Expectations', 'authors':['Charles Dickens']}, {'identifiers':{'isbn': '0743273567'}},
[title_test('Great Expectations', exact=True)] [title_test('The great gatsby', exact=True)]
), ),
#(
# {'title': 'Great Expectations', 'authors':['Charles Dickens']},
# [title_test('Great Expectations', exact=True)]
#),
]) ])

View File

@ -242,9 +242,11 @@ class MobiReader(object):
self.debug = debug self.debug = debug
self.embedded_mi = None self.embedded_mi = None
self.base_css_rules = textwrap.dedent(''' self.base_css_rules = textwrap.dedent('''
blockquote { margin: 0em 0em 0em 2em; text-align: justify } body { text-align: justify }
p { margin: 0em; text-align: justify; text-indent: 1.5em } blockquote { margin: 0em 0em 0em 2em; }
p { margin: 0em; text-indent: 1.5em }
.bold { font-weight: bold } .bold { font-weight: bold }

View File

@ -32,6 +32,12 @@ class OEBOutput(OutputFormatPlugin):
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
href, root = results.pop(key, [None, None]) href, root = results.pop(key, [None, None])
if root is not None: if root is not None:
if key == OPF_MIME:
try:
self.workaround_nook_cover_bug(root)
except:
self.log.exception('Something went wrong while trying to'
' workaround Nook cover bug, ignoring')
raw = etree.tostring(root, pretty_print=True, raw = etree.tostring(root, pretty_print=True,
encoding='utf-8', xml_declaration=True) encoding='utf-8', xml_declaration=True)
if key == OPF_MIME: if key == OPF_MIME:
@ -49,3 +55,24 @@ class OEBOutput(OutputFormatPlugin):
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.write(str(item)) f.write(str(item))
item.unload_data_from_memory(memory=path) item.unload_data_from_memory(memory=path)
def workaround_nook_cover_bug(self, root): # {{{
    '''
    Rename the cover image's manifest id to "cover" in the OPF tree ``root``.

    The rename happens only when all of the following hold:

      * exactly one <meta name="cover"> element exists whose content is
        not already "cover"
      * exactly one manifest item with a media-type has the id that the
        meta element points to, and that media-type starts with image/
      * no manifest item with a media-type already uses the id "cover"

    ``root`` is modified in place; nothing is returned.
    '''
    cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
            ' @content != "cover"]')
    if len(cov) != 1:
        return
    cov = cov[0]
    covid = cov.get('content')
    # The id is supplied as an XPath variable rather than being pasted into
    # the expression text, so ids containing quote characters cannot break
    # (or alter) the query
    manpath = ('//*[local-name() = "manifest"]/*[local-name() = "item" '
            ' and @id=$item_id and @media-type]')
    manifest_item = root.xpath(manpath, item_id=covid)
    has_cover = root.xpath(manpath, item_id='cover')
    if len(manifest_item) == 1 and not has_cover and \
            manifest_item[0].get('media-type',
                    '').startswith('image/'):
        self.log.warn('The cover image has an id != "cover". Renaming'
                ' to work around Nook Color bug')
        manifest_item = manifest_item[0]
        manifest_item.set('id', 'cover')
        cov.set('content', 'cover')
# }}}

View File

@ -100,12 +100,13 @@ def FontMapper(sbase=None, dbase=None, dkey=None):
class CSSFlattener(object): class CSSFlattener(object):
def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False, def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
untable=False): untable=False, page_break_on_body=False):
self.fbase = fbase self.fbase = fbase
self.fkey = fkey self.fkey = fkey
self.lineh = lineh self.lineh = lineh
self.unfloat = unfloat self.unfloat = unfloat
self.untable = untable self.untable = untable
self.page_break_on_body = page_break_on_body
@classmethod @classmethod
def config(cls, cfg): def config(cls, cfg):
@ -139,6 +140,8 @@ class CSSFlattener(object):
bs.append('margin-right : %fpt'%\ bs.append('margin-right : %fpt'%\
float(self.context.margin_right)) float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.page_break_on_body:
bs.extend(['page-break-before: always'])
if self.context.change_justification != 'original': if self.context.change_justification != 'original':
bs.append('text-align: '+ self.context.change_justification) bs.append('text-align: '+ self.context.change_justification)
body.set('style', '; '.join(bs)) body.set('style', '; '.join(bs))

View File

@ -38,11 +38,12 @@ class SplitError(ValueError):
class Split(object): class Split(object):
def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None, def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
max_flow_size=0): max_flow_size=0, remove_css_pagebreaks=True):
self.split_on_page_breaks = split_on_page_breaks self.split_on_page_breaks = split_on_page_breaks
self.page_breaks_xpath = page_breaks_xpath self.page_breaks_xpath = page_breaks_xpath
self.max_flow_size = max_flow_size self.max_flow_size = max_flow_size
self.page_break_selectors = None self.page_break_selectors = None
self.remove_css_pagebreaks = remove_css_pagebreaks
if self.page_breaks_xpath is not None: if self.page_breaks_xpath is not None:
self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)] self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]
@ -83,12 +84,16 @@ class Split(object):
if before and before != 'avoid': if before and before != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText), self.page_break_selectors.add((CSSSelector(rule.selectorText),
True)) True))
if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-before')
except: except:
pass pass
try: try:
if after and after != 'avoid': if after and after != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText), self.page_break_selectors.add((CSSSelector(rule.selectorText),
False)) False))
if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-after')
except: except:
pass pass
page_breaks = set([]) page_breaks = set([])

View File

@ -22,7 +22,8 @@ class PluginWidget(Widget, Ui_Form):
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left', ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
'despeckle', 'no_sort', 'no_process', 'landscape', 'despeckle', 'no_sort', 'no_process', 'landscape',
'dont_sharpen', 'disable_trim', 'wide', 'output_format', 'dont_sharpen', 'disable_trim', 'wide', 'output_format',
'dont_grayscale', 'comic_image_size'] 'dont_grayscale', 'comic_image_size',
'dont_add_comic_pages_to_toc']
) )
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
for x in get_option('output_format').option.choices: for x in get_option('output_format').option.choices:

View File

@ -14,7 +14,7 @@
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="2" column="0"> <item row="3" column="0">
<widget class="QLabel" name="label_3"> <widget class="QLabel" name="label_3">
<property name="text"> <property name="text">
<string>&amp;Number of Colors:</string> <string>&amp;Number of Colors:</string>
@ -24,7 +24,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1"> <item row="3" column="1">
<widget class="QSpinBox" name="opt_colors"> <widget class="QSpinBox" name="opt_colors">
<property name="minimum"> <property name="minimum">
<number>8</number> <number>8</number>
@ -37,70 +37,70 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="0"> <item row="5" column="0">
<widget class="QCheckBox" name="opt_dont_normalize"> <widget class="QCheckBox" name="opt_dont_normalize">
<property name="text"> <property name="text">
<string>Disable &amp;normalize</string> <string>Disable &amp;normalize</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="0"> <item row="6" column="0">
<widget class="QCheckBox" name="opt_keep_aspect_ratio"> <widget class="QCheckBox" name="opt_keep_aspect_ratio">
<property name="text"> <property name="text">
<string>Keep &amp;aspect ratio</string> <string>Keep &amp;aspect ratio</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="6" column="0"> <item row="7" column="0">
<widget class="QCheckBox" name="opt_dont_sharpen"> <widget class="QCheckBox" name="opt_dont_sharpen">
<property name="text"> <property name="text">
<string>Disable &amp;Sharpening</string> <string>Disable &amp;Sharpening</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0"> <item row="8" column="0">
<widget class="QCheckBox" name="opt_disable_trim"> <widget class="QCheckBox" name="opt_disable_trim">
<property name="text"> <property name="text">
<string>Disable &amp;Trimming</string> <string>Disable &amp;Trimming</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="8" column="0"> <item row="9" column="0">
<widget class="QCheckBox" name="opt_wide"> <widget class="QCheckBox" name="opt_wide">
<property name="text"> <property name="text">
<string>&amp;Wide</string> <string>&amp;Wide</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="9" column="0"> <item row="10" column="0">
<widget class="QCheckBox" name="opt_landscape"> <widget class="QCheckBox" name="opt_landscape">
<property name="text"> <property name="text">
<string>&amp;Landscape</string> <string>&amp;Landscape</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="10" column="0"> <item row="11" column="0">
<widget class="QCheckBox" name="opt_right2left"> <widget class="QCheckBox" name="opt_right2left">
<property name="text"> <property name="text">
<string>&amp;Right to left</string> <string>&amp;Right to left</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="11" column="0"> <item row="12" column="0">
<widget class="QCheckBox" name="opt_no_sort"> <widget class="QCheckBox" name="opt_no_sort">
<property name="text"> <property name="text">
<string>Don't so&amp;rt</string> <string>Don't so&amp;rt</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="12" column="0"> <item row="13" column="0">
<widget class="QCheckBox" name="opt_despeckle"> <widget class="QCheckBox" name="opt_despeckle">
<property name="text"> <property name="text">
<string>De&amp;speckle</string> <string>De&amp;speckle</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="14" column="0"> <item row="15" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="13" column="0"> <item row="14" column="0">
<widget class="QLabel" name="label"> <widget class="QLabel" name="label">
<property name="text"> <property name="text">
<string>&amp;Output format:</string> <string>&amp;Output format:</string>
@ -130,7 +130,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="13" column="1"> <item row="14" column="1">
<widget class="QComboBox" name="opt_output_format"/> <widget class="QComboBox" name="opt_output_format"/>
</item> </item>
<item row="1" column="0"> <item row="1" column="0">
@ -140,7 +140,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0"> <item row="4" column="0">
<widget class="QLabel" name="label_2"> <widget class="QLabel" name="label_2">
<property name="text"> <property name="text">
<string>Override image &amp;size:</string> <string>Override image &amp;size:</string>
@ -150,9 +150,16 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1"> <item row="4" column="1">
<widget class="QLineEdit" name="opt_comic_image_size"/> <widget class="QLineEdit" name="opt_comic_image_size"/>
</item> </item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_dont_add_comic_pages_to_toc">
<property name="text">
<string>Don't add links to &amp;pages to the Table of Contents for CBC files</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View File

@ -44,7 +44,8 @@
<widget class="QLabel" name="msg"> <widget class="QLabel" name="msg">
<property name="text"> <property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre, <string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string> &lt;a href=&quot;http://drmfree.calibre-ebook.com/about#drm&quot;&gt;click here&lt;/a&gt;.&lt;p&gt;A large number of recent, DRM free releases are
available at &lt;a href=&quot;http://drmfree.calibre-ebook.com&quot;&gt;Open Books&lt;/a&gt;.</string>
</property> </property>
<property name="wordWrap"> <property name="wordWrap">
<bool>true</bool> <bool>true</bool>

View File

@ -73,16 +73,17 @@ class TagCategories(QDialog, Ui_TagCategories):
if idx == 0: if idx == 0:
continue continue
for n in category_values[idx](): for n in category_values[idx]():
t = Item(name=n, label=label, index=len(self.all_items),icon=category_icons[idx], exists=True) t = Item(name=n, label=label, index=len(self.all_items),
icon=category_icons[idx], exists=True)
self.all_items.append(t) self.all_items.append(t)
self.all_items_dict[label+':'+n] = t self.all_items_dict[icu_lower(label+':'+n)] = t
self.categories = dict.copy(db.prefs.get('user_categories', {})) self.categories = dict.copy(db.prefs.get('user_categories', {}))
if self.categories is None: if self.categories is None:
self.categories = {} self.categories = {}
for cat in self.categories: for cat in self.categories:
for item,l in enumerate(self.categories[cat]): for item,l in enumerate(self.categories[cat]):
key = ':'.join([l[1], l[0]]) key = icu_lower(':'.join([l[1], l[0]]))
t = self.all_items_dict.get(key, None) t = self.all_items_dict.get(key, None)
if l[1] in self.category_labels: if l[1] in self.category_labels:
if t is None: if t is None:
@ -231,6 +232,12 @@ class TagCategories(QDialog, Ui_TagCategories):
def accept(self): def accept(self):
self.save_category() self.save_category()
for cat in sorted(self.categories.keys(), key=sort_key):
components = cat.split('.')
for i in range(0,len(components)):
c = '.'.join(components[0:i+1])
if c not in self.categories:
self.categories[c] = []
QDialog.accept(self) QDialog.accept(self)
def save_category(self): def save_category(self):

View File

@ -58,10 +58,12 @@ class TagListEditor(QDialog, Ui_TagListEditor):
self.to_rename = {} self.to_rename = {}
self.to_delete = set([]) self.to_delete = set([])
self.original_names = {}
self.all_tags = {} self.all_tags = {}
for k,v in data: for k,v in data:
self.all_tags[v] = k self.all_tags[v] = k
self.original_names[k] = v
for tag in sorted(self.all_tags.keys(), key=key): for tag in sorted(self.all_tags.keys(), key=key):
item = ListWidgetItem(tag) item = ListWidgetItem(tag)
item.setData(Qt.UserRole, self.all_tags[tag]) item.setData(Qt.UserRole, self.all_tags[tag])

View File

@ -209,7 +209,6 @@ class EmailMixin(object): # {{{
def __init__(self): def __init__(self):
self.emailer = Emailer(self.job_manager) self.emailer = Emailer(self.job_manager)
self.emailer.start()
def send_by_mail(self, to, fmts, delete_from_library, send_ids=None, def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,
do_auto_convert=True, specific_format=None): do_auto_convert=True, specific_format=None):
@ -255,6 +254,8 @@ class EmailMixin(object): # {{{
to_s = list(repeat(to, len(attachments))) to_s = list(repeat(to, len(attachments)))
if attachments: if attachments:
if not self.emailer.is_alive():
self.emailer.start()
self.emailer.send_mails(jobnames, self.emailer.send_mails(jobnames,
Dispatcher(partial(self.email_sent, remove=remove)), Dispatcher(partial(self.email_sent, remove=remove)),
attachments, to_s, subjects, texts, attachment_names) attachments, to_s, subjects, texts, attachment_names)
@ -325,6 +326,8 @@ class EmailMixin(object): # {{{
files, auto = self.library_view.model().\ files, auto = self.library_view.model().\
get_preferred_formats_from_ids([id_], fmts) get_preferred_formats_from_ids([id_], fmts)
return files return files
if not self.emailer.is_alive():
self.emailer.start()
sent_mails = self.emailer.email_news(mi, remove, sent_mails = self.emailer.email_news(mi, remove,
get_fmts, self.email_sent) get_fmts, self.email_sent)
if sent_mails: if sent_mails:

View File

@ -7,17 +7,19 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QApplication, QFont, QFontInfo, QFontDialog from PyQt4.Qt import QApplication, QFont, QFontInfo, QFontDialog
from calibre.gui2.preferences import ConfigWidgetBase, test_widget from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
from calibre.gui2.preferences.look_feel_ui import Ui_Form from calibre.gui2.preferences.look_feel_ui import Ui_Form
from calibre.gui2 import config, gprefs, qt_app from calibre.gui2 import config, gprefs, qt_app
from calibre.utils.localization import available_translations, \ from calibre.utils.localization import available_translations, \
get_language, get_lang get_language, get_lang
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
class ConfigWidget(ConfigWidgetBase, Ui_Form): class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui): def genesis(self, gui):
self.gui = gui self.gui = gui
db = gui.library_view.model().db
r = self.register r = self.register
@ -61,6 +63,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('tags_browser_partition_method', gprefs, choices=choices) r('tags_browser_partition_method', gprefs, choices=choices)
r('tags_browser_collapse_at', gprefs) r('tags_browser_collapse_at', gprefs)
choices = set([k for k in db.field_metadata.all_field_keys()
if db.field_metadata[k]['is_category'] and
db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']])
choices -= set(['authors', 'publisher', 'formats', 'news'])
self.opt_categories_using_hierarchy.update_items_cache(choices)
r('categories_using_hierarchy', db.prefs, setting=CommaSeparatedList,
choices=sorted(list(choices), key=sort_key))
self.current_font = None self.current_font = None
self.change_font_button.clicked.connect(self.change_font) self.change_font_button.clicked.connect(self.change_font)

View File

@ -7,7 +7,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>670</width> <width>670</width>
<height>392</height> <height>422</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -136,7 +136,7 @@
<item> <item>
<widget class="QLabel" name="label_6"> <widget class="QLabel" name="label_6">
<property name="text"> <property name="text">
<string>Tags browser category partitioning method:</string> <string>Tags browser category &amp;partitioning method:</string>
</property> </property>
<property name="buddy"> <property name="buddy">
<cstring>opt_tags_browser_partition_method</cstring> <cstring>opt_tags_browser_partition_method</cstring>
@ -157,7 +157,7 @@ if you never want subcategories</string>
<item> <item>
<widget class="QLabel" name="label_6"> <widget class="QLabel" name="label_6">
<property name="text"> <property name="text">
<string>Collapse when more items than:</string> <string>&amp;Collapse when more items than:</string>
</property> </property>
<property name="buddy"> <property name="buddy">
<cstring>opt_tags_browser_collapse_at</cstring> <cstring>opt_tags_browser_collapse_at</cstring>
@ -190,6 +190,28 @@ up into sub-categories. If the partition method is set to disable, this value is
</item> </item>
</layout> </layout>
</item> </item>
<item row="8" column="0">
<widget class="QLabel" name="label_81">
<property name="text">
<string>Categories with &amp;hierarchical items:</string>
</property>
<property name="buddy">
<cstring>opt_categories_using_hierarchy</cstring>
</property>
</widget>
</item>
<item row="8" column="1">
<widget class="MultiCompleteLineEdit" name="opt_categories_using_hierarchy">
<property name="toolTip">
<string>A comma-separated list of columns in which items containing
periods are displayed in the tag browser trees. For example, if
this box contains 'tags' then tags of the form 'Mystery.English'
and 'Mystery.Thriller' will be displayed with English and Thriller
both under 'Mystery'. If 'tags' is not in this box,
then the tags will be displayed each on their own line.</string>
</property>
</widget>
</item>
<item row="15" column="0" colspan="2"> <item row="15" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox_2"> <widget class="QGroupBox" name="groupBox_2">
<property name="title"> <property name="title">
@ -275,6 +297,13 @@ up into sub-categories. If the partition method is set to disable, this value is
</item> </item>
</layout> </layout>
</widget> </widget>
<customwidgets>
<customwidget>
<class>MultiCompleteLineEdit</class>
<extends>QLineEdit</extends>
<header>calibre/gui2/complete.h</header>
</customwidget>
</customwidgets>
<resources/> <resources/>
<connections/> <connections/>
</ui> </ui>

View File

@ -167,9 +167,10 @@ class StoreDownloadMixin(object):
def __init__(self): def __init__(self):
self.store_downloader = StoreDownloader(self.job_manager) self.store_downloader = StoreDownloader(self.job_manager)
self.store_downloader.start()
def download_from_store(self, url='', save_as_loc='', add_to_lib=True): def download_from_store(self, url='', save_as_loc='', add_to_lib=True):
if not self.store_downloader.is_alive():
self.store_downloader.start()
self.store_downloader.download_from_store(Dispatcher(self.downloaded_from_store), self.library_view.model().db, url, save_as_loc, add_to_lib) self.store_downloader.download_from_store(Dispatcher(self.downloaded_from_store), self.library_view.model().db, url, save_as_loc, add_to_lib)
self.status_bar.show_message(_('Downloading') + ' ' + url, 3000) self.status_bar.show_message(_('Downloading') + ' ' + url, 3000)

File diff suppressed because it is too large Load Diff

View File

@ -635,6 +635,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
mb.stop() mb.stop()
self.hide_windows() self.hide_windows()
if self.emailer.is_alive():
self.emailer.stop() self.emailer.stop()
try: try:
try: try:

View File

@ -124,8 +124,15 @@ def _match(query, value, matchkind):
for t in value: for t in value:
t = icu_lower(t) t = icu_lower(t)
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
if ((matchkind == EQUALS_MATCH and query == t) or if (matchkind == EQUALS_MATCH):
(matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored if query[0] == '.':
if t.startswith(query[1:]):
ql = len(query) - 1
if (len(t) == ql) or (t[ql:ql+1] == '.'):
return True
elif query == t:
return True
elif ((matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
(matchkind == CONTAINS_MATCH and query in t)): (matchkind == CONTAINS_MATCH and query in t)):
return True return True
except re.error: except re.error:
@ -415,10 +422,22 @@ class ResultCache(SearchQueryParser): # {{{
if self.db_prefs is None: if self.db_prefs is None:
return res return res
user_cats = self.db_prefs.get('user_categories', []) user_cats = self.db_prefs.get('user_categories', [])
if location not in user_cats:
return res
c = set(candidates) c = set(candidates)
for (item, category, ign) in user_cats[location]: l = location.rfind('.')
if l > 0:
alt_loc = location[0:l]
alt_item = location[l+1:]
else:
alt_loc = None
for key in user_cats:
if key == location or key.startswith(location + '.'):
for (item, category, ign) in user_cats[key]:
s = self.get_matches(category, '=' + item, candidates=c)
c -= s
res |= s
elif key == alt_loc:
for (item, category, ign) in user_cats[key]:
if item == alt_item:
s = self.get_matches(category, '=' + item, candidates=c) s = self.get_matches(category, '=' + item, candidates=c)
c -= s c -= s
res |= s res |= s

View File

@ -174,6 +174,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.prefs = DBPrefs(self) self.prefs = DBPrefs(self)
defs = self.prefs.defaults defs = self.prefs.defaults
defs['gui_restriction'] = defs['cs_restriction'] = '' defs['gui_restriction'] = defs['cs_restriction'] = ''
defs['categories_using_hierarchy'] = []
# Migrate saved search and user categories to db preference scheme # Migrate saved search and user categories to db preference scheme
def migrate_preference(key, default): def migrate_preference(key, default):
@ -812,6 +813,21 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
index_is_id=index_is_id), index_is_id=index_is_id),
extra=self.get_custom_extra(idx, label=meta['label'], extra=self.get_custom_extra(idx, label=meta['label'],
index_is_id=index_is_id)) index_is_id=index_is_id))
user_cats = self.prefs['user_categories']
user_cat_vals = {}
for ucat in user_cats:
res = []
for name,cat,ign in user_cats[ucat]:
v = mi.get(cat, None)
if isinstance(v, list):
if name in v:
res.append([name,cat])
elif name == v:
res.append([name,cat])
user_cat_vals[ucat] = res
mi.user_categories = user_cat_vals
if get_cover: if get_cover:
mi.cover = self.cover(id, index_is_id=True, as_path=True) mi.cover = self.cover(id, index_is_id=True, as_path=True)
return mi return mi
@ -1406,7 +1422,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
# temporarily duplicating the categories lists. # temporarily duplicating the categories lists.
taglist = {} taglist = {}
for c in categories.keys(): for c in categories.keys():
taglist[c] = dict(map(lambda t:(t.name, t), categories[c])) taglist[c] = dict(map(lambda t:(icu_lower(t.name), t), categories[c]))
muc = self.prefs.get('grouped_search_make_user_categories', []) muc = self.prefs.get('grouped_search_make_user_categories', [])
gst = self.prefs.get('grouped_search_terms', {}) gst = self.prefs.get('grouped_search_terms', {})
@ -1422,8 +1438,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
for user_cat in sorted(user_categories.keys(), key=sort_key): for user_cat in sorted(user_categories.keys(), key=sort_key):
items = [] items = []
for (name,label,ign) in user_categories[user_cat]: for (name,label,ign) in user_categories[user_cat]:
if label in taglist and name in taglist[label]: n = icu_lower(name)
items.append(taglist[label][name]) if label in taglist and n in taglist[label]:
items.append(taglist[label][n])
# else: do nothing, to not include nodes w zero counts # else: do nothing, to not include nodes w zero counts
cat_name = '@' + user_cat # add the '@' to avoid name collision cat_name = '@' + user_cat # add the '@' to avoid name collision
# Not a problem if we accumulate entries in the icon map # Not a problem if we accumulate entries in the icon map
@ -2434,6 +2451,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
stream.seek(0) stream.seek(0)
mi = get_metadata(stream, format, use_libprs_metadata=False) mi = get_metadata(stream, format, use_libprs_metadata=False)
stream.seek(0) stream.seek(0)
if not mi.series_index:
mi.series_index = 1.0 mi.series_index = 1.0
mi.tags = [_('News')] mi.tags = [_('News')]
if arg['add_title_tag']: if arg['add_title_tag']:

View File

@ -32,7 +32,7 @@ category_icon_map = {
'news' : 'news.png', 'news' : 'news.png',
'tags' : 'tags.png', 'tags' : 'tags.png',
'custom:' : 'column.png', 'custom:' : 'column.png',
'user:' : 'drawer.png', 'user:' : 'tb_folder.png',
'search' : 'search.png' 'search' : 'search.png'
} }

View File

@ -413,6 +413,27 @@ The Book Details display shows you extra information and the cover for the curre
.. _jobs: .. _jobs:
.. _tag_browser:
Tag Browser
-------------
.. image:: images/tag_browser.png
The Tag Browser allows you to easily browse your collection by Author/Tags/Series/etc. If you click on any Item in the Tag Browser, for example, the Author name, Isaac Asimov, then the list of books to the right is restricted to books by that author. Clicking once again on Isaac Asimov will restrict the list of books to books not by Isaac Asimov. A third click will remove the restriction. If you hold down the Ctrl or Shift keys and click on multiple items, then restrictions based on multiple items are created. For example, you could hold Ctrl and click on the tags History and Europe to find books on European history. The Tag Browser works by constructing search expressions that are automatically entered into the Search bar. It is a good way to learn how to construct basic search expressions.
There is a search bar at the top of the Tag Browser that allows you to easily find any item in the Tag Browser. In addition, you can right click on any item and choose to hide it or rename it or open a "Manage x" dialog that allows you to manage items of that kind. For example the "Manage Authors" dialog allows you to rename authors and control how their names are sorted.
For convenience, you can drag and drop books from the book list to items in the Tag Browser and that item will be automatically applied to the dropped books. For example, dragging a book to Isaac Asimov will set the author of that book to Isaac Asimov or dragging it to the tag History will add the tag History to its tags.
The outer-level items in the tag browser such as Authors and Series are called categories. You can create your own categories, called User Categories, which are useful for organizing items. For example, you can use the user categories editor (push the Manage User Categories button) to create a user category called Favorite Authors, then put the items for your favorites into the category. User categories act like built-in categories; you can click on items to search for them. You can search for all items in a category by right-clicking on the category name and choosing "Search for books in ...".
User categories can have sub-categories. For example, the user category Favorites.Authors is a sub-category of Favorites. You might also have Favorites.Series, in which case there will be two sub-categories under Favorites. Sub-categories can be created using Manage User Categories by entering names like the Favorites example. They can also be created by right-clicking on a user category, choosing "Add sub-category to ...", and entering the category name.
It is also possible to create hierarchies inside some of the built-in categories (the text categories). These hierarchies are shown with a small triangle that lets you show or hide the sub-items. To use hierarchies in a category, you must first go to Preferences / Look & Feel and enter the category name(s) into the "Categories with hierarchical items" box. Once this is done, items in that category that contain periods will be shown using the small triangle. For example, assume you create a custom column called "Genre" and indicate that it contains hierarchical items. Once done, items such as Mystery.Thriller and Mystery.English will display as Mystery with the small triangle next to it. Clicking on the triangle will show Thriller and English as sub-items.
You can drag and drop items in the Tag browser onto user categories to add them to that category.
Jobs Jobs
----- -----
.. image:: images/jobs.png .. image:: images/jobs.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

View File

@ -396,6 +396,34 @@ class BuiltinListitem(BuiltinFormatterFunction):
except: except:
return '' return ''
class BuiltinSublist(BuiltinFormatterFunction):
    # Template function that returns a slice of a separator-delimited list
    name = 'sublist'
    arg_count = 4
    doc = _('sublist(val, start_index, end_index, separator) -- interpret the '
            ' value as a list of items separated by `separator`, returning a '
            ' new list made from the `start_index`th to the `end_index`th item. '
            'The first item is number zero. If an index is negative, then it '
            'counts from the end of the list. As a special case, an end_index '
            'of zero is assumed to be the length of the list. Examples using '
            'basic template mode and assuming a #genre value if A.B.C: '
            '{#genre:sublist(-1,0,.)} returns C<br/>'
            '{#genre:sublist(0,1,.)} returns A<br/>'
            '{#genre:sublist(0,-1,.)} returns A.B')

    def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):
        '''
        Split ``val`` on ``sep``, slice the pieces and join them with ``sep``.

        ``start_index`` and ``end_index`` are converted with int(); errors
        from that conversion, or from splitting, are not caught here. An
        empty ``val`` gives the empty string. An ``end_index`` of zero means
        the slice runs to the end of the list.
        '''
        if not val:
            return ''
        first = int(start_index)
        last = int(end_index)
        items = val.split(sep)
        try:
            # Zero as the end index selects everything from first onwards
            chosen = items[first:] if last == 0 else items[first:last]
            return sep.join(chosen)
        except:
            return ''
class BuiltinUppercase(BuiltinFormatterFunction): class BuiltinUppercase(BuiltinFormatterFunction):
name = 'uppercase' name = 'uppercase'
arg_count = 1 arg_count = 1
@ -447,6 +475,7 @@ builtin_re = BuiltinRe()
builtin_shorten = BuiltinShorten() builtin_shorten = BuiltinShorten()
builtin_strcat = BuiltinStrcat() builtin_strcat = BuiltinStrcat()
builtin_strcmp = BuiltinStrcmp() builtin_strcmp = BuiltinStrcmp()
builtin_sublist = BuiltinSublist()
builtin_substr = BuiltinSubstr() builtin_substr = BuiltinSubstr()
builtin_subtract = BuiltinSubtract() builtin_subtract = BuiltinSubtract()
builtin_switch = BuiltinSwitch() builtin_switch = BuiltinSwitch()

View File

@ -136,7 +136,7 @@ class FeedTemplate(Template):
head.append(STYLE(style, type='text/css')) head.append(STYLE(style, type='text/css'))
if extra_css: if extra_css:
head.append(STYLE(extra_css, type='text/css')) head.append(STYLE(extra_css, type='text/css'))
body = BODY(style='page-break-before:always') body = BODY()
body.append(self.get_navbar(f, feeds)) body.append(self.get_navbar(f, feeds))
div = DIV( div = DIV(
@ -322,7 +322,7 @@ class TouchscreenFeedTemplate(Template):
head.append(STYLE(style, type='text/css')) head.append(STYLE(style, type='text/css'))
if extra_css: if extra_css:
head.append(STYLE(extra_css, type='text/css')) head.append(STYLE(extra_css, type='text/css'))
body = BODY(style='page-break-before:always') body = BODY()
div = DIV( div = DIV(
top_navbar, top_navbar,
H2(feed.title, CLASS('feed_title')) H2(feed.title, CLASS('feed_title'))

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2008 Søren Roug, European Environment Agency # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element):
# Potentially accept color values # Potentially accept color values
def cnv_color(attribute, arg, element): def cnv_color(attribute, arg, element):
""" A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
rr, gg and bb are 8-bit hexadecimal digits.
"""
return str(arg) return str(arg)
def cnv_configtype(attribute, arg, element): def cnv_configtype(attribute, arg, element):
@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element):
# Understand different date formats # Understand different date formats
def cnv_date(attribute, arg, element): def cnv_date(attribute, arg, element):
""" A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
"""
return str(arg) return str(arg)
def cnv_dateTime(attribute, arg, element): def cnv_dateTime(attribute, arg, element):
""" A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
"""
return str(arg) return str(arg)
def cnv_double(attribute, arg, element): def cnv_double(attribute, arg, element):
@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element):
return str(arg) return str(arg)
def cnv_family(attribute, arg, element): def cnv_family(attribute, arg, element):
""" A style family """
if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell", if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
"graphic", "presentation", "drawing-page", "chart"): "graphic", "presentation", "drawing-page", "chart"):
raise ValueError, "'%s' not allowed" % str(arg) raise ValueError, "'%s' not allowed" % str(arg)
return str(arg) return str(arg)
def __save_prefix(attribute, arg, element):
    """ Return `arg` as unicode, first letting `element` resolve its prefix.

    `arg` is split at the first ':'. When there is a prefix and
    element.get_knownns() maps it to a namespace, element.get_nsprefix() is
    called for that namespace (presumably so the element records the
    namespace it depends on -- confirm against Element.get_nsprefix).
    Values without a prefix, or with an unknown prefix, are passed through
    untouched. The text of `arg` itself is never rewritten.
    """
    head = arg.split(':', 1)[0]
    if head != arg:
        known = element.get_knownns(head)
        if known is not None:
            # Only the call matters here; the prefix it returns is not used.
            element.get_nsprefix(known)
    return unicode(arg)
def cnv_formula(attribute, arg, element):
    """ Convert a formula attribute value.

    A formula has no fixed syntax of its own; the string is expected to
    start with a namespace prefix and a ':' (COLON, U+003A), followed by the
    formula text. The namespace bound to that prefix defines how the formula
    is to be read. The conversion is delegated to __save_prefix().
    """
    converted = __save_prefix(attribute, arg, element)
    return converted
def cnv_ID(attribute, arg, element): def cnv_ID(attribute, arg, element):
return str(arg) return str(arg)
@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element):
pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))') pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
def cnv_length(attribute, arg, element): def cnv_length(attribute, arg, element):
""" A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the
Units of Measure defined in §5.9.13 of [XSL].
"""
global pattern_length global pattern_length
if not pattern_length.match(arg): if not pattern_length.match(arg):
raise ValueError, "'%s' is not a valid length" % arg raise ValueError, "'%s' is not a valid length" % arg
@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element):
if not pattern_namespacedToken.match(arg): if not pattern_namespacedToken.match(arg):
raise ValueError, "'%s' is not a valid namespaced token" % arg raise ValueError, "'%s' is not a valid namespaced token" % arg
return arg return __save_prefix(attribute, arg, element)
# Must accept string as argument
# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
# Essentially an XML name minus ':'
def cnv_NCName(attribute, arg, element): def cnv_NCName(attribute, arg, element):
""" NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
Essentially an XML name minus ':'
"""
if type(arg) in types.StringTypes: if type(arg) in types.StringTypes:
return make_NCName(arg) return make_NCName(arg)
else: else:
@ -226,6 +258,7 @@ attrconverters = {
((ANIMNS,u'name'), None): cnv_string, ((ANIMNS,u'name'), None): cnv_string,
((ANIMNS,u'sub-item'), None): cnv_string, ((ANIMNS,u'sub-item'), None): cnv_string,
((ANIMNS,u'value'), None): cnv_string, ((ANIMNS,u'value'), None): cnv_string,
# ((DBNS,u'type'), None): cnv_namespacedToken,
((CHARTNS,u'attached-axis'), None): cnv_string, ((CHARTNS,u'attached-axis'), None): cnv_string,
((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor, ((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor,
((CHARTNS,u'class'), None): cnv_namespacedToken, ((CHARTNS,u'class'), None): cnv_namespacedToken,
@ -288,7 +321,7 @@ attrconverters = {
((CHARTNS,u'values-cell-range-address'), None): cnv_string, ((CHARTNS,u'values-cell-range-address'), None): cnv_string,
((CHARTNS,u'vertical'), None): cnv_boolean, ((CHARTNS,u'vertical'), None): cnv_boolean,
((CHARTNS,u'visible'), None): cnv_boolean, ((CHARTNS,u'visible'), None): cnv_boolean,
((CONFIGNS,u'name'), None): cnv_string, ((CONFIGNS,u'name'), None): cnv_formula,
((CONFIGNS,u'type'), None): cnv_configtype, ((CONFIGNS,u'type'), None): cnv_configtype,
((DR3DNS,u'ambient-color'), None): cnv_string, ((DR3DNS,u'ambient-color'), None): cnv_string,
((DR3DNS,u'back-scale'), None): cnv_string, ((DR3DNS,u'back-scale'), None): cnv_string,
@ -369,11 +402,11 @@ attrconverters = {
((DRAWNS,u'decimal-places'), None): cnv_string, ((DRAWNS,u'decimal-places'), None): cnv_string,
((DRAWNS,u'display'), None): cnv_string, ((DRAWNS,u'display'), None): cnv_string,
((DRAWNS,u'display-name'), None): cnv_string, ((DRAWNS,u'display-name'), None): cnv_string,
((DRAWNS,u'distance'), None): cnv_string, ((DRAWNS,u'distance'), None): cnv_lengthorpercent,
((DRAWNS,u'dots1'), None): cnv_integer, ((DRAWNS,u'dots1'), None): cnv_integer,
((DRAWNS,u'dots1-length'), None): cnv_length, ((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent,
((DRAWNS,u'dots2'), None): cnv_integer, ((DRAWNS,u'dots2'), None): cnv_integer,
((DRAWNS,u'dots2-length'), None): cnv_length, ((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent,
((DRAWNS,u'end-angle'), None): cnv_double, ((DRAWNS,u'end-angle'), None): cnv_double,
((DRAWNS,u'end'), None): cnv_string, ((DRAWNS,u'end'), None): cnv_string,
((DRAWNS,u'end-color'), None): cnv_string, ((DRAWNS,u'end-color'), None): cnv_string,
@ -383,7 +416,7 @@ attrconverters = {
((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string, ((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string,
((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string, ((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string,
((DRAWNS,u'end-shape'), None): cnv_IDREF, ((DRAWNS,u'end-shape'), None): cnv_IDREF,
((DRAWNS,u'engine'), None): cnv_string, ((DRAWNS,u'engine'), None): cnv_namespacedToken,
((DRAWNS,u'enhanced-path'), None): cnv_string, ((DRAWNS,u'enhanced-path'), None): cnv_string,
((DRAWNS,u'escape-direction'), None): cnv_string, ((DRAWNS,u'escape-direction'), None): cnv_string,
((DRAWNS,u'extrusion-allowed'), None): cnv_boolean, ((DRAWNS,u'extrusion-allowed'), None): cnv_boolean,
@ -604,7 +637,7 @@ attrconverters = {
((FORMNS,u'button-type'), None): cnv_string, ((FORMNS,u'button-type'), None): cnv_string,
((FORMNS,u'command'), None): cnv_string, ((FORMNS,u'command'), None): cnv_string,
((FORMNS,u'command-type'), None): cnv_string, ((FORMNS,u'command-type'), None): cnv_string,
((FORMNS,u'control-implementation'), None): cnv_string, ((FORMNS,u'control-implementation'), None): cnv_namespacedToken,
((FORMNS,u'convert-empty-to-null'), None): cnv_boolean, ((FORMNS,u'convert-empty-to-null'), None): cnv_boolean,
((FORMNS,u'current-selected'), None): cnv_boolean, ((FORMNS,u'current-selected'), None): cnv_boolean,
((FORMNS,u'current-state'), None): cnv_string, ((FORMNS,u'current-state'), None): cnv_string,
@ -800,8 +833,8 @@ attrconverters = {
((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean, ((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean,
((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger, ((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger,
((PRESENTATIONNS,u'visibility'), None): cnv_string, ((PRESENTATIONNS,u'visibility'), None): cnv_string,
((SCRIPTNS,u'event-name'), None): cnv_string, ((SCRIPTNS,u'event-name'), None): cnv_formula,
((SCRIPTNS,u'language'), None): cnv_string, ((SCRIPTNS,u'language'), None): cnv_formula,
((SCRIPTNS,u'macro-name'), None): cnv_string, ((SCRIPTNS,u'macro-name'), None): cnv_string,
((SMILNS,u'accelerate'), None): cnv_double, ((SMILNS,u'accelerate'), None): cnv_double,
((SMILNS,u'accumulate'), None): cnv_string, ((SMILNS,u'accumulate'), None): cnv_string,
@ -1087,7 +1120,7 @@ attrconverters = {
((SVGNS,u'y2'), None): cnv_lengthorpercent, ((SVGNS,u'y2'), None): cnv_lengthorpercent,
((TABLENS,u'acceptance-state'), None): cnv_string, ((TABLENS,u'acceptance-state'), None): cnv_string,
((TABLENS,u'add-empty-lines'), None): cnv_boolean, ((TABLENS,u'add-empty-lines'), None): cnv_boolean,
((TABLENS,u'algorithm'), None): cnv_string, ((TABLENS,u'algorithm'), None): cnv_formula,
((TABLENS,u'align'), None): cnv_string, ((TABLENS,u'align'), None): cnv_string,
((TABLENS,u'allow-empty-cell'), None): cnv_boolean, ((TABLENS,u'allow-empty-cell'), None): cnv_boolean,
((TABLENS,u'application-data'), None): cnv_string, ((TABLENS,u'application-data'), None): cnv_string,
@ -1106,7 +1139,7 @@ attrconverters = {
((TABLENS,u'cell-range'), None): cnv_string, ((TABLENS,u'cell-range'), None): cnv_string,
((TABLENS,u'column'), None): cnv_integer, ((TABLENS,u'column'), None): cnv_integer,
((TABLENS,u'comment'), None): cnv_string, ((TABLENS,u'comment'), None): cnv_string,
((TABLENS,u'condition'), None): cnv_string, ((TABLENS,u'condition'), None): cnv_formula,
((TABLENS,u'condition-source'), None): cnv_string, ((TABLENS,u'condition-source'), None): cnv_string,
((TABLENS,u'condition-source-range-address'), None): cnv_string, ((TABLENS,u'condition-source-range-address'), None): cnv_string,
((TABLENS,u'contains-error'), None): cnv_boolean, ((TABLENS,u'contains-error'), None): cnv_boolean,
@ -1144,13 +1177,13 @@ attrconverters = {
((TABLENS,u'end-x'), None): cnv_length, ((TABLENS,u'end-x'), None): cnv_length,
((TABLENS,u'end-y'), None): cnv_length, ((TABLENS,u'end-y'), None): cnv_length,
((TABLENS,u'execute'), None): cnv_boolean, ((TABLENS,u'execute'), None): cnv_boolean,
((TABLENS,u'expression'), None): cnv_string, ((TABLENS,u'expression'), None): cnv_formula,
((TABLENS,u'field-name'), None): cnv_string, ((TABLENS,u'field-name'), None): cnv_string,
((TABLENS,u'field-number'), None): cnv_nonNegativeInteger, ((TABLENS,u'field-number'), None): cnv_nonNegativeInteger,
((TABLENS,u'field-number'), None): cnv_string, ((TABLENS,u'field-number'), None): cnv_string,
((TABLENS,u'filter-name'), None): cnv_string, ((TABLENS,u'filter-name'), None): cnv_string,
((TABLENS,u'filter-options'), None): cnv_string, ((TABLENS,u'filter-options'), None): cnv_string,
((TABLENS,u'formula'), None): cnv_string, ((TABLENS,u'formula'), None): cnv_formula,
((TABLENS,u'function'), None): cnv_string, ((TABLENS,u'function'), None): cnv_string,
((TABLENS,u'function'), None): cnv_string, ((TABLENS,u'function'), None): cnv_string,
((TABLENS,u'grand-total'), None): cnv_string, ((TABLENS,u'grand-total'), None): cnv_string,
@ -1290,7 +1323,7 @@ attrconverters = {
((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean, ((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean,
((TEXTNS,u'comma-separated'), None): cnv_boolean, ((TEXTNS,u'comma-separated'), None): cnv_boolean,
((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef, ((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef,
((TEXTNS,u'condition'), None): cnv_string, ((TEXTNS,u'condition'), None): cnv_formula,
((TEXTNS,u'connection-name'), None): cnv_string, ((TEXTNS,u'connection-name'), None): cnv_string,
((TEXTNS,u'consecutive-numbering'), None): cnv_boolean, ((TEXTNS,u'consecutive-numbering'), None): cnv_boolean,
((TEXTNS,u'continue-numbering'), None): cnv_boolean, ((TEXTNS,u'continue-numbering'), None): cnv_boolean,
@ -1321,7 +1354,7 @@ attrconverters = {
((TEXTNS,u'first-row-start-column'), None): cnv_string, ((TEXTNS,u'first-row-start-column'), None): cnv_string,
((TEXTNS,u'fixed'), None): cnv_boolean, ((TEXTNS,u'fixed'), None): cnv_boolean,
((TEXTNS,u'footnotes-position'), None): cnv_string, ((TEXTNS,u'footnotes-position'), None): cnv_string,
((TEXTNS,u'formula'), None): cnv_string, ((TEXTNS,u'formula'), None): cnv_formula,
((TEXTNS,u'global'), None): cnv_boolean, ((TEXTNS,u'global'), None): cnv_boolean,
((TEXTNS,u'howpublished'), None): cnv_string, ((TEXTNS,u'howpublished'), None): cnv_string,
((TEXTNS,u'id'), None): cnv_ID, ((TEXTNS,u'id'), None): cnv_ID,
@ -1437,7 +1470,10 @@ attrconverters = {
class AttrConverters: class AttrConverters:
def convert(self, attribute, value, element): def convert(self, attribute, value, element):
conversion = attrconverters.get((attribute,element), None) """ Based on the element, figures out how to check/convert the attribute value
All values are converted to string
"""
conversion = attrconverters.get((attribute, element.qname), None)
if conversion is not None: if conversion is not None:
return conversion(attribute, value, element) return conversion(attribute, value, element)
else: else:

View File

@ -1,6 +1,6 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2007-2008 Søren Roug, European Environment Agency # Copyright (C) 2007-2010 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -112,6 +112,9 @@ class Node(xml.dom.Node):
return self.childNodes[-1] return self.childNodes[-1]
def insertBefore(self, newChild, refChild): def insertBefore(self, newChild, refChild):
""" Inserts the node newChild before the existing child node refChild.
If refChild is null, insert newChild at the end of the list of children.
"""
if newChild.nodeType not in self._child_node_types: if newChild.nodeType not in self._child_node_types:
raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName) raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName)
if newChild.parentNode is not None: if newChild.parentNode is not None:
@ -135,21 +138,26 @@ class Node(xml.dom.Node):
newChild.parentNode = self newChild.parentNode = self
return newChild return newChild
def appendChild(self, node): def appendChild(self, newChild):
if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: """ Adds the node newChild to the end of the list of children of this node.
for c in tuple(node.childNodes): If the newChild is already in the tree, it is first removed.
"""
if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
for c in tuple(newChild.childNodes):
self.appendChild(c) self.appendChild(c)
### The DOM does not clearly specify what to return in this case ### The DOM does not clearly specify what to return in this case
return node return newChild
if node.nodeType not in self._child_node_types: if newChild.nodeType not in self._child_node_types:
raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName) raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName)
if node.parentNode is not None: if newChild.parentNode is not None:
node.parentNode.removeChild(node) newChild.parentNode.removeChild(newChild)
_append_child(self, node) _append_child(self, newChild)
node.nextSibling = None newChild.nextSibling = None
return node return newChild
def removeChild(self, oldChild): def removeChild(self, oldChild):
""" Removes the child node indicated by oldChild from the list of children, and returns it.
"""
#FIXME: update ownerDocument.element_dict or find other solution #FIXME: update ownerDocument.element_dict or find other solution
try: try:
self.childNodes.remove(oldChild) self.childNodes.remove(oldChild)
@ -191,7 +199,7 @@ def _append_child(self, node):
node.__dict__["parentNode"] = self node.__dict__["parentNode"] = self
class Childless: class Childless:
"""Mixin that makes childless-ness easy to implement and avoids """ Mixin that makes childless-ness easy to implement and avoids
the complexity of the Node methods that deal with children. the complexity of the Node methods that deal with children.
""" """
@ -207,6 +215,7 @@ class Childless:
return None return None
def appendChild(self, node): def appendChild(self, node):
""" Raises an error """
raise xml.dom.HierarchyRequestErr( raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes cannot have children") self.tagName + " nodes cannot have children")
@ -214,14 +223,17 @@ class Childless:
return False return False
def insertBefore(self, newChild, refChild): def insertBefore(self, newChild, refChild):
""" Raises an error """
raise xml.dom.HierarchyRequestErr( raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes do not have children") self.tagName + " nodes do not have children")
def removeChild(self, oldChild): def removeChild(self, oldChild):
""" Raises an error """
raise xml.dom.NotFoundErr( raise xml.dom.NotFoundErr(
self.tagName + " nodes do not have children") self.tagName + " nodes do not have children")
def replaceChild(self, newChild, oldChild): def replaceChild(self, newChild, oldChild):
""" Raises an error """
raise xml.dom.HierarchyRequestErr( raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes do not have children") self.tagName + " nodes do not have children")
@ -247,8 +259,12 @@ class CDATASection(Childless, Text):
nodeType = Node.CDATA_SECTION_NODE nodeType = Node.CDATA_SECTION_NODE
def toXml(self,level,f): def toXml(self,level,f):
""" Generate XML output of the node. If the text contains "]]>", then
escape it by going out of CDATA mode (]]>), then write the string
and then go into CDATA mode again. (<![CDATA[)
"""
if self.data: if self.data:
f.write('<![CDATA[%s]]>' % self.data) f.write('<![CDATA[%s]]>' % self.data.replace(']]>',']]>]]><![CDATA['))
class Element(Node): class Element(Node):
""" Creates a arbitrary element and is intended to be subclassed not used on its own. """ Creates a arbitrary element and is intended to be subclassed not used on its own.
@ -310,7 +326,19 @@ class Element(Node):
if self.getAttrNS(r[0],r[1]) is None: if self.getAttrNS(r[0],r[1]) is None:
raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName) raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)
def get_knownns(self, prefix):
    """ Resolve a prefix to a namespace using the table of known namespaces.

    Looks through the nsdict mapping (namespace -> prefix) and returns
    the first namespace whose prefix equals `prefix`, or None when no
    known namespace uses that prefix.
    """
    candidates = [uri for uri, known_prefix in nsdict.items()
                  if known_prefix == prefix]
    if candidates:
        return candidates[0]
    return None
def get_nsprefix(self, namespace): def get_nsprefix(self, namespace):
""" Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
and needs to look up or assign the prefix for it.
"""
if namespace is None: namespace = "" if namespace is None: namespace = ""
prefix = _nsassign(namespace) prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace): if not self.namespaces.has_key(namespace):
@ -339,6 +367,9 @@ class Element(Node):
self.ownerDocument.rebuild_caches(element) self.ownerDocument.rebuild_caches(element)
def addText(self, text, check_grammar=True): def addText(self, text, check_grammar=True):
""" Adds text to an element
Setting check_grammar=False turns off grammar checking
"""
if check_grammar and self.qname not in grammar.allows_text: if check_grammar and self.qname not in grammar.allows_text:
raise IllegalText, "The <%s> element does not allow text" % self.tagName raise IllegalText, "The <%s> element does not allow text" % self.tagName
else: else:
@ -346,6 +377,9 @@ class Element(Node):
self.appendChild(Text(text)) self.appendChild(Text(text))
def addCDATA(self, cdata, check_grammar=True): def addCDATA(self, cdata, check_grammar=True):
""" Adds CDATA to an element
Setting check_grammar=False turns off grammar checking
"""
if check_grammar and self.qname not in grammar.allows_text: if check_grammar and self.qname not in grammar.allows_text:
raise IllegalText, "The <%s> element does not allow text" % self.tagName raise IllegalText, "The <%s> element does not allow text" % self.tagName
else: else:
@ -403,17 +437,18 @@ class Element(Node):
# if allowed_attrs and (namespace, localpart) not in allowed_attrs: # if allowed_attrs and (namespace, localpart) not in allowed_attrs:
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName) # raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
c = AttrConverters() c = AttrConverters()
self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname) self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
def getAttrNS(self, namespace, localpart): def getAttrNS(self, namespace, localpart):
prefix = self.get_nsprefix(namespace) prefix = self.get_nsprefix(namespace)
return self.attributes.get(prefix + ":" + localpart) return self.attributes.get((namespace, localpart))
def removeAttrNS(self, namespace, localpart): def removeAttrNS(self, namespace, localpart):
prefix = self.get_nsprefix(namespace) del self.attributes[(namespace, localpart)]
del self.attributes[prefix + ":" + localpart]
def getAttribute(self, attr): def getAttribute(self, attr):
""" Get an attribute value. The method knows which namespace the attribute is in
"""
allowed_attrs = self.allowed_attributes() allowed_attrs = self.allowed_attributes()
if allowed_attrs is None: if allowed_attrs is None:
if type(attr) == type(()): if type(attr) == type(()):
@ -432,8 +467,9 @@ class Element(Node):
if level == 0: if level == 0:
for namespace, prefix in self.namespaces.items(): for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
for attkey in self.attributes.keys(): for qname in self.attributes.keys():
f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8'))) prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
f.write('>') f.write('>')
def write_close_tag(self, level, f): def write_close_tag(self, level, f):
@ -445,8 +481,9 @@ class Element(Node):
if level == 0: if level == 0:
for namespace, prefix in self.namespaces.items(): for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
for attkey in self.attributes.keys(): for qname in self.attributes.keys():
f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8'))) prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
if self.childNodes: if self.childNodes:
f.write('>') f.write('>')
for element in self.childNodes: for element in self.childNodes:
@ -464,6 +501,7 @@ class Element(Node):
return accumulator return accumulator
def getElementsByType(self, element): def getElementsByType(self, element):
""" Gets elements based on the type, which is function from text.py, draw.py etc. """
obj = element(check_grammar=False) obj = element(check_grammar=False)
return self._getElementsByObj(obj,[]) return self._getElementsByObj(obj,[])

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public

View File

@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler):
self.level = self.level + 1 self.level = self.level + 1
# Add any accumulated text content # Add any accumulated text content
content = ''.join(self.data).strip() content = ''.join(self.data)
if len(content) > 0: if len(content.strip()) > 0:
self.parent.addText(content, check_grammar=False) self.parent.addText(content, check_grammar=False)
self.data = [] self.data = []
# Create the element # Create the element

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -17,7 +17,7 @@
# #
# Contributor(s): # Contributor(s):
# #
TOOLSVERSION = u"ODFPY/0.9.2dev" TOOLSVERSION = u"ODFPY/0.9.4dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@ -28,19 +28,23 @@ DCNS = u"http://purl.org/dc/elements/1.1/"
DOMNS = u"http://www.w3.org/2001/xml-events" DOMNS = u"http://www.w3.org/2001/xml-events"
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0" FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0"
GRDDLNS = u"http://www.w3.org/2003/g/data-view#"
KOFFICENS = u"http://www.koffice.org/2005/" KOFFICENS = u"http://www.koffice.org/2005/"
MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
MATHNS = u"http://www.w3.org/1998/Math/MathML" MATHNS = u"http://www.w3.org/1998/Math/MathML"
METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0" METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0" OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2"
OOONS = u"http://openoffice.org/2004/office" OOONS = u"http://openoffice.org/2004/office"
OOOWNS = u"http://openoffice.org/2004/writer" OOOWNS = u"http://openoffice.org/2004/writer"
OOOCNS = u"http://openoffice.org/2004/calc" OOOCNS = u"http://openoffice.org/2004/calc"
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#" RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
RPTNS = u"http://openoffice.org/2005/report"
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0" SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0" SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0" STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
@ -50,7 +54,8 @@ TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
XFORMSNS = u"http://www.w3.org/2002/xforms" XFORMSNS = u"http://www.w3.org/2002/xforms"
XLINKNS = u"http://www.w3.org/1999/xlink" XLINKNS = u"http://www.w3.org/1999/xlink"
XMLNS = u"http://www.w3.org/XML/1998/namespace" XMLNS = u"http://www.w3.org/XML/1998/namespace"
XSDNS = u"http://www.w3.org/2001/XMLSchema"
XSINS = u"http://www.w3.org/2001/XMLSchema-instance"
nsdict = { nsdict = {
ANIMNS: u'anim', ANIMNS: u'anim',
@ -61,19 +66,23 @@ nsdict = {
DOMNS: u'dom', DOMNS: u'dom',
DR3DNS: u'dr3d', DR3DNS: u'dr3d',
DRAWNS: u'draw', DRAWNS: u'draw',
FIELDNS: u'field',
FONS: u'fo', FONS: u'fo',
FORMNS: u'form', FORMNS: u'form',
GRDDLNS: u'grddl',
KOFFICENS: u'koffice', KOFFICENS: u'koffice',
MANIFESTNS: u'manifest', MANIFESTNS: u'manifest',
MATHNS: u'math', MATHNS: u'math',
METANS: u'meta', METANS: u'meta',
NUMBERNS: u'number', NUMBERNS: u'number',
OFFICENS: u'office', OFFICENS: u'office',
OFNS: u'of',
OOONS: u'ooo', OOONS: u'ooo',
OOOWNS: u'ooow', OOOWNS: u'ooow',
OOOCNS: u'oooc', OOOCNS: u'oooc',
PRESENTATIONNS: u'presentation', PRESENTATIONNS: u'presentation',
RDFANS: u'rdfa', RDFANS: u'rdfa',
RPTNS: u'rpt',
SCRIPTNS: u'script', SCRIPTNS: u'script',
SMILNS: u'smil', SMILNS: u'smil',
STYLENS: u'style', STYLENS: u'style',
@ -83,4 +92,6 @@ nsdict = {
XFORMSNS: u'xforms', XFORMSNS: u'xforms',
XLINKNS: u'xlink', XLINKNS: u'xlink',
XMLNS: u'xml', XMLNS: u'xml',
XSDNS: u'xsd',
XSINS: u'xsi',
} }

View File

@ -1,6 +1,6 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -20,15 +20,18 @@
# #
#import pdb #import pdb
#pdb.set_trace() #pdb.set_trace()
import zipfile from xml.sax import handler
from xml.sax import handler, expatreader
from xml.sax.xmlreader import InputSource
from xml.sax.saxutils import escape, quoteattr from xml.sax.saxutils import escape, quoteattr
from cStringIO import StringIO from xml.dom import Node
from namespaces import DCNS, DRAWNS, FONS, \ from opendocument import load
METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \
STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
if False: # Added by Kovid
DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS
# Handling of styles # Handling of styles
# #
@ -72,8 +75,8 @@ class StyleToCSS:
(FONS,u"border-left"): self.c_fo, (FONS,u"border-left"): self.c_fo,
(FONS,u"border-right"): self.c_fo, (FONS,u"border-right"): self.c_fo,
(FONS,u"border-top"): self.c_fo, (FONS,u"border-top"): self.c_fo,
(FONS,u"break-after"): self.c_break, (FONS,u"break-after"): self.c_break, # Added by Kovid
(FONS,u"break-before"): self.c_break, (FONS,u"break-before"): self.c_break,# Added by Kovid
(FONS,u"color"): self.c_fo, (FONS,u"color"): self.c_fo,
(FONS,u"font-family"): self.c_fo, (FONS,u"font-family"): self.c_fo,
(FONS,u"font-size"): self.c_fo, (FONS,u"font-size"): self.c_fo,
@ -136,7 +139,7 @@ class StyleToCSS:
selector = rule[1] selector = rule[1]
sdict[selector] = val sdict[selector] = val
def c_break(self, ruleset, sdict, rule, val): def c_break(self, ruleset, sdict, rule, val): # Added by Kovid
property = 'page-' + rule[1] property = 'page-' + rule[1]
values = {'auto': 'auto', 'column': 'always', 'page': 'always', values = {'auto': 'auto', 'column': 'always', 'page': 'always',
'even-page': 'left', 'odd-page': 'right', 'even-page': 'left', 'odd-page': 'right',
@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler):
self.elements = { self.elements = {
(DCNS, 'title'): (self.s_processcont, self.e_dc_title), (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
(DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag), (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
(DCNS, 'description'): (self.s_processcont, self.e_dc_metatag), (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
(DCNS, 'date'): (self.s_processcont, self.e_dc_metatag), (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
(DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
(DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame), (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
(DRAWNS, 'image'): (self.s_draw_image, None), (DRAWNS, 'image'): (self.s_draw_image, None),
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
(DRAWNS, "layer-set"):(self.s_ignorexml, None), (DRAWNS, "layer-set"):(self.s_ignorexml, None),
(DRAWNS, 'object'): (self.s_draw_object, None),
(DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox), (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler):
(NUMBERNS, "date-style"):(self.s_ignorexml, None), (NUMBERNS, "date-style"):(self.s_ignorexml, None),
(NUMBERNS, "number-style"):(self.s_ignorexml, None), (NUMBERNS, "number-style"):(self.s_ignorexml, None),
(NUMBERNS, "text-style"):(self.s_ignorexml, None), (NUMBERNS, "text-style"):(self.s_ignorexml, None),
(OFFICENS, "annotation"):(self.s_ignorexml, None),
(OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None), (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
(OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content), (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "forms"):(self.s_ignorexml, None), (OFFICENS, "forms"):(self.s_ignorexml, None),
(OFFICENS, "master-styles"):(self.s_office_master_styles, None), (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler):
(OFFICENS, "styles"):(self.s_office_styles, None), (OFFICENS, "styles"):(self.s_office_styles, None),
(OFFICENS, "text"):(self.s_office_text, self.e_office_text), (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
(OFFICENS, "scripts"):(self.s_ignorexml, None), (OFFICENS, "scripts"):(self.s_ignorexml, None),
(OFFICENS, "settings"):(self.s_ignorexml, None),
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None), (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout), # (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
(STYLENS, "default-page-layout"):(self.s_ignorexml, None), (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler):
# (STYLENS, "header-style"):(self.s_style_header_style, None), # (STYLENS, "header-style"):(self.s_style_header_style, None),
(STYLENS, "master-page"):(self.s_style_master_page, None), (STYLENS, "master-page"):(self.s_style_master_page, None),
(STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None), (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
(STYLENS, "page-layout"):(self.s_ignorexml, None), # (STYLENS, "page-layout"):(self.s_ignorexml, None),
(STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None), (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
(STYLENS, "style"):(self.s_style_style, self.e_style_style), (STYLENS, "style"):(self.s_style_style, self.e_style_style),
(STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None), (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None), (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
(TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
(TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
(TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
(TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
(TEXTNS, 'h'): (self.s_text_h, self.e_text_h), (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
(TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'line-break'):(self.s_text_line_break, None), (TEXTNS, 'line-break'):(self.s_text_line_break, None),
@ -430,12 +443,68 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
} }
if embedable: if embedable:
self.make_embedable()
self._resetobject()
def set_plain(self):
    """ Switch the converter to plain output: no CSS is generated.

        Clears self.generate_css, the flag consulted before the <style>
        element and the class/style attributes are written.
    """
    self.generate_css = False
def set_embedable(self):
""" Tells the converter to only output the parts inside the <body>"""
self.elements[(OFFICENS, u"text")] = (None,None) self.elements[(OFFICENS, u"text")] = (None,None)
self.elements[(OFFICENS, u"spreadsheet")] = (None,None) self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
self.elements[(OFFICENS, u"presentation")] = (None,None) self.elements[(OFFICENS, u"presentation")] = (None,None)
self.elements[(OFFICENS, u"document-content")] = (None,None) self.elements[(OFFICENS, u"document-content")] = (None,None)
def add_style_file(self, stylefilename, media=None):
    """ Add a link to an external style file.
        Also turns off the embedding of styles in the HTML.

        stylefilename -- value for the href attribute of the <link> element;
                         also remembered in self.stylefilename
        media         -- optional value for the media attribute; the
                         attribute is left out when media is empty or None
    """
    self.use_internal_css = False
    self.stylefilename = stylefilename
    # quoteattr() escapes markup characters and supplies the surrounding
    # quotes, so a file name containing '&', '<' or '"' cannot break the tag.
    link = '<link rel="stylesheet" type="text/css" href=%s' % quoteattr(stylefilename)
    if media:
        link += ' media=%s' % quoteattr(media)
    self.metatags.append(link + '/>\n')
def _resetfootnotes(self):
# Footnotes and endnotes
self.notedict = {}
self.currentnote = 0
self.notebody = ''
def _resetobject(self):
    """ Put the converter back into its initial, empty state.

        Resets the output buffer, the document metadata, the parser
        bookkeeping and the collected styles and notes.
    """
    # Output: lines are collected in self.lines through self._wlines
    self.lines = []
    self._wfunc = self._wlines

    # Document metadata
    self.xmlfile = ''
    self.title = ''
    self.language = ''
    self.creator = ''
    # Tags from meta.xml
    self.metatags = []

    # Parser state
    self.data = []
    self.tagstack = TagStack()
    self.htmlstack = []
    self.pstack = []
    self.processelem = True
    self.processcont = True
    self.listtypes = {}
    self.headinglevels = [0] * 11  # level 0 to 10
    self.anchors = {}

    # Style declarations
    self.use_internal_css = True
    self.cs = StyleToCSS()
    self.stylestack = []
    self.styledict = {}
    self.currentstyle = None

    # Footnotes and endnotes
    self._resetfootnotes()
def writeout(self, s): def writeout(self, s):
if s != '': if s != '':
self._wfunc(s) self._wfunc(s)
@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler):
def opentag(self, tag, attrs={}, block=False): def opentag(self, tag, attrs={}, block=False):
""" Create an open HTML tag """ """ Create an open HTML tag """
self.htmlstack.append((tag,attrs,block))
a = [] a = []
for key,val in attrs.items(): for key,val in attrs.items():
a.append('''%s=%s''' % (key, quoteattr(val))) a.append('''%s=%s''' % (key, quoteattr(val)))
@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler):
self.writeout("\n") self.writeout("\n")
def closetag(self, tag, block=True): def closetag(self, tag, block=True):
""" Close an open HTML tag """
self.htmlstack.pop()
self.writeout("</%s>" % tag) self.writeout("</%s>" % tag)
if block == True: if block == True:
self.writeout("\n") self.writeout("\n")
@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler):
a.append('''%s=%s''' % (key, quoteattr(val))) a.append('''%s=%s''' % (key, quoteattr(val)))
self.writeout("<%s %s/>\n" % (tag, " ".join(a))) self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
#--------------------------------------------------
# Interface to parser
#-------------------------------------------------- #--------------------------------------------------
def characters(self, data): def characters(self, data):
if self.processelem and self.processcont: if self.processelem and self.processcont:
self.data.append(data) self.data.append(data)
def handle_starttag(self, tag, method, attrs):
method(tag,attrs)
def handle_endtag(self, tag, attrs, method):
method(tag, attrs)
def startElementNS(self, tag, qname, attrs): def startElementNS(self, tag, qname, attrs):
self.pstack.append( (self.processelem, self.processcont) ) self.pstack.append( (self.processelem, self.processcont) )
if self.processelem: if self.processelem:
@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler):
self.unknown_endtag(tag, attrs) self.unknown_endtag(tag, attrs)
self.processelem, self.processcont = self.pstack.pop() self.processelem, self.processcont = self.pstack.pop()
#--------------------------------------------------
def handle_starttag(self, tag, method, attrs):
    """ Dispatch a start tag: call the given handler with (tag, attrs) """
    method(tag, attrs)
def handle_endtag(self, tag, attrs, method):
    """ Dispatch an end tag: call the given handler with (tag, attrs) """
    method(tag, attrs)
def unknown_starttag(self, tag, attrs): def unknown_starttag(self, tag, attrs):
pass pass
@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler):
self.processelem = False self.processelem = False
def s_ignorecont(self, tag, attrs): def s_ignorecont(self, tag, attrs):
""" Stop processing the text nodes """
self.processcont = False self.processcont = False
def s_processcont(self, tag, attrs): def s_processcont(self, tag, attrs):
""" Start processing the text nodes """
self.processcont = True self.processcont = True
def classname(self, attrs): def classname(self, attrs):
""" Generate a class name from a style name """ """ Generate a class name from a style name """
c = attrs[(TEXTNS,'style-name')] c = attrs.get((TEXTNS,'style-name'),'')
c = c.replace(".","_") c = c.replace(".","_")
return c return c
def get_anchor(self, name): def get_anchor(self, name):
""" Create a unique anchor id for a href name """
if not self.anchors.has_key(name): if not self.anchors.has_key(name):
# Changed by Kovid # Changed by Kovid
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1) self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler):
def e_dc_title(self, tag, attrs): def e_dc_title(self, tag, attrs):
""" Get the title from the meta data and create a HTML <title> """ Get the title from the meta data and create a HTML <title>
""" """
self.metatags.append('<title>%s</title>\n' % escape(''.join(self.data)))
self.title = ''.join(self.data) self.title = ''.join(self.data)
#self.metatags.append('<title>%s</title>\n' % escape(self.title))
self.data = [] self.data = []
def e_dc_metatag(self, tag, attrs): def e_dc_metatag(self, tag, attrs):
@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler):
def e_dc_contentlanguage(self, tag, attrs): def e_dc_contentlanguage(self, tag, attrs):
""" Set the content language. Identifies the targeted audience """ Set the content language. Identifies the targeted audience
""" """
self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % ''.join(self.data)) self.language = ''.join(self.data)
self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % escape(self.language))
self.data = [] self.data = []
def e_dc_creator(self, tag, attrs):
    """ Set the content creator from the collected character data.

        Stores the name in self.creator, queues a matching <meta> tag in
        self.metatags and empties the character-data buffer.
    """
    self.creator = ''.join(self.data)
    # quoteattr() rather than escape(): the value ends up inside an
    # attribute, and escape() would leave a double quote in the name
    # unescaped, producing malformed XHTML.
    self.metatags.append('<meta http-equiv="creator" content=%s/>\n' % quoteattr(self.creator))
    self.data = []
def s_custom_shape(self, tag, attrs):
    """ Open the <div> that stands in for a <draw:custom-shape>.

        The class name comes from the draw style name ("G-" prefix), or from
        the presentation style name ("PR-" prefix) when there is no draw
        style. Position and size are taken from the svg attributes. When CSS
        generation is off, a bare <div> is opened instead.
    """
    anchor_type = attrs.get((TEXTNS, 'anchor-type'), 'notfound')

    css_class = "G-" + attrs.get((DRAWNS, 'style-name'), "")
    if css_class == 'G-':
        css_class = "PR-" + attrs.get((PRESENTATIONNS, 'style-name'), "")
    css_class = css_class.replace(".", "_")

    # Only shapes anchored "as-char" stay in the text flow
    if anchor_type == 'as-char':
        css = ''
    elif anchor_type in ('paragraph', 'char'):
        css = 'position:absolute;'
    else:
        css = "position: absolute;"

    # svg attribute -> CSS property, in the order they are emitted
    for svg_name, css_prop in (("width", "width"), ("height", "height"),
                               ("x", "left"), ("y", "top")):
        key = (SVGNS, svg_name)
        if key in attrs:
            css = css + css_prop + ":" + attrs[key] + ";"

    if not self.generate_css:
        self.opentag('div')
        return
    self.opentag('div', {'class': css_class, 'style': css})
def e_custom_shape(self, tag, attrs):
    """ End the <draw:custom-shape>: close the <div> opened by s_custom_shape
    """
    self.closetag('div')
def s_draw_frame(self, tag, attrs): def s_draw_frame(self, tag, attrs):
""" A <draw:frame> is made into a <div> in HTML which is then styled """ A <draw:frame> is made into a <div> in HTML which is then styled
""" """
anchor_type = attrs.get((TEXTNS,'anchor-type'),'char') anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
htmltag = 'div' htmltag = 'div'
name = "G-" + attrs.get( (DRAWNS,'style-name'), "") name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
if name == 'G-': if name == 'G-':
@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler):
htmltag = 'div' htmltag = 'div'
style = '' style = ''
else: else:
style = "position: absolute;" style = "position:absolute;"
if attrs.has_key( (SVGNS,"width") ): if attrs.has_key( (SVGNS,"width") ):
style = style + "width:" + attrs[(SVGNS,"width")] + ";" style = style + "width:" + attrs[(SVGNS,"width")] + ";"
if attrs.has_key( (SVGNS,"height") ): if attrs.has_key( (SVGNS,"height") ):
@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler):
htmlattrs['style'] = "display: block;" htmlattrs['style'] = "display: block;"
self.emptytag('img', htmlattrs) self.emptytag('img', htmlattrs)
def s_draw_object(self, tag, attrs):
    """ A <draw:object> is an embedded object in the document (e.g. a spreadsheet
        in a presentation).

        Rendering of embedded objects is deliberately disabled: the element
        produces no output and the handler returns immediately.
    """
    # Added by Kovid: embedded objects are skipped. The upstream code that
    # followed this return (look up the sub-document whose folder matches
    # the xlink:href and walk its top node) was unreachable and has been
    # removed.
    return
def s_draw_object_ole(self, tag, attrs):
    """ A <draw:object-ole> is an embedded OLE object in the document (e.g. MS Graph).

        Only Microsoft Graph 97 charts are recognised; they are written as an
        empty named anchor. Any other object, or one without a draw:class-id
        attribute, produces no output.
    """
    # .get(): a missing class-id attribute must not abort the conversion
    # with a KeyError.
    class_id = attrs.get((DRAWNS, "class-id"))
    if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart
        tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
        self.opentag('a', tagattrs)
        # The second argument of closetag() is the block flag, not the
        # attributes; the anchor is inline, so no newline is wanted.
        self.closetag('a', False)
def s_draw_page(self, tag, attrs): def s_draw_page(self, tag, attrs):
""" A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML. """ A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
Therefore if you convert a ODP file, you get a series of <fieldset>s. Therefore if you convert a ODP file, you get a series of <fieldset>s.
@ -655,13 +801,9 @@ class ODF2XHTML(handler.ContentHandler):
def html_body(self, tag, attrs): def html_body(self, tag, attrs):
self.writedata() self.writedata()
if self.generate_css: if self.generate_css and self.use_internal_css:
self.opentag('style', {'type':"text/css"}, True) self.opentag('style', {'type':"text/css"}, True)
self.writeout('/*<![CDATA[*/\n') self.writeout('/*<![CDATA[*/\n')
self.writeout('\nimg { width: 100%; height: 100%; }\n')
self.writeout('* { padding: 0; margin: 0; background-color:white; }\n')
self.writeout('body { margin: 0 1em; }\n')
self.writeout('ol, ul { padding-left: 2em; }\n')
self.generate_stylesheet() self.generate_stylesheet()
self.writeout('/*]]>*/\n') self.writeout('/*]]>*/\n')
self.closetag('style') self.closetag('style')
@ -669,6 +811,16 @@ class ODF2XHTML(handler.ContentHandler):
self.closetag('head') self.closetag('head')
self.opentag('body', block=True) self.opentag('body', block=True)
# background-color: white removed by Kovid for #9118
# Specifying an explicit bg color prevents ebook readers
# from successfully inverting colors
default_styles = """
img { width: 100%; height: 100%; }
* { padding: 0; margin: 0; }
body { margin: 0 1em; }
ol, ul { padding-left: 2em; }
"""
def generate_stylesheet(self): def generate_stylesheet(self):
for name in self.stylestack: for name in self.stylestack:
styles = self.styledict.get(name) styles = self.styledict.get(name)
@ -688,6 +840,7 @@ class ODF2XHTML(handler.ContentHandler):
styles = parentstyle styles = parentstyle
self.styledict[name] = styles self.styledict[name] = styles
# Write the styles to HTML # Write the styles to HTML
self.writeout(self.default_styles)
for name in self.stylestack: for name in self.stylestack:
styles = self.styledict.get(name) styles = self.styledict.get(name)
css2 = self.cs.convert_styles(styles) css2 = self.cs.convert_styles(styles)
@ -729,6 +882,7 @@ class ODF2XHTML(handler.ContentHandler):
self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"}) self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
for metaline in self.metatags: for metaline in self.metatags:
self.writeout(metaline) self.writeout(metaline)
self.writeout('<title>%s</title>\n' % escape(self.title))
def e_office_document_content(self, tag, attrs): def e_office_document_content(self, tag, attrs):
""" Last tag """ """ Last tag """
@ -773,7 +927,7 @@ class ODF2XHTML(handler.ContentHandler):
""" Copy all attributes to a struct. """ Copy all attributes to a struct.
We will later convert them to CSS2 We will later convert them to CSS2
""" """
if self.currentstyle is None: if self.currentstyle is None: # Added by Kovid
return return
for key,attr in attrs.items(): for key,attr in attrs.items():
self.styledict[self.currentstyle][key] = attr self.styledict[self.currentstyle][key] = attr
@ -799,7 +953,7 @@ class ODF2XHTML(handler.ContentHandler):
def s_style_font_face(self, tag, attrs): def s_style_font_face(self, tag, attrs):
""" It is possible that the HTML browser doesn't know how to """ It is possible that the HTML browser doesn't know how to
show a particular font. Luckily ODF provides generic fallbacks show a particular font. Luckily ODF provides generic fallbacks
Unluckily they are not the same as CSS2. Unfortunately they are not the same as CSS2.
CSS2: serif, sans-serif, cursive, fantasy, monospace CSS2: serif, sans-serif, cursive, fantasy, monospace
ODF: roman, swiss, modern, decorative, script, system ODF: roman, swiss, modern, decorative, script, system
""" """
@ -850,7 +1004,7 @@ class ODF2XHTML(handler.ContentHandler):
""" """
name = attrs[(STYLENS,'name')] name = attrs[(STYLENS,'name')]
name = name.replace(".","_") name = name.replace(".","_")
self.currentstyle = "@page " + name self.currentstyle = ".PL-" + name
self.stylestack.append(self.currentstyle) self.stylestack.append(self.currentstyle)
self.styledict[self.currentstyle] = {} self.styledict[self.currentstyle] = {}
@ -881,7 +1035,7 @@ class ODF2XHTML(handler.ContentHandler):
self.s_ignorexml(tag, attrs) self.s_ignorexml(tag, attrs)
# Short prefixes for class selectors # Short prefixes for class selectors
familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR', _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
'text':'S', 'section':'D', 'text':'S', 'section':'D',
'table':'T', 'table-cell':'TD', 'table-column':'TC', 'table':'T', 'table-cell':'TD', 'table-column':'TC',
'table-row':'TR', 'graphic':'G' } 'table-row':'TR', 'graphic':'G' }
@ -897,7 +1051,7 @@ class ODF2XHTML(handler.ContentHandler):
name = name.replace(".","_") name = name.replace(".","_")
family = attrs[(STYLENS,'family')] family = attrs[(STYLENS,'family')]
htmlfamily = self.familymap.get(family,'unknown') htmlfamily = self.familymap.get(family,'unknown')
sfamily = self.familyshort.get(family,'X') sfamily = self._familyshort.get(family,'X')
name = "%s%s-%s" % (self.autoprefix, sfamily, name) name = "%s%s-%s" % (self.autoprefix, sfamily, name)
parent = attrs.get( (STYLENS,'parent-style-name') ) parent = attrs.get( (STYLENS,'parent-style-name') )
self.currentstyle = special_styles.get(name,"."+name) self.currentstyle = special_styles.get(name,"."+name)
@ -942,6 +1096,7 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def s_table_table_cell(self, tag, attrs): def s_table_table_cell(self, tag, attrs):
""" Start a table cell """
#FIXME: number-columns-repeated § 8.1.3 #FIXME: number-columns-repeated § 8.1.3
#repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) #repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
htmlattrs = {} htmlattrs = {}
@ -959,11 +1114,13 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def e_table_table_cell(self, tag, attrs): def e_table_table_cell(self, tag, attrs):
""" End a table cell """
self.writedata() self.writedata()
self.closetag('td') self.closetag('td')
self.purgedata() self.purgedata()
def s_table_table_column(self, tag, attrs): def s_table_table_column(self, tag, attrs):
""" Start a table column """
c = attrs.get( (TABLENS,'style-name'), None) c = attrs.get( (TABLENS,'style-name'), None)
repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
htmlattrs = {} htmlattrs = {}
@ -974,6 +1131,7 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def s_table_table_row(self, tag, attrs): def s_table_table_row(self, tag, attrs):
""" Start a table row """
#FIXME: table:number-rows-repeated #FIXME: table:number-rows-repeated
c = attrs.get( (TABLENS,'style-name'), None) c = attrs.get( (TABLENS,'style-name'), None)
htmlattrs = {} htmlattrs = {}
@ -983,6 +1141,7 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def e_table_table_row(self, tag, attrs): def e_table_table_row(self, tag, attrs):
""" End a table row """
self.writedata() self.writedata()
self.closetag('tr') self.closetag('tr')
self.purgedata() self.purgedata()
@ -997,10 +1156,28 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def e_text_a(self, tag, attrs): def e_text_a(self, tag, attrs):
""" End an anchor or bookmark reference """
self.writedata() self.writedata()
self.closetag('a', False) self.closetag('a', False)
self.purgedata() self.purgedata()
def s_text_bookmark(self, tag, attrs):
    """ Bookmark definition: emit an empty <span> carrying the anchor id
        generated for the bookmark name.
    """
    anchor_id = self.get_anchor(attrs[(TEXTNS, 'name')])
    # Flush pending text first so the marker lands at the bookmark position
    self.writedata()
    self.opentag('span', {'id': anchor_id})
    self.closetag('span', False)
    self.purgedata()
def s_text_bookmark_ref(self, tag, attrs):
    """ Bookmark reference: open an <a> whose href points at the anchor id
        generated for the referenced bookmark name. The tag is not closed here.
    """
    target = "#" + self.get_anchor(attrs[(TEXTNS, 'ref-name')])
    self.writedata()
    self.opentag('a', {'href': target})
    self.purgedata()
def s_text_h(self, tag, attrs): def s_text_h(self, tag, attrs):
""" Headings start """ """ Headings start """
level = int(attrs[(TEXTNS,'outline-level')]) level = int(attrs[(TEXTNS,'outline-level')])
@ -1018,13 +1195,19 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def e_text_h(self, tag, attrs): def e_text_h(self, tag, attrs):
""" Headings end """ """ Headings end
Side-effect: If there is no title in the metadata, then it is taken
from the first heading of any level.
"""
self.writedata() self.writedata()
level = int(attrs[(TEXTNS,'outline-level')]) level = int(attrs[(TEXTNS,'outline-level')])
if level > 6: level = 6 # Heading levels go only to 6 in XHTML if level > 6: level = 6 # Heading levels go only to 6 in XHTML
if level < 1: level = 1 if level < 1: level = 1
lev = self.headinglevels[1:level+1] lev = self.headinglevels[1:level+1]
outline = '.'.join(map(str,lev) ) outline = '.'.join(map(str,lev) )
heading = ''.join(self.data)
if self.title == '': self.title = heading
# Changed by Kovid
tail = ''.join(self.data) tail = ''.join(self.data)
anchor = self.get_anchor("%s.%s" % ( outline, tail)) anchor = self.get_anchor("%s.%s" % ( outline, tail))
anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506 anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506
@ -1036,12 +1219,14 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata() self.purgedata()
def s_text_line_break(self, tag, attrs): def s_text_line_break(self, tag, attrs):
""" Force a line break (<br/>) """
self.writedata() self.writedata()
self.emptytag('br') self.emptytag('br')
self.purgedata() self.purgedata()
def s_text_list(self, tag, attrs): def s_text_list(self, tag, attrs):
""" To know which level we're at, we have to count the number """ Start a list (<ul> or <ol>)
To know which level we're at, we have to count the number
of <text:list> elements on the tagstack. of <text:list> elements on the tagstack.
""" """
name = attrs.get( (TEXTNS,'style-name') ) name = attrs.get( (TEXTNS,'style-name') )
@ -1055,12 +1240,13 @@ class ODF2XHTML(handler.ContentHandler):
name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
list_class = "%s_%d" % (name, level) list_class = "%s_%d" % (name, level)
if self.generate_css: if self.generate_css:
self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class }) self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
else: else:
self.opentag('%s' % self.listtypes.get(list_class,'UL')) self.opentag('%s' % self.listtypes.get(list_class,'ul'))
self.purgedata() self.purgedata()
def e_text_list(self, tag, attrs): def e_text_list(self, tag, attrs):
""" End a list """
self.writedata() self.writedata()
name = attrs.get( (TEXTNS,'style-name') ) name = attrs.get( (TEXTNS,'style-name') )
level = self.tagstack.count_tags(tag) + 1 level = self.tagstack.count_tags(tag) + 1
@ -1072,14 +1258,16 @@ class ODF2XHTML(handler.ContentHandler):
# textbox itself may be nested within another list. # textbox itself may be nested within another list.
name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
list_class = "%s_%d" % (name, level) list_class = "%s_%d" % (name, level)
self.closetag(self.listtypes.get(list_class,'UL')) self.closetag(self.listtypes.get(list_class,'ul'))
self.purgedata() self.purgedata()
def s_text_list_item(self, tag, attrs): def s_text_list_item(self, tag, attrs):
""" Start list item """
self.opentag('li') self.opentag('li')
self.purgedata() self.purgedata()
def e_text_list_item(self, tag, attrs): def e_text_list_item(self, tag, attrs):
""" End list item """
self.writedata() self.writedata()
self.closetag('li') self.closetag('li')
self.purgedata() self.purgedata()
@ -1191,7 +1379,7 @@ class ODF2XHTML(handler.ContentHandler):
if specialtag is None: if specialtag is None:
specialtag = 'p' specialtag = 'p'
self.writedata() self.writedata()
if not self.data: if not self.data: # Added by Kovid
# Give substance to empty paragraphs, as rendered by OOo # Give substance to empty paragraphs, as rendered by OOo
self.writeout('&#160;') self.writeout('&#160;')
self.closetag(specialtag) self.closetag(specialtag)
@ -1254,55 +1442,30 @@ class ODF2XHTML(handler.ContentHandler):
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def load(self, odffile): def load(self, odffile):
self._odffile = odffile """ Loads a document into the parser and parses it.
The argument can either be a filename or a document in memory.
"""
self.lines = []
self._wfunc = self._wlines
if isinstance(odffile, basestring) \
or hasattr(odffile, 'read'): # Added by Kovid
self.document = load(odffile)
else:
self.document = odffile
self._walknode(self.document.topnode)
def parseodf(self): def _walknode(self, node):
self.xmlfile = '' if node.nodeType == Node.ELEMENT_NODE:
self.title = '' self.startElementNS(node.qname, node.tagName, node.attributes)
self.data = [] for c in node.childNodes:
self.tagstack = TagStack() self._walknode(c)
self.pstack = [] self.endElementNS(node.qname, node.tagName)
self.processelem = True if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
self.processcont = True self.characters(unicode(node))
self.listtypes = {}
self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
self.cs = StyleToCSS()
self.anchors = {}
# Style declarations
self.stylestack = []
self.styledict = {}
self.currentstyle = None
# Footnotes and endnotes
self.notedict = {}
self.currentnote = 0
self.notebody = ''
# Tags from meta.xml
self.metatags = []
# Extract the interesting files
z = zipfile.ZipFile(self._odffile)
# For some reason Trac has trouble when xml.sax.make_parser() is used.
# Could it be because PyXML is installed, and therefore a different parser
# might be chosen? By calling expatreader directly we avoid this issue
parser = expatreader.create_parser()
parser.setFeature(handler.feature_namespaces, 1)
parser.setContentHandler(self)
parser.setErrorHandler(handler.ErrorHandler())
inpsrc = InputSource()
for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
self.xmlfile = xmlfile
content = z.read(xmlfile)
inpsrc.setByteStream(StringIO(content))
parser.parse(inpsrc)
z.close()
def odf2xhtml(self, odffile): def odf2xhtml(self, odffile):
""" Load a file and return XHTML """ Load a file and return the XHTML
""" """
self.load(odffile) self.load(odffile)
return self.xhtml() return self.xhtml()
@ -1311,9 +1474,8 @@ class ODF2XHTML(handler.ContentHandler):
if s != '': self.lines.append(s) if s != '': self.lines.append(s)
def xhtml(self): def xhtml(self):
self.lines = [] """ Returns the xhtml
self._wfunc = self._wlines """
self.parseodf()
return ''.join(self.lines) return ''.join(self.lines)
def _writecss(self, s): def _writecss(self, s):
@ -1323,11 +1485,127 @@ class ODF2XHTML(handler.ContentHandler):
pass pass
def css(self): def css(self):
self._wfunc = self._writenothing """ Returns the CSS content """
self.parseodf()
self._csslines = [] self._csslines = []
self._wfunc = self._writecss self._wfunc = self._writecss
self.generate_stylesheet() self.generate_stylesheet()
res = ''.join(self._csslines) res = ''.join(self._csslines)
self._wfunc = self._wlines
del self._csslines del self._csslines
return res return res
def save(self, outputfile, addsuffix=False):
    """ Save the XHTML under the given filename.

        outputfile -- path to write to; '-' means write to stdout
        addsuffix  -- if true, ".html" is appended to outputfile
                      (ignored when writing to stdout)

        The markup is encoded as US-ASCII; characters outside that range
        are written as XML character references.
    """
    # Render first, so a failure in xhtml() cannot leave behind an empty,
    # truncated output file.
    data = self.xhtml().encode('us-ascii','xmlcharrefreplace')
    if outputfile == '-':
        import sys # Added by Kovid
        # stdout belongs to the process, not to this method: flush it but
        # never close it, otherwise every later write to stdout fails.
        sys.stdout.write(data)
        sys.stdout.flush()
        return
    if addsuffix:
        outputfile = outputfile + ".html"
    outputfp = open(outputfile, "w")
    try:
        outputfp.write(data)
    finally:
        # Release the handle even if the write raises.
        outputfp.close()
class ODF2XHTMLembedded(ODF2XHTML):
    """ Variant of ODF2XHTML that writes into a caller-supplied list of lines.

        The constructor registers a reduced element table: the handlers for
        document metadata, office-level containers and style declarations are
        left commented out below, so only drawing, table and text elements
        (plus a number of explicitly ignored ones) are mapped.
    """

    def __init__(self, lines, generate_css=True, embedable=False):
        """ Set up the converter.

            lines        -- list stored as self.lines; presumably the output
                            is appended to it (confirm against _wlines)
            generate_css -- stored as self.generate_css
            embedable    -- accepted but not used by this constructor
        """
        self._resetobject()
        # Assigned after _resetobject(), so the caller's list is the one kept.
        self.lines = lines

        # Tags
        self.generate_css = generate_css
        # Maps (namespace, local name) to a (start handler, end handler) pair;
        # None means no handler for that event. Entries that are commented
        # out are deliberately not registered in this class.
        self.elements = {
#        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
#        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
#        (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
#        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
#        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
        (DRAWNS, 'image'): (self.s_draw_image, None),
        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
        (DRAWNS, 'object'): (self.s_draw_object, None),
        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
#        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
#        (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
#        (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
#        (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
        (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
        (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
#        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
#        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
        (OFFICENS, "forms"):(self.s_ignorexml, None),
#        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
        (OFFICENS, "meta"):(self.s_ignorecont, None),
#        (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
#        (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
#        (OFFICENS, "styles"):(self.s_office_styles, None),
#        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
        (OFFICENS, "scripts"):(self.s_ignorexml, None),
        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
##        (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
#        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
#        (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
#        (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "font-face"):(self.s_style_font_face, None),
##        (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
##        (STYLENS, "footer-style"):(self.s_style_footer_style, None),
#        (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "handout-master"):(self.s_ignorexml, None),
##        (STYLENS, "header"):(self.s_style_header, self.e_style_header),
##        (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
##        (STYLENS, "header-style"):(self.s_style_header_style, None),
#        (STYLENS, "master-page"):(self.s_style_master_page, None),
#        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
##        (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
#        (STYLENS, "page-layout"):(self.s_ignorexml, None),
#        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
#        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
        (SVGNS, 'desc'): (self.s_ignorexml, None),
        (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
        (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
        (TABLENS, 'table-column'): (self.s_table_table_column, None),
        (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
        (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
        (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
        (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
        (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
        (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
        (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
        (TEXTNS, "list-style"):(None, None),
        (TEXTNS, "note"):(self.s_text_note, None),
        (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
        (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
        (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
        (TEXTNS, 's'): (self.s_text_s, None),
        (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
        (TEXTNS, 'tab'): (self.s_text_tab, None),
        (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "page-number"):(None, None),
        }

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -41,7 +41,7 @@ IS_IMAGE = 1
# We need at least Python 2.2 # We need at least Python 2.2
assert sys.version_info[0]>=2 and sys.version_info[1] >= 2 assert sys.version_info[0]>=2 and sys.version_info[1] >= 2
sys.setrecursionlimit=50 #sys.setrecursionlimit(100)
#The recursion limit is set conservative so mistakes like #The recursion limit is set conservative so mistakes like
# s=content() s.addElement(s) won't eat up too much processor time. # s=content() s.addElement(s) won't eat up too much processor time.
@ -128,12 +128,12 @@ class OpenDocument:
self.element_dict[element.qname] = [] self.element_dict[element.qname] = []
self.element_dict[element.qname].append(element) self.element_dict[element.qname].append(element)
if element.qname == (STYLENS, u'style'): if element.qname == (STYLENS, u'style'):
self._register_stylename(element) # Add to style dictionary self.__register_stylename(element) # Add to style dictionary
styleref = element.getAttrNS(TEXTNS,u'style-name') styleref = element.getAttrNS(TEXTNS,u'style-name')
if styleref is not None and self._styles_ooo_fix.has_key(styleref): if styleref is not None and self._styles_ooo_fix.has_key(styleref):
element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref]) element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref])
def _register_stylename(self, element): def __register_stylename(self, element):
''' Register a style. But there are three style dictionaries: ''' Register a style. But there are three style dictionaries:
office:styles, office:automatic-styles and office:master-styles office:styles, office:automatic-styles and office:master-styles
Chapter 14 Chapter 14
@ -165,7 +165,7 @@ class OpenDocument:
""" Generates the full document as an XML file """ Generates the full document as an XML file
Always written as a bytestream in UTF-8 encoding Always written as a bytestream in UTF-8 encoding
""" """
self._replaceGenerator() self.__replaceGenerator()
xml=StringIO() xml=StringIO()
xml.write(_XMLPROLOGUE) xml.write(_XMLPROLOGUE)
self.topnode.toXml(0, xml) self.topnode.toXml(0, xml)
@ -197,8 +197,10 @@ class OpenDocument:
x.write_close_tag(0, xml) x.write_close_tag(0, xml)
return xml.getvalue() return xml.getvalue()
def manifestxml(self): def __manifestxml(self):
""" Generates the manifest.xml file """ """ Generates the manifest.xml file
The self.manifest isn't available unless the document is being saved
"""
xml=StringIO() xml=StringIO()
xml.write(_XMLPROLOGUE) xml.write(_XMLPROLOGUE)
self.manifest.toXml(0,xml) self.manifest.toXml(0,xml)
@ -206,7 +208,7 @@ class OpenDocument:
def metaxml(self): def metaxml(self):
""" Generates the meta.xml file """ """ Generates the meta.xml file """
self._replaceGenerator() self.__replaceGenerator()
x = DocumentMeta() x = DocumentMeta()
x.addElement(self.meta) x.addElement(self.meta)
xml=StringIO() xml=StringIO()
@ -344,7 +346,7 @@ class OpenDocument:
self.thumbnail = filecontent self.thumbnail = filecontent
def addObject(self, document, objectname=None): def addObject(self, document, objectname=None):
""" Add an object. The object must be an OpenDocument class """ Adds an object (subdocument). The object must be an OpenDocument class
The return value will be the folder in the zipfile the object is stored in The return value will be the folder in the zipfile the object is stored in
""" """
self.childobjects.append(document) self.childobjects.append(document)
@ -367,15 +369,16 @@ class OpenDocument:
zi.compress_type = zipfile.ZIP_STORED zi.compress_type = zipfile.ZIP_STORED
zi.external_attr = UNIXPERMS zi.external_attr = UNIXPERMS
self._z.writestr(zi, fileobj) self._z.writestr(zi, fileobj)
if hasPictures: # According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry
self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype="")) # if hasPictures:
# self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype=""))
# Look in subobjects # Look in subobjects
subobjectnum = 1 subobjectnum = 1
for subobject in object.childobjects: for subobject in object.childobjects:
self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum)) self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum))
subobjectnum += 1 subobjectnum += 1
def _replaceGenerator(self): def __replaceGenerator(self):
""" Section 3.1.1: The application MUST NOT export the original identifier """ Section 3.1.1: The application MUST NOT export the original identifier
belonging to the application that created the document. belonging to the application that created the document.
""" """
@ -385,22 +388,29 @@ class OpenDocument:
self.meta.addElement(meta.Generator(text=TOOLSVERSION)) self.meta.addElement(meta.Generator(text=TOOLSVERSION))
def save(self, outputfile, addsuffix=False): def save(self, outputfile, addsuffix=False):
""" Save the document under the filename """ """ Save the document under the filename.
If the filename is '-' then save to stdout
"""
if outputfile == '-': if outputfile == '-':
outputfp = zipfile.ZipFile(sys.stdout,"w") outputfp = zipfile.ZipFile(sys.stdout,"w")
else: else:
if addsuffix: if addsuffix:
outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx') outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx')
outputfp = zipfile.ZipFile(outputfile, "w") outputfp = zipfile.ZipFile(outputfile, "w")
self._zipwrite(outputfp) self.__zipwrite(outputfp)
outputfp.close() outputfp.close()
def write(self, outputfp): def write(self, outputfp):
""" User API to write the ODF file to an open file descriptor
Writes the ZIP format
"""
zipoutputfp = zipfile.ZipFile(outputfp,"w") zipoutputfp = zipfile.ZipFile(outputfp,"w")
self._zipwrite(zipoutputfp) self.__zipwrite(zipoutputfp)
def _zipwrite(self, outputfp): def __zipwrite(self, outputfp):
""" Write the document to an open file pointer """ """ Write the document to an open file pointer
This is where the real work is done
"""
self._z = outputfp self._z = outputfp
self._now = time.localtime()[:6] self._now = time.localtime()[:6]
self.manifest = manifest.Manifest() self.manifest = manifest.Manifest()
@ -438,7 +448,7 @@ class OpenDocument:
zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now) zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now)
zi.compress_type = zipfile.ZIP_DEFLATED zi.compress_type = zipfile.ZIP_DEFLATED
zi.external_attr = UNIXPERMS zi.external_attr = UNIXPERMS
self._z.writestr(zi, self.manifestxml() ) self._z.writestr(zi, self.__manifestxml() )
del self._z del self._z
del self._now del self._now
del self.manifest del self.manifest
@ -464,8 +474,8 @@ class OpenDocument:
self._z.writestr(zi, object.contentxml() ) self._z.writestr(zi, object.contentxml() )
# Write settings # Write settings
if self == object and self.settings.hasChildNodes(): if object.settings.hasChildNodes():
self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml")) self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml"))
zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now) zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now)
zi.compress_type = zipfile.ZIP_DEFLATED zi.compress_type = zipfile.ZIP_DEFLATED
zi.external_attr = UNIXPERMS zi.external_attr = UNIXPERMS
@ -473,7 +483,7 @@ class OpenDocument:
# Write meta # Write meta
if self == object: if self == object:
self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml")) self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml"))
zi = zipfile.ZipInfo("meta.xml", self._now) zi = zipfile.ZipInfo("meta.xml", self._now)
zi.compress_type = zipfile.ZIP_DEFLATED zi.compress_type = zipfile.ZIP_DEFLATED
zi.external_attr = UNIXPERMS zi.external_attr = UNIXPERMS
@ -497,6 +507,7 @@ class OpenDocument:
return element.Text(data) return element.Text(data)
def createCDATASection(self, data):
    """ Method to create a CDATA section

        data -- the content handed to element.CDATASection
        Returns the new element.CDATASection node.
    """
    # The parameter is called "data"; the body used to reference the
    # undefined name "cdata", so every call raised NameError.
    return element.CDATASection(data)
def getMediaType(self): def getMediaType(self):
@ -504,12 +515,14 @@ class OpenDocument:
return self.mimetype return self.mimetype
def getStyleByName(self, name): def getStyleByName(self, name):
""" Finds a style object based on the name """
ncname = make_NCName(name) ncname = make_NCName(name)
if self._styles_dict == {}: if self._styles_dict == {}:
self.rebuild_caches() self.rebuild_caches()
return self._styles_dict.get(ncname, None) return self._styles_dict.get(ncname, None)
def getElementsByType(self, element): def getElementsByType(self, element):
""" Gets elements based on the type, which is function from text.py, draw.py etc. """
obj = element(check_grammar=False) obj = element(check_grammar=False)
if self.element_dict == {}: if self.element_dict == {}:
self.rebuild_caches() self.rebuild_caches()
@ -517,53 +530,59 @@ class OpenDocument:
# Convenience functions # Convenience functions
def OpenDocumentChart(): def OpenDocumentChart():
""" Creates a chart document """
doc = OpenDocument('application/vnd.oasis.opendocument.chart') doc = OpenDocument('application/vnd.oasis.opendocument.chart')
doc.chart = Chart() doc.chart = Chart()
doc.body.addElement(doc.chart) doc.body.addElement(doc.chart)
return doc return doc
def OpenDocumentDrawing(): def OpenDocumentDrawing():
""" Creates a drawing document """
doc = OpenDocument('application/vnd.oasis.opendocument.graphics') doc = OpenDocument('application/vnd.oasis.opendocument.graphics')
doc.drawing = Drawing() doc.drawing = Drawing()
doc.body.addElement(doc.drawing) doc.body.addElement(doc.drawing)
return doc return doc
def OpenDocumentImage(): def OpenDocumentImage():
""" Creates an image document """
doc = OpenDocument('application/vnd.oasis.opendocument.image') doc = OpenDocument('application/vnd.oasis.opendocument.image')
doc.image = Image() doc.image = Image()
doc.body.addElement(doc.image) doc.body.addElement(doc.image)
return doc return doc
def OpenDocumentPresentation(): def OpenDocumentPresentation():
""" Creates a presentation document """
doc = OpenDocument('application/vnd.oasis.opendocument.presentation') doc = OpenDocument('application/vnd.oasis.opendocument.presentation')
doc.presentation = Presentation() doc.presentation = Presentation()
doc.body.addElement(doc.presentation) doc.body.addElement(doc.presentation)
return doc return doc
def OpenDocumentSpreadsheet(): def OpenDocumentSpreadsheet():
""" Creates a spreadsheet document """
doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet') doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet')
doc.spreadsheet = Spreadsheet() doc.spreadsheet = Spreadsheet()
doc.body.addElement(doc.spreadsheet) doc.body.addElement(doc.spreadsheet)
return doc return doc
def OpenDocumentText(): def OpenDocumentText():
""" Creates a text document """
doc = OpenDocument('application/vnd.oasis.opendocument.text') doc = OpenDocument('application/vnd.oasis.opendocument.text')
doc.text = Text() doc.text = Text()
doc.body.addElement(doc.text) doc.body.addElement(doc.text)
return doc return doc
def OpenDocumentTextMaster():
    """ Build an empty text master document.

        Returns an OpenDocument with the text-master mimetype whose body
        holds a fresh Text element, also reachable as the .text attribute.
    """
    mimetype = 'application/vnd.oasis.opendocument.text-master'
    master = OpenDocument(mimetype)
    master.text = Text()
    master.body.addElement(master.text)
    return master
def load(odffile): def __loadxmlparts(z, manifest, doc, objectpath):
from load import LoadParser from load import LoadParser
from xml.sax import make_parser, handler from xml.sax import make_parser, handler
z = zipfile.ZipFile(odffile)
mimetype = z.read('mimetype')
doc = OpenDocument(mimetype, add_generator=False)
# Look in the manifest file to see if which of the four files there are for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'):
manifestpart = z.read('META-INF/manifest.xml')
manifest = manifestlist(manifestpart)
for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
if not manifest.has_key(xmlfile): if not manifest.has_key(xmlfile):
continue continue
try: try:
@ -580,7 +599,19 @@ def load(odffile):
parser.parse(inpsrc) parser.parse(inpsrc)
del doc._parsing del doc._parsing
except KeyError, v: pass except KeyError, v: pass
# FIXME: Add subobjects correctly here
def load(odffile):
""" Load an ODF file into memory
Returns a reference to the structure
"""
z = zipfile.ZipFile(odffile)
mimetype = z.read('mimetype')
doc = OpenDocument(mimetype, add_generator=False)
# Look in the manifest file to see which of the four XML files are present
manifestpart = z.read('META-INF/manifest.xml')
manifest = manifestlist(manifestpart)
__loadxmlparts(z, manifest, doc, '')
for mentry,mvalue in manifest.items(): for mentry,mvalue in manifest.items():
if mentry[:9] == "Pictures/" and len(mentry) > 9: if mentry[:9] == "Pictures/" and len(mentry) > 9:
doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry)) doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
@ -588,6 +619,13 @@ def load(odffile):
doc.addThumbnail(z.read(mentry)) doc.addThumbnail(z.read(mentry))
elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'): elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
pass pass
# Load subobjects into structure
elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/":
subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
doc.addObject(subdoc, "/" + mentry[:-1])
__loadxmlparts(z, manifest, subdoc, mentry)
elif mentry[:7] == "Object ":
pass # Don't load subobjects as opaque objects
else: else:
if mvalue['full-path'][-1] == '/': if mvalue['full-path'][-1] == '/':
doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None)) doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
@ -612,4 +650,5 @@ def load(odffile):
elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula': elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula':
doc.formula = b[0].firstChild doc.formula = b[0].firstChild
return doc return doc
# vim: set expandtab sw=4 : # vim: set expandtab sw=4 :