Sync to trunk.

This commit is contained in:
John Schember 2011-02-24 18:46:46 -05:00
commit 6d568aee69
56 changed files with 1830 additions and 483 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 800 B

View File

@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.20minutos.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class t20Minutos(BasicNewsRecipe):
    # Recipe for the Spanish daily 20 Minutos (www.20minutos.es). Articles are
    # built from the content embedded in the feed entries
    # (use_embedded_content is True), so the only clean-up needed is the
    # markup normalisation done in preprocess_html().
    title                 = '20 Minutos'
    __author__            = 'Darko Miletic'
    description           = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'
    publisher             = '20 Minutos Online SL'
    category              = 'news, politics, Spain'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = True
    language              = 'es'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
    extra_css             = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""

    # Metadata handed to the conversion pipeline; mirrors the attributes above.
    conversion_options = {
        'comment'   : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    remove_tags       = [dict(attrs={'class':'mf-viral'})]
    remove_attributes = ['border']

    feeds = [
        (u'Principal'     , u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
        (u'Cine'          , u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
        (u'Internacional' , u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
        (u'Deportes'      , u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
        (u'Nacional'      , u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
        (u'Economia'      , u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
        (u'Tecnologia'    , u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss'),
    ]

    def preprocess_html(self, soup):
        '''
        Normalise the article markup and return the same soup object.

        Inline ``style`` attributes are dropped, links are flattened and every
        image is guaranteed to carry an ``alt`` attribute.
        '''
        for item in soup.findAll(style=True):
            del item['style']

        for item in soup.findAll('a'):
            if item.string is not None:
                # Link wrapping a single text node: keep only the text.
                item.replaceWith(item.string)
            elif item.find('img'):
                # Link wrapping an image: keep the image, drop the link by
                # turning the anchor into an attribute-less <div>.
                item.name = 'div'
                item.attrs = []
            else:
                # Link with nested markup but no image: keep its text only.
                item.replaceWith(self.tag_to_string(item))

        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

View File

@ -32,16 +32,25 @@ class Adevarul(BasicNewsRecipe):
}
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
,dict(name='div', attrs={'class':'bd'})
,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'})
]
remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
remove_tags = [
dict(name='li', attrs={'class':'author'})
,dict(name='li', attrs={'class':'date'})
,dict(name='li', attrs={'class':'comments'})
,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'})
]
,dict(name='form', attrs={'id':'bb-comment-create-form'})
,dict(name='div', attrs={'id':'mediatag'})
,dict(name='div', attrs={'id':'ft'})
,dict(name='div', attrs={'id':'comment_wrapper'})
]
remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]
remove_tags_after = [
dict(name='div', attrs={'id':'comment_wrapper'}),
]
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]

View File

@ -41,7 +41,8 @@ class ESPN(BasicNewsRecipe):
'''
feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
feeds = [
('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
'http://sports.espn.go.com/espn/rss/nfl/news',
'http://sports.espn.go.com/espn/rss/nba/news',
'http://sports.espn.go.com/espn/rss/mlb/news',
@ -107,10 +108,11 @@ class ESPN(BasicNewsRecipe):
if match and 'soccernet' not in url and 'bassmaster' not in url:
return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
else:
if match and 'soccernet' in url:
splitlist = url.split("&", 5)
newurl = 'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] )
return newurl
if 'soccernet' in url:
match = re.search(r'/id/(\d+)/', url)
if match:
return \
'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1)
#else:
# if 'bassmaster' in url:
# return url

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.1'
__date__ = '22 February 2011'
'''
http://blog.flickr.net/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1297031650(BasicNewsRecipe):
    # Recipe for the English-language Flickr Blog, read from its FeedBurner
    # feed. Only the <div class="entry"> part of each page is kept.

    # Identification
    title       = u'Flickr Blog'
    __author__  = 'Ricardo Jurado'
    publisher   = u''
    description = 'Pictures Blog'
    category    = 'Blog,Pictures'
    language    = 'en'

    # Artwork: the blog logo is used for both the masthead and the cover.
    masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
    cover_url    = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'

    # Download behaviour
    oldest_article        = 120
    max_articles_per_feed = 10
    use_embedded_content  = False
    encoding              = 'UTF-8'

    # Clean-up and styling
    no_stylesheets    = True
    remove_javascript = True
    keep_only_tags    = [dict(name='div', attrs={'class':'entry'})]
    extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
.published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
.posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
"""

    feeds = [
        (u'BLOG', u'http://feeds.feedburner.com/Flickrblog'),
        # Spanish-language alternative, left disabled:
        #(u'BLOG', u'http://blog.flickr.net/es/feed/atom/')
    ]

View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.1'
__date__ = '22 February 2011'
'''
http://blog.flickr.net/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1297031650(BasicNewsRecipe):
    # Recipe for the Spanish-language Flickr Blog, read from the blog's own
    # Atom feed. Only the <div class="entry"> part of each page is kept.

    # Identification
    title       = u'Flickr Blog'
    __author__  = 'Ricardo Jurado'
    publisher   = u''
    description = 'Pictures Blog'
    category    = 'Blog,Pictures'
    language    = 'es'

    # Artwork: the blog logo is used for both the masthead and the cover.
    masthead_url = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
    cover_url    = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'

    # Download behaviour
    oldest_article        = 120
    max_articles_per_feed = 10
    use_embedded_content  = False
    encoding              = 'UTF-8'

    # Clean-up and styling
    no_stylesheets    = True
    remove_javascript = True
    keep_only_tags    = [dict(name='div', attrs={'class':'entry'})]
    extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
.published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
.posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
"""

    feeds = [
        (u'BLOG', u'http://blog.flickr.net/es/feed/atom/'),
    ]

View File

@ -17,10 +17,9 @@ class Gizmodo(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
use_embedded_content = True
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
conversion_options = {
'comment' : description
@ -29,13 +28,12 @@ class Gizmodo(BasicNewsRecipe):
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')]
remove_tags = [
{'class': 'feedflare'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -1,20 +1,43 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gsp.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1286351181(BasicNewsRecipe):
title = u'gsp.ro'
__author__ = 'bucsie'
oldest_article = 2
class GSP(BasicNewsRecipe):
title = u'Gazeta Sporturilor'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'Gazeta Sporturilor'
publisher = u'Gazeta Sporturilor'
category = 'Ziare,Sport,Stiri,Romania'
oldest_article = 5
max_articles_per_feed = 100
language='ro'
cover_url ='http://www.gsp.ro/images/sigla_rosu.jpg'
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://www.gsp.ro/images/logo.jpg'
remove_tags = [
dict(name='div', attrs={'class':['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii','mai_multe_articole']}),
dict(name='div', attrs={'id':'icons'})
]
remove_tags_after = dict(name='div', attrs={'id':'adoceanintactrovccmgpmnyt'})
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
feeds = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')]
keep_only_tags = [ dict(name='h1', attrs={'class':'serif title_2'})
,dict(name='div', attrs={'id':'only_text'})
,dict(name='span', attrs={'class':'block poza_principala'})
]
feeds = [ (u'\u0218tiri', u'http://www.gsp.ro/rss.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)
def print_version(self, url):
return 'http://www1.gsp.ro/print/' + url[(url.rindex('/')+1):]

View File

@ -16,15 +16,9 @@ class Lifehacker(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
use_embedded_content = True
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
'''
conversion_options = {
'comment' : description
, 'tags' : category
@ -32,20 +26,12 @@ class Lifehacker(BasicNewsRecipe):
, 'language' : language
}
remove_attributes = ['width', 'height', 'style']
remove_tags_before = dict(name='h1')
keep_only_tags = [dict(id='container')]
remove_tags_after = dict(attrs={'class':'post-body'})
remove_tags = [
dict(id="sharemenu"),
{'class': 'related'},
{'class': 'feedflare'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)
def print_version(self, url):
return url.replace('#!', '?_escaped_fragment_=')

View File

@ -88,8 +88,8 @@ class NYTimes(BasicNewsRecipe):
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = False
description = 'Headlines from the New York Times. Needs a subscription from http://www.nytimes.com'
needs_subscription = 'optional'
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'

View File

@ -96,18 +96,18 @@ class NYTimes(BasicNewsRecipe):
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = False
needs_subscription = True
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
elif replaceKindleVersion:
title='The New York Times'
title='The New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
else:
title='New York Times'
description = 'Today\'s New York Times'
description = 'Today\'s New York Times. Needs subscription from http://www.nytimes.com'
needs_subscription = True
@ -676,7 +676,7 @@ class NYTimes(BasicNewsRecipe):
if hlines:
for hline in hlines:
hline.extract()
#find all section headers
hlines = runAround.findAll('h6')
if hlines:

View File

@ -15,6 +15,7 @@
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n",
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
"sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n",
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",

View File

@ -391,11 +391,6 @@ noembed, param, link {
display: none;
}
/* Page breaks at body tags, to help out with LIT-generation */
body {
page-break-before: always;
}
/* Explicit line-breaks are blocks, sure... */
br {
display: block;

View File

@ -571,7 +571,7 @@ from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
@ -679,7 +679,7 @@ plugins += [
ELONEX,
TECLAST_K3,
NEWSMY,
PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH,
PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, WEXLER,
IPAPYRUS,
EDGE,
SNE,

View File

@ -594,7 +594,7 @@ def main(args=sys.argv):
if remove_plugin(opts.remove_plugin):
print 'Plugin removed'
else:
print 'No custom pluginnamed', opts.remove_plugin
print 'No custom plugin named', opts.remove_plugin
if opts.customize_plugin is not None:
name, custom = opts.customize_plugin.split(',')
plugin = find_plugin(name.strip())

View File

@ -74,6 +74,9 @@ class ANDROID(USBMS):
# T-Mobile
0x0408 : { 0x03ba : [0x0109], },
# Xperia
0x13d3 : { 0x3304 : [0x0001, 0x0002] },
}
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -83,7 +86,7 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', ]
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',

View File

@ -78,9 +78,13 @@ class KOBO(USBMS):
else self._main_prefix
# Determine the firmware version
f = open(self.normalize_path(self._main_prefix + '.kobo/version'), 'r')
self.fwversion = f.readline().split(',')[2]
f.close()
try:
with open(self.normalize_path(self._main_prefix + '.kobo/version'),
'rb') as f:
self.fwversion = f.readline().split(',')[2]
except:
self.fwversion = 'unknown'
if self.fwversion != '1.0' and self.fwversion != '1.4':
self.has_kepubs = True
debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)
@ -161,7 +165,7 @@ class KOBO(USBMS):
return changed
connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
# return bytestrings if the content cannot be decoded as unicode
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
@ -234,7 +238,7 @@ class KOBO(USBMS):
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
# return bytestrings if the content cannot be decoded as unicode
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
@ -511,7 +515,7 @@ class KOBO(USBMS):
# the last book from the collection the list of books is empty
# and the removal of the last book would not occur
connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
# return bytestrings if the content cannot be decoded as unicode
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

View File

@ -104,3 +104,14 @@ class STASH(TECLAST_K3):
VENDOR_NAME = 'STASH'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'W950'
class WEXLER(TECLAST_K3):
    # Driver for the Wexler reader. Only the identification strings and the
    # supported formats are overridden; the rest comes from TECLAST_K3.

    name        = 'Wexler device interface'
    gui_name    = 'Wexler'
    description = _('Communicate with the Wexler reader.')

    FORMATS = ['epub', 'fb2', 'pdf', 'txt']

    VENDOR_NAME = 'WEXLER'
    # Main memory and card A are matched with the same Windows id string.
    WINDOWS_MAIN_MEM = 'T7001'
    WINDOWS_CARD_A_MEM = WINDOWS_MAIN_MEM

View File

@ -304,6 +304,10 @@ class ComicInput(InputFormatPlugin):
help=_('Specify the image size as widthxheight pixels. Normally,'
' an image size is automatically calculated from the output '
'profile, this option overrides it.')),
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
help=_('When converting a CBC do not add links to each page to'
' the TOC. Note this only applies if the TOC has more than one'
' section')),
])
recommendations = set([
@ -449,10 +453,11 @@ class ComicInput(InputFormatPlugin):
wrappers = comic[2]
stoc = toc.add_item(href(wrappers[0]),
None, comic[0], play_order=po)
for i, x in enumerate(wrappers):
stoc.add_item(href(x), None,
_('Page')+' %d'%(i+1), play_order=po)
po += 1
if not opts.dont_add_comic_pages_to_toc:
for i, x in enumerate(wrappers):
stoc.add_item(href(x), None,
_('Page')+' %d'%(i+1), play_order=po)
po += 1
opf.set_toc(toc)
m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
opf.render(m, n, 'toc.ncx')

View File

@ -984,7 +984,9 @@ OptionRecommendation(name='sr3_replace',
flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=line_height,
untable=self.output_plugin.file_type in ('mobi','lit'),
unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
unfloat=self.output_plugin.file_type in ('mobi', 'lit'),
page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit'))
flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl
self.opts.remove_paragraph_spacing = orps

View File

@ -22,7 +22,8 @@ class LITOutput(OutputFormatPlugin):
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.oeb.transforms.split import Split
split = Split(split_on_page_breaks=True, max_flow_size=0)
split = Split(split_on_page_breaks=True, max_flow_size=0,
remove_css_pagebreaks=False)
split(self.oeb, self.opts)

View File

@ -83,6 +83,10 @@ CALIBRE_METADATA_FIELDS = frozenset([
'application_id', # An application id, currently set to the db_id.
'db_id', # the calibre primary key of the item.
'formats', # list of formats (extensions) for this book
# a dict of user category names, where the value is a list of item names
# from the book that are in that category
'user_categories',
]
)

View File

@ -30,6 +30,7 @@ NULL_VALUES = {
'author_sort_map': {},
'authors' : [_('Unknown')],
'title' : _('Unknown'),
'user_categories' : {},
'language' : 'und'
}

View File

@ -470,6 +470,13 @@ def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8))
metadata_elem.append(meta)
def dump_user_categories(cats):
    '''
    Serialize a user categories mapping to a JSON string.

    Any false value (``None``, empty dict, ...) is written as an empty
    mapping. Non-ASCII text is emitted as-is (``ensure_ascii=False``) and
    keys that JSON cannot represent are skipped (``skipkeys=True``).
    '''
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode
    payload = object_to_unicode(cats if cats else {})
    return json.dumps(payload, ensure_ascii=False, skipkeys=True)
class OPF(object): # {{{
MIMETYPE = 'application/oebps-package+xml'
@ -524,6 +531,9 @@ class OPF(object): # {{{
publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False,
formatter=parse_date, renderer=isoformat)
user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads,
renderer=dump_user_categories)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
@ -994,7 +1004,7 @@ class OPF(object): # {{{
for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'category', 'comments',
'pubdate'):
'pubdate', 'user_categories'):
val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None):
setattr(self, attr, val)
@ -1175,6 +1185,10 @@ class OPFCreator(Metadata):
a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
if self.publication_type is not None:
a(CAL_ELEM('calibre:publication_type', self.publication_type))
if self.user_categories:
from calibre.ebooks.metadata.book.json_codec import object_to_unicode
a(CAL_ELEM('calibre:user_categories',
json.dumps(object_to_unicode(self.user_categories))))
manifest = E.manifest()
if self.manifest is not None:
for ref in self.manifest:
@ -1299,6 +1313,8 @@ def metadata_to_opf(mi, as_string=True):
meta('publication_type', mi.publication_type)
if mi.title_sort:
meta('title_sort', mi.title_sort)
if mi.user_categories:
meta('user_categories', dump_user_categories(mi.user_categories))
serialize_user_metadata(metadata, mi.get_all_user_metadata(False))

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
import re, threading
from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream
@ -30,7 +30,21 @@ class Source(Plugin):
touched_fields = frozenset()
def __init__(self, *args, **kwargs):
    '''
    Initialize the plugin, then create the (initially empty) ISBN to
    identifier cache and the re-entrant lock that guards access to it.
    '''
    Plugin.__init__(self, *args, **kwargs)
    self.cache_lock = threading.RLock()
    self._isbn_to_identifier_cache = {}
# Utility functions {{{
def cache_isbn_to_identifier(self, isbn, identifier):
    '''
    Remember that `isbn` maps to `identifier`, replacing any earlier entry
    for the same ISBN. The cache is updated while holding `cache_lock`.
    '''
    with self.cache_lock:
        cache = self._isbn_to_identifier_cache
        cache[isbn] = identifier
def cached_isbn_to_identifier(self, isbn):
    '''
    Return the identifier previously cached for `isbn`, or None when the
    ISBN has not been cached. The lookup is done while holding `cache_lock`.
    '''
    with self.cache_lock:
        known = self._isbn_to_identifier_cache
        return known.get(isbn)
def get_author_tokens(self, authors, only_first_author=True):
'''
Take a list of authors and return a list of tokens useful for an

View File

@ -13,6 +13,7 @@ from functools import partial
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
@ -69,6 +70,7 @@ def to_metadata(browser, log, entry_, timeout):
id_url = entry_id(entry_)[0].text
google_id = id_url.split('/')[-1]
title_ = ': '.join([x.text for x in title(entry_)]).strip()
authors = [x.text.strip() for x in creator(entry_) if x.text]
if not authors:
@ -78,6 +80,7 @@ def to_metadata(browser, log, entry_, timeout):
return None
mi = Metadata(title_, authors)
mi.identifiers = {'google':google_id}
try:
raw = get_details(browser, id_url, timeout)
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
@ -103,9 +106,12 @@ def to_metadata(browser, log, entry_, timeout):
t = str(x.text).strip()
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
if t[:5].upper() == 'ISBN:':
isbns.append(t[5:])
t = check_isbn(t[5:])
if t:
isbns.append(t)
if isbns:
mi.isbn = sorted(isbns, key=len)[-1]
mi.all_isbns = isbns
# Tags
try:
@ -133,20 +139,6 @@ def to_metadata(browser, log, entry_, timeout):
return mi
def get_all_details(br, log, entries, abort, result_queue, timeout):
for i in entries:
try:
ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata):
result_queue.put(ans)
except:
log.exception(
'Failed to get metadata for identify entry:',
etree.tostring(i))
if abort.is_set():
break
class GoogleBooks(Source):
name = 'Google Books'
@ -185,6 +177,36 @@ class GoogleBooks(Source):
'min-viewability':'none',
})
def cover_url_from_identifiers(self, identifiers):
    '''
    Return the Google Books front-cover URL for a book, or None.

    The ``google`` identifier is used when present; otherwise the ``isbn``
    identifier is looked up in the ISBN cache. None is returned when neither
    yields a Google id.
    '''
    volume_id = identifiers.get('google')
    if volume_id is None:
        volume_id = self.cached_isbn_to_identifier(identifiers.get('isbn'))
    if volume_id is None:
        return None
    return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
            volume_id)
def is_cover_image_valid(self, raw):
    '''
    Decide whether downloaded cover data looks like a real cover.

    When no cover is present, a PNG saying "image not available" is
    returned instead (try for example google identifier llNqPwAACAAJ), and
    an actual cover in PNG format has yet to be seen. Data is therefore
    rejected when it is empty, not larger than 17000 bytes, or carries the
    PNG signature. Empty input is handed back unchanged.
    '''
    if not raw:
        return raw
    return len(raw) > 17000 and raw[1:4] != 'PNG'
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
    '''
    Convert each feed entry to metadata and push the results onto
    `result_queue`, one entry at a time.

    For every result that is a Metadata object, each of its ISBNs is also
    recorded in the ISBN cache against the book's google identifier. A
    failure on one entry is logged and does not stop the remaining entries.
    Processing ends early once `abort` is set.
    '''
    for entry in entries:
        try:
            mi = to_metadata(br, log, entry, timeout)
            if isinstance(mi, Metadata):
                result_queue.put(mi)
                for isbn in mi.all_isbns:
                    self.cache_isbn_to_identifier(isbn,
                            mi.identifiers['google'])
        except:
            # Best effort: report the offending entry and carry on.
            log.exception(
                'Failed to get metadata for identify entry:',
                etree.tostring(entry))
        if abort.is_set():
            break
def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5):
@ -207,8 +229,8 @@ class GoogleBooks(Source):
return as_unicode(e)
# There is no point running these queries in threads as google
# throttles requests returning Forbidden errors
get_all_details(br, log, entries, abort, result_queue, timeout)
# throttles requests returning 403 Forbidden errors
self.get_all_details(br, log, entries, abort, result_queue, timeout)
return None
@ -218,8 +240,14 @@ if __name__ == '__main__':
title_test)
test_identify_plugin(GoogleBooks.name,
[
(
{'title': 'Great Expectations', 'authors':['Charles Dickens']},
[title_test('Great Expectations', exact=True)]
{'identifiers':{'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True)]
),
#(
# {'title': 'Great Expectations', 'authors':['Charles Dickens']},
# [title_test('Great Expectations', exact=True)]
#),
])

View File

@ -242,9 +242,11 @@ class MobiReader(object):
self.debug = debug
self.embedded_mi = None
self.base_css_rules = textwrap.dedent('''
blockquote { margin: 0em 0em 0em 2em; text-align: justify }
body { text-align: justify }
p { margin: 0em; text-align: justify; text-indent: 1.5em }
blockquote { margin: 0em 0em 0em 2em; }
p { margin: 0em; text-indent: 1.5em }
.bold { font-weight: bold }

View File

@ -2256,22 +2256,22 @@ class MobiWriter(object):
return sectionIndices, sectionParents
def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
sectionArticles = list(section.iter())[1:]
# Iterate over the section's articles
sectionArticles = list(section.iter())[1:]
# Iterate over the section's articles
for (j, article) in enumerate(sectionArticles):
# Recompute offset and length for each article
offset, length = self._compute_offset_length(i, article, entries)
if self.opts.verbose > 2 :
self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )
for (j, article) in enumerate(sectionArticles):
# Recompute offset and length for each article
offset, length = self._compute_offset_length(i, article, entries)
if self.opts.verbose > 2 :
self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )
ctoc_map_index = i + j + 1
ctoc_map_index = i + j + 1
#hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
#hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
mySectionParent = sectionParents[sectionIndices[i-1]]
myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
mySectionParent.addArticle( myNewArticle )
#hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
#hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
mySectionParent = sectionParents[sectionIndices[i-1]]
myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
mySectionParent.addArticle( myNewArticle )
def _add_book_chapters(self, myDoc, indxt, indices):
chapterCount = myDoc.documentStructure.chapterCount()

View File

@ -32,6 +32,12 @@ class OEBOutput(OutputFormatPlugin):
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
href, root = results.pop(key, [None, None])
if root is not None:
if key == OPF_MIME:
try:
self.workaround_nook_cover_bug(root)
except:
self.log.exception('Something went wrong while trying to'
' workaround Nook cover bug, ignoring')
raw = etree.tostring(root, pretty_print=True,
encoding='utf-8', xml_declaration=True)
if key == OPF_MIME:
@ -49,3 +55,24 @@ class OEBOutput(OutputFormatPlugin):
with open(path, 'wb') as f:
f.write(str(item))
item.unload_data_from_memory(memory=path)
def workaround_nook_cover_bug(self, root): # {{{
    '''
    Rename the cover image's manifest id to "cover" in the OPF tree `root`.

    The rename is only done when all of the following hold: there is
    exactly one ``<meta name="cover">`` whose content is not already
    "cover", exactly one manifest item has that id and an ``image/*``
    media type, and no manifest item is already using the id "cover".
    `root` is modified in place; nothing is returned.
    '''
    cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
            ' @content != "cover"]')
    if len(cov) != 1:
        return
    cov = cov[0]
    covid = cov.get('content')

    # The id is passed as an XPath variable instead of being interpolated
    # into the expression, so ids containing quote characters cannot
    # produce an invalid expression.
    manpath = ('//*[local-name() = "manifest"]/*[local-name() = "item" '
            ' and @id=$item_id and @media-type]')
    manifest_item = root.xpath(manpath, item_id=covid)
    has_cover = root.xpath(manpath, item_id='cover')

    if len(manifest_item) != 1 or has_cover:
        return
    manifest_item = manifest_item[0]
    if manifest_item.get('media-type', '').startswith('image/'):
        self.log.warn('The cover image has an id != "cover". Renaming'
                ' to work around Nook Color bug')
        manifest_item.set('id', 'cover')
        cov.set('content', 'cover')
# }}}

View File

@ -100,12 +100,13 @@ def FontMapper(sbase=None, dbase=None, dkey=None):
class CSSFlattener(object):
def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
untable=False):
untable=False, page_break_on_body=False):
self.fbase = fbase
self.fkey = fkey
self.lineh = lineh
self.unfloat = unfloat
self.untable = untable
self.page_break_on_body = page_break_on_body
@classmethod
def config(cls, cfg):
@ -139,6 +140,8 @@ class CSSFlattener(object):
bs.append('margin-right : %fpt'%\
float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.page_break_on_body:
bs.extend(['page-break-before: always'])
if self.context.change_justification != 'original':
bs.append('text-align: '+ self.context.change_justification)
body.set('style', '; '.join(bs))

View File

@ -38,11 +38,12 @@ class SplitError(ValueError):
class Split(object):
def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
max_flow_size=0):
max_flow_size=0, remove_css_pagebreaks=True):
self.split_on_page_breaks = split_on_page_breaks
self.page_breaks_xpath = page_breaks_xpath
self.max_flow_size = max_flow_size
self.page_break_selectors = None
self.remove_css_pagebreaks = remove_css_pagebreaks
if self.page_breaks_xpath is not None:
self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]
@ -83,12 +84,16 @@ class Split(object):
if before and before != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText),
True))
if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-before')
except:
pass
try:
if after and after != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText),
False))
if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-after')
except:
pass
page_breaks = set([])

View File

@ -22,7 +22,8 @@ class PluginWidget(Widget, Ui_Form):
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
'despeckle', 'no_sort', 'no_process', 'landscape',
'dont_sharpen', 'disable_trim', 'wide', 'output_format',
'dont_grayscale', 'comic_image_size']
'dont_grayscale', 'comic_image_size',
'dont_add_comic_pages_to_toc']
)
self.db, self.book_id = db, book_id
for x in get_option('output_format').option.choices:

View File

@ -14,7 +14,7 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="2" column="0">
<item row="3" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>&amp;Number of Colors:</string>
@ -24,7 +24,7 @@
</property>
</widget>
</item>
<item row="2" column="1">
<item row="3" column="1">
<widget class="QSpinBox" name="opt_colors">
<property name="minimum">
<number>8</number>
@ -37,70 +37,70 @@
</property>
</widget>
</item>
<item row="4" column="0">
<item row="5" column="0">
<widget class="QCheckBox" name="opt_dont_normalize">
<property name="text">
<string>Disable &amp;normalize</string>
</property>
</widget>
</item>
<item row="5" column="0">
<item row="6" column="0">
<widget class="QCheckBox" name="opt_keep_aspect_ratio">
<property name="text">
<string>Keep &amp;aspect ratio</string>
</property>
</widget>
</item>
<item row="6" column="0">
<item row="7" column="0">
<widget class="QCheckBox" name="opt_dont_sharpen">
<property name="text">
<string>Disable &amp;Sharpening</string>
</property>
</widget>
</item>
<item row="7" column="0">
<item row="8" column="0">
<widget class="QCheckBox" name="opt_disable_trim">
<property name="text">
<string>Disable &amp;Trimming</string>
</property>
</widget>
</item>
<item row="8" column="0">
<item row="9" column="0">
<widget class="QCheckBox" name="opt_wide">
<property name="text">
<string>&amp;Wide</string>
</property>
</widget>
</item>
<item row="9" column="0">
<item row="10" column="0">
<widget class="QCheckBox" name="opt_landscape">
<property name="text">
<string>&amp;Landscape</string>
</property>
</widget>
</item>
<item row="10" column="0">
<item row="11" column="0">
<widget class="QCheckBox" name="opt_right2left">
<property name="text">
<string>&amp;Right to left</string>
</property>
</widget>
</item>
<item row="11" column="0">
<item row="12" column="0">
<widget class="QCheckBox" name="opt_no_sort">
<property name="text">
<string>Don't so&amp;rt</string>
</property>
</widget>
</item>
<item row="12" column="0">
<item row="13" column="0">
<widget class="QCheckBox" name="opt_despeckle">
<property name="text">
<string>De&amp;speckle</string>
</property>
</widget>
</item>
<item row="14" column="0">
<item row="15" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
</property>
</widget>
</item>
<item row="13" column="0">
<item row="14" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&amp;Output format:</string>
@ -130,7 +130,7 @@
</property>
</widget>
</item>
<item row="13" column="1">
<item row="14" column="1">
<widget class="QComboBox" name="opt_output_format"/>
</item>
<item row="1" column="0">
@ -140,7 +140,7 @@
</property>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Override image &amp;size:</string>
@ -150,9 +150,16 @@
</property>
</widget>
</item>
<item row="3" column="1">
<item row="4" column="1">
<widget class="QLineEdit" name="opt_comic_image_size"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_dont_add_comic_pages_to_toc">
<property name="text">
<string>Don't add links to &amp;pages to the Table of Contents for CBC files</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>

View File

@ -44,7 +44,8 @@
<widget class="QLabel" name="msg">
<property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
&lt;a href=&quot;http://drmfree.calibre-ebook.com/about#drm&quot;&gt;click here&lt;/a&gt;.&lt;p&gt;A large number of recent, DRM free releases are
available at &lt;a href=&quot;http://drmfree.calibre-ebook.com&quot;&gt;Open Books&lt;/a&gt;.</string>
</property>
<property name="wordWrap">
<bool>true</bool>

View File

@ -73,16 +73,17 @@ class TagCategories(QDialog, Ui_TagCategories):
if idx == 0:
continue
for n in category_values[idx]():
t = Item(name=n, label=label, index=len(self.all_items),icon=category_icons[idx], exists=True)
t = Item(name=n, label=label, index=len(self.all_items),
icon=category_icons[idx], exists=True)
self.all_items.append(t)
self.all_items_dict[label+':'+n] = t
self.all_items_dict[icu_lower(label+':'+n)] = t
self.categories = dict.copy(db.prefs.get('user_categories', {}))
if self.categories is None:
self.categories = {}
for cat in self.categories:
for item,l in enumerate(self.categories[cat]):
key = ':'.join([l[1], l[0]])
key = icu_lower(':'.join([l[1], l[0]]))
t = self.all_items_dict.get(key, None)
if l[1] in self.category_labels:
if t is None:
@ -231,6 +232,12 @@ class TagCategories(QDialog, Ui_TagCategories):
def accept(self):
    '''
    Save the category currently being edited, make sure every dotted
    prefix of each category name exists as a category of its own, then
    accept the dialog.
    '''
    self.save_category()
    # sorted() builds a separate list, so adding the missing prefixes to
    # self.categories while looping is safe.
    for cat_name in sorted(self.categories.keys(), key=sort_key):
        parts = cat_name.split('.')
        for depth in range(1, len(parts) + 1):
            prefix = '.'.join(parts[:depth])
            if prefix not in self.categories:
                self.categories[prefix] = []
    QDialog.accept(self)
def save_category(self):

View File

@ -58,10 +58,12 @@ class TagListEditor(QDialog, Ui_TagListEditor):
self.to_rename = {}
self.to_delete = set([])
self.original_names = {}
self.all_tags = {}
for k,v in data:
self.all_tags[v] = k
self.original_names[k] = v
for tag in sorted(self.all_tags.keys(), key=key):
item = ListWidgetItem(tag)
item.setData(Qt.UserRole, self.all_tags[tag])

View File

@ -209,7 +209,6 @@ class EmailMixin(object): # {{{
def __init__(self):
self.emailer = Emailer(self.job_manager)
self.emailer.start()
def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,
do_auto_convert=True, specific_format=None):
@ -255,6 +254,8 @@ class EmailMixin(object): # {{{
to_s = list(repeat(to, len(attachments)))
if attachments:
if not self.emailer.is_alive():
self.emailer.start()
self.emailer.send_mails(jobnames,
Dispatcher(partial(self.email_sent, remove=remove)),
attachments, to_s, subjects, texts, attachment_names)
@ -325,6 +326,8 @@ class EmailMixin(object): # {{{
files, auto = self.library_view.model().\
get_preferred_formats_from_ids([id_], fmts)
return files
if not self.emailer.is_alive():
self.emailer.start()
sent_mails = self.emailer.email_news(mi, remove,
get_fmts, self.email_sent)
if sent_mails:

View File

@ -7,17 +7,19 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QApplication, QFont, QFontInfo, QFontDialog
from calibre.gui2.preferences import ConfigWidgetBase, test_widget
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
from calibre.gui2.preferences.look_feel_ui import Ui_Form
from calibre.gui2 import config, gprefs, qt_app
from calibre.utils.localization import available_translations, \
get_language, get_lang
from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui):
self.gui = gui
db = gui.library_view.model().db
r = self.register
@ -61,6 +63,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('tags_browser_partition_method', gprefs, choices=choices)
r('tags_browser_collapse_at', gprefs)
choices = set([k for k in db.field_metadata.all_field_keys()
if db.field_metadata[k]['is_category'] and
db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']])
choices -= set(['authors', 'publisher', 'formats', 'news'])
self.opt_categories_using_hierarchy.update_items_cache(choices)
r('categories_using_hierarchy', db.prefs, setting=CommaSeparatedList,
choices=sorted(list(choices), key=sort_key))
self.current_font = None
self.change_font_button.clicked.connect(self.change_font)

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>670</width>
<height>392</height>
<height>422</height>
</rect>
</property>
<property name="windowTitle">
@ -136,7 +136,7 @@
<item>
<widget class="QLabel" name="label_6">
<property name="text">
<string>Tags browser category partitioning method:</string>
<string>Tags browser category &amp;partitioning method:</string>
</property>
<property name="buddy">
<cstring>opt_tags_browser_partition_method</cstring>
@ -157,7 +157,7 @@ if you never want subcategories</string>
<item>
<widget class="QLabel" name="label_6">
<property name="text">
<string>Collapse when more items than:</string>
<string>&amp;Collapse when more items than:</string>
</property>
<property name="buddy">
<cstring>opt_tags_browser_collapse_at</cstring>
@ -190,6 +190,28 @@ up into sub-categories. If the partition method is set to disable, this value is
</item>
</layout>
</item>
<item row="8" column="0">
<widget class="QLabel" name="label_81">
<property name="text">
<string>Categories with &amp;hierarchical items:</string>
</property>
<property name="buddy">
<cstring>opt_categories_using_hierarchy</cstring>
</property>
</widget>
</item>
<item row="8" column="1">
<widget class="MultiCompleteLineEdit" name="opt_categories_using_hierarchy">
<property name="toolTip">
<string>A comma-separated list of columns in which items containing
periods are displayed in the tag browser trees. For example, if
this box contains 'tags' then tags of the form 'Mystery.English'
and 'Mystery.Thriller' will be displayed with English and Thriller
both under 'Mystery'. If 'tags' is not in this box,
then the tags will be displayed each on their own line.</string>
</property>
</widget>
</item>
<item row="15" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
@ -275,6 +297,13 @@ up into sub-categories. If the partition method is set to disable, this value is
</item>
</layout>
</widget>
<customwidgets>
<customwidget>
<class>MultiCompleteLineEdit</class>
<extends>QLineEdit</extends>
<header>calibre/gui2/complete.h</header>
</customwidget>
</customwidgets>
<resources/>
<connections/>
</ui>

View File

@ -167,9 +167,10 @@ class StoreDownloadMixin(object):
def __init__(self):
self.store_downloader = StoreDownloader(self.job_manager)
self.store_downloader.start()
def download_from_store(self, url='', save_as_loc='', add_to_lib=True):
    '''
    Hand a download request for `url` to the store downloader, starting
    the downloader first when it is not alive, and show a status bar
    message naming the URL.
    '''
    downloader = self.store_downloader
    if not downloader.is_alive():
        downloader.start()
    on_done = Dispatcher(self.downloaded_from_store)
    db = self.library_view.model().db
    downloader.download_from_store(on_done, db, url, save_as_loc, add_to_lib)
    message = _('Downloading') + ' ' + url
    self.status_bar.show_message(message, 3000)

File diff suppressed because it is too large Load Diff

View File

@ -635,7 +635,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
mb.stop()
self.hide_windows()
self.emailer.stop()
if self.emailer.is_alive():
self.emailer.stop()
try:
try:
if self.content_server is not None:

View File

@ -124,9 +124,16 @@ def _match(query, value, matchkind):
for t in value:
t = icu_lower(t)
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
if ((matchkind == EQUALS_MATCH and query == t) or
(matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
(matchkind == CONTAINS_MATCH and query in t)):
if (matchkind == EQUALS_MATCH):
if query[0] == '.':
if t.startswith(query[1:]):
ql = len(query) - 1
if (len(t) == ql) or (t[ql:ql+1] == '.'):
return True
elif query == t:
return True
elif ((matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
(matchkind == CONTAINS_MATCH and query in t)):
return True
except re.error:
pass
@ -415,13 +422,25 @@ class ResultCache(SearchQueryParser): # {{{
if self.db_prefs is None:
return res
user_cats = self.db_prefs.get('user_categories', [])
if location not in user_cats:
return res
c = set(candidates)
for (item, category, ign) in user_cats[location]:
s = self.get_matches(category, '=' + item, candidates=c)
c -= s
res |= s
l = location.rfind('.')
if l > 0:
alt_loc = location[0:l]
alt_item = location[l+1:]
else:
alt_loc = None
for key in user_cats:
if key == location or key.startswith(location + '.'):
for (item, category, ign) in user_cats[key]:
s = self.get_matches(category, '=' + item, candidates=c)
c -= s
res |= s
elif key == alt_loc:
for (item, category, ign) in user_cats[key]:
if item == alt_item:
s = self.get_matches(category, '=' + item, candidates=c)
c -= s
res |= s
if query == 'false':
return candidates - res
return res

View File

@ -174,6 +174,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.prefs = DBPrefs(self)
defs = self.prefs.defaults
defs['gui_restriction'] = defs['cs_restriction'] = ''
defs['categories_using_hierarchy'] = []
# Migrate saved search and user categories to db preference scheme
def migrate_preference(key, default):
@ -812,6 +813,21 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
index_is_id=index_is_id),
extra=self.get_custom_extra(idx, label=meta['label'],
index_is_id=index_is_id))
user_cats = self.prefs['user_categories']
user_cat_vals = {}
for ucat in user_cats:
res = []
for name,cat,ign in user_cats[ucat]:
v = mi.get(cat, None)
if isinstance(v, list):
if name in v:
res.append([name,cat])
elif name == v:
res.append([name,cat])
user_cat_vals[ucat] = res
mi.user_categories = user_cat_vals
if get_cover:
mi.cover = self.cover(id, index_is_id=True, as_path=True)
return mi
@ -1406,7 +1422,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
# temporarily duplicating the categories lists.
taglist = {}
for c in categories.keys():
taglist[c] = dict(map(lambda t:(t.name, t), categories[c]))
taglist[c] = dict(map(lambda t:(icu_lower(t.name), t), categories[c]))
muc = self.prefs.get('grouped_search_make_user_categories', [])
gst = self.prefs.get('grouped_search_terms', {})
@ -1422,8 +1438,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
for user_cat in sorted(user_categories.keys(), key=sort_key):
items = []
for (name,label,ign) in user_categories[user_cat]:
if label in taglist and name in taglist[label]:
items.append(taglist[label][name])
n = icu_lower(name)
if label in taglist and n in taglist[label]:
items.append(taglist[label][n])
# else: do nothing, to not include nodes w zero counts
cat_name = '@' + user_cat # add the '@' to avoid name collision
# Not a problem if we accumulate entries in the icon map
@ -2434,7 +2451,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
stream.seek(0)
mi = get_metadata(stream, format, use_libprs_metadata=False)
stream.seek(0)
mi.series_index = 1.0
if not mi.series_index:
mi.series_index = 1.0
mi.tags = [_('News')]
if arg['add_title_tag']:
mi.tags += [arg['title']]

View File

@ -32,7 +32,7 @@ category_icon_map = {
'news' : 'news.png',
'tags' : 'tags.png',
'custom:' : 'column.png',
'user:' : 'drawer.png',
'user:' : 'tb_folder.png',
'search' : 'search.png'
}

View File

@ -413,6 +413,27 @@ The Book Details display shows you extra information and the cover for the curre
.. _jobs:
.. _tag_browser:
Tag Browser
-------------
.. image:: images/tag_browser.png
The Tag Browser allows you to easily browse your collection by Author/Tags/Series/etc. If you click on any item in the Tag Browser, for example, the author name, Isaac Asimov, then the list of books to the right is restricted to books by that author. Clicking once again on Isaac Asimov will restrict the list of books to books not by Isaac Asimov. A third click will remove the restriction. If you hold down the Ctrl or Shift keys and click on multiple items, then restrictions based on multiple items are created. For example, you could hold Ctrl and click on the tags History and Europe to find books on European history. The Tag Browser works by constructing search expressions that are automatically entered into the Search bar. It is a good way to learn how to construct basic search expressions.
There is a search bar at the top of the Tag Browser that allows you to easily find any item in the Tag Browser. In addition, you can right click on any item and choose to hide it or rename it or open a "Manage x" dialog that allows you to manage items of that kind. For example the "Manage Authors" dialog allows you to rename authors and control how their names are sorted.
For convenience, you can drag and drop books from the book list to items in the Tag Browser and that item will be automatically applied to the dropped books. For example, dragging a book to Isaac Asimov will set the author of that book to Isaac Asimov or dragging it to the tag History will add the tag History to its tags.
The outer-level items in the tag browser such as Authors and Series are called categories. You can create your own categories, called User Categories, which are useful for organizing items. For example, you can use the user categories editor (push the Manage User Categories button) to create a user category called Favorite Authors, then put the items for your favorites into the category. User categories act like built-in categories; you can click on items to search for them. You can search for all items in a category by right-clicking on the category name and choosing "Search for books in ...".
User categories can have sub-categories. For example, the user category Favorites.Authors is a sub-category of Favorites. You might also have Favorites.Series, in which case there will be two sub-categories under Favorites. Sub-categories can be created using Manage User Categories by entering names like the Favorites example. They can also be created by right-clicking on a user category, choosing "Add sub-category to ...", and entering the category name.
It is also possible to create hierarchies inside some of the built-in categories (the text categories). These hierarchies are shown with a small triangle that allows the sub-items to be hidden. To use hierarchies in a category, you must first go to Preferences / Look & Feel and enter the category name(s) into the "Categories with hierarchical items" box. Once this is done, items in that category that contain periods will be shown using the small triangle. For example, assume you create a custom column called "Genre" and indicate that it contains hierarchical items. Once done, items such as Mystery.Thriller and Mystery.English will display as Mystery with the small triangle next to it. Clicking on the triangle will show Thriller and English as sub-items.
You can drag and drop items in the Tag browser onto user categories to add them to that category.
Jobs
-----
.. image:: images/jobs.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

View File

@ -396,6 +396,34 @@ class BuiltinListitem(BuiltinFormatterFunction):
except:
return ''
class BuiltinSublist(BuiltinFormatterFunction):
    # Template function that slices a separator-delimited value the way a
    # Python list is sliced.
    name = 'sublist'
    arg_count = 4
    doc = _('sublist(val, start_index, end_index, separator) -- interpret the '
            'value as a list of items separated by `separator`, returning a '
            'new list made from the `start_index`th to the `end_index`th item. '
            'The first item is number zero. If an index is negative, then it '
            'counts from the end of the list. As a special case, an end_index '
            'of zero is assumed to be the length of the list. Examples using '
            'basic template mode and assuming a #genre value of A.B.C: '
            '{#genre:sublist(-1,0,.)} returns C<br/>'
            '{#genre:sublist(0,1,.)} returns A<br/>'
            '{#genre:sublist(0,-1,.)} returns A.B')

    def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):
        '''
        Return the items of `val` (split on `sep`) from `start_index` up to,
        but not including, `end_index`, joined again with `sep`.

        An empty `val` gives ''. An `end_index` of zero means "to the end of
        the list". Indices that int() cannot parse raise ValueError; that
        error is not caught here.
        '''
        if not val:
            return ''
        si = int(start_index)
        ei = int(end_index)
        val = val.split(sep)
        try:
            if ei == 0:
                # Zero cannot be a useful exclusive end, so it stands for
                # "through the last item".
                return sep.join(val[si:])
            else:
                return sep.join(val[si:ei])
        except Exception:
            # Best effort: render nothing rather than break the template,
            # without swallowing KeyboardInterrupt/SystemExit.
            return ''
class BuiltinUppercase(BuiltinFormatterFunction):
name = 'uppercase'
arg_count = 1
@ -447,6 +475,7 @@ builtin_re = BuiltinRe()
builtin_shorten = BuiltinShorten()
builtin_strcat = BuiltinStrcat()
builtin_strcmp = BuiltinStrcmp()
builtin_sublist = BuiltinSublist()
builtin_substr = BuiltinSubstr()
builtin_subtract = BuiltinSubtract()
builtin_switch = BuiltinSwitch()

View File

@ -136,7 +136,7 @@ class FeedTemplate(Template):
head.append(STYLE(style, type='text/css'))
if extra_css:
head.append(STYLE(extra_css, type='text/css'))
body = BODY(style='page-break-before:always')
body = BODY()
body.append(self.get_navbar(f, feeds))
div = DIV(
@ -322,7 +322,7 @@ class TouchscreenFeedTemplate(Template):
head.append(STYLE(style, type='text/css'))
if extra_css:
head.append(STYLE(extra_css, type='text/css'))
body = BODY(style='page-break-before:always')
body = BODY()
div = DIV(
top_navbar,
H2(feed.title, CLASS('feed_title'))

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element):
# Potentially accept color values
def cnv_color(attribute, arg, element):
""" A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
rr, gg and bb are 8-bit hexadecimal digits.
"""
return str(arg)
def cnv_configtype(attribute, arg, element):
@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element):
# Understand different date formats
def cnv_date(attribute, arg, element):
""" A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
"""
return str(arg)
def cnv_dateTime(attribute, arg, element):
""" A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
"""
return str(arg)
def cnv_double(attribute, arg, element):
@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element):
return str(arg)
def cnv_family(attribute, arg, element):
    """ A style family.

    Returns arg converted with str() when it names one of the allowed style
    families. Raises ValueError for any other value. The attribute and
    element arguments are not used.
    """
    family = str(arg)
    if family not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
                      "graphic", "presentation", "drawing-page", "chart"):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("'%s' not allowed" % family)
    return family
def __save_prefix(attribute, arg, element):
    """ Return arg as unicode. When arg has the form "prefix:rest" and the
        prefix resolves to a known namespace, first ask the element to look
        up or assign a prefix for that namespace.
    """
    prefix = arg.split(':', 1)[0]
    if prefix != arg:
        namespace = element.get_knownns(prefix)
        # An unknown prefix is tolerated on purpose; the value is passed
        # through unchanged instead of raising ValueError.
        if namespace is not None:
            # Called only for its effect on the element; the prefix it
            # returns is not needed here.
            element.get_nsprefix(namespace)
    return unicode(arg)
def cnv_formula(attribute, arg, element):
""" A string containing a formula. Formulas do not have a predefined syntax, but the string should
begin with a namespace prefix, followed by a : (COLON, U+003A) separator, followed by the text
of the formula. The namespace bound to the prefix determines the syntax and semantics of the
formula.
"""
return __save_prefix(attribute, arg, element)
def cnv_ID(attribute, arg, element):
return str(arg)
@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element):
pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
def cnv_length(attribute, arg, element):
""" A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the
Units of Measure defined in §5.9.13 of [XSL].
"""
global pattern_length
if not pattern_length.match(arg):
raise ValueError, "'%s' is not a valid length" % arg
@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element):
if not pattern_namespacedToken.match(arg):
raise ValueError, "'%s' is not a valid namespaced token" % arg
return arg
return __save_prefix(attribute, arg, element)
# Must accept string as argument
# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
# Essentially an XML name minus ':'
def cnv_NCName(attribute, arg, element):
""" NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
Essentially an XML name minus ':'
"""
if type(arg) in types.StringTypes:
return make_NCName(arg)
else:
@ -226,6 +258,7 @@ attrconverters = {
((ANIMNS,u'name'), None): cnv_string,
((ANIMNS,u'sub-item'), None): cnv_string,
((ANIMNS,u'value'), None): cnv_string,
# ((DBNS,u'type'), None): cnv_namespacedToken,
((CHARTNS,u'attached-axis'), None): cnv_string,
((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor,
((CHARTNS,u'class'), None): cnv_namespacedToken,
@ -288,7 +321,7 @@ attrconverters = {
((CHARTNS,u'values-cell-range-address'), None): cnv_string,
((CHARTNS,u'vertical'), None): cnv_boolean,
((CHARTNS,u'visible'), None): cnv_boolean,
((CONFIGNS,u'name'), None): cnv_string,
((CONFIGNS,u'name'), None): cnv_formula,
((CONFIGNS,u'type'), None): cnv_configtype,
((DR3DNS,u'ambient-color'), None): cnv_string,
((DR3DNS,u'back-scale'), None): cnv_string,
@ -369,11 +402,11 @@ attrconverters = {
((DRAWNS,u'decimal-places'), None): cnv_string,
((DRAWNS,u'display'), None): cnv_string,
((DRAWNS,u'display-name'), None): cnv_string,
((DRAWNS,u'distance'), None): cnv_string,
((DRAWNS,u'distance'), None): cnv_lengthorpercent,
((DRAWNS,u'dots1'), None): cnv_integer,
((DRAWNS,u'dots1-length'), None): cnv_length,
((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent,
((DRAWNS,u'dots2'), None): cnv_integer,
((DRAWNS,u'dots2-length'), None): cnv_length,
((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent,
((DRAWNS,u'end-angle'), None): cnv_double,
((DRAWNS,u'end'), None): cnv_string,
((DRAWNS,u'end-color'), None): cnv_string,
@ -383,7 +416,7 @@ attrconverters = {
((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string,
((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string,
((DRAWNS,u'end-shape'), None): cnv_IDREF,
((DRAWNS,u'engine'), None): cnv_string,
((DRAWNS,u'engine'), None): cnv_namespacedToken,
((DRAWNS,u'enhanced-path'), None): cnv_string,
((DRAWNS,u'escape-direction'), None): cnv_string,
((DRAWNS,u'extrusion-allowed'), None): cnv_boolean,
@ -604,7 +637,7 @@ attrconverters = {
((FORMNS,u'button-type'), None): cnv_string,
((FORMNS,u'command'), None): cnv_string,
((FORMNS,u'command-type'), None): cnv_string,
((FORMNS,u'control-implementation'), None): cnv_string,
((FORMNS,u'control-implementation'), None): cnv_namespacedToken,
((FORMNS,u'convert-empty-to-null'), None): cnv_boolean,
((FORMNS,u'current-selected'), None): cnv_boolean,
((FORMNS,u'current-state'), None): cnv_string,
@ -800,8 +833,8 @@ attrconverters = {
((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean,
((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger,
((PRESENTATIONNS,u'visibility'), None): cnv_string,
((SCRIPTNS,u'event-name'), None): cnv_string,
((SCRIPTNS,u'language'), None): cnv_string,
((SCRIPTNS,u'event-name'), None): cnv_formula,
((SCRIPTNS,u'language'), None): cnv_formula,
((SCRIPTNS,u'macro-name'), None): cnv_string,
((SMILNS,u'accelerate'), None): cnv_double,
((SMILNS,u'accumulate'), None): cnv_string,
@ -1087,7 +1120,7 @@ attrconverters = {
((SVGNS,u'y2'), None): cnv_lengthorpercent,
((TABLENS,u'acceptance-state'), None): cnv_string,
((TABLENS,u'add-empty-lines'), None): cnv_boolean,
((TABLENS,u'algorithm'), None): cnv_string,
((TABLENS,u'algorithm'), None): cnv_formula,
((TABLENS,u'align'), None): cnv_string,
((TABLENS,u'allow-empty-cell'), None): cnv_boolean,
((TABLENS,u'application-data'), None): cnv_string,
@ -1106,7 +1139,7 @@ attrconverters = {
((TABLENS,u'cell-range'), None): cnv_string,
((TABLENS,u'column'), None): cnv_integer,
((TABLENS,u'comment'), None): cnv_string,
((TABLENS,u'condition'), None): cnv_string,
((TABLENS,u'condition'), None): cnv_formula,
((TABLENS,u'condition-source'), None): cnv_string,
((TABLENS,u'condition-source-range-address'), None): cnv_string,
((TABLENS,u'contains-error'), None): cnv_boolean,
@ -1144,13 +1177,13 @@ attrconverters = {
((TABLENS,u'end-x'), None): cnv_length,
((TABLENS,u'end-y'), None): cnv_length,
((TABLENS,u'execute'), None): cnv_boolean,
((TABLENS,u'expression'), None): cnv_string,
((TABLENS,u'expression'), None): cnv_formula,
((TABLENS,u'field-name'), None): cnv_string,
((TABLENS,u'field-number'), None): cnv_nonNegativeInteger,
((TABLENS,u'field-number'), None): cnv_string,
((TABLENS,u'filter-name'), None): cnv_string,
((TABLENS,u'filter-options'), None): cnv_string,
((TABLENS,u'formula'), None): cnv_string,
((TABLENS,u'formula'), None): cnv_formula,
((TABLENS,u'function'), None): cnv_string,
((TABLENS,u'function'), None): cnv_string,
((TABLENS,u'grand-total'), None): cnv_string,
@ -1290,7 +1323,7 @@ attrconverters = {
((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean,
((TEXTNS,u'comma-separated'), None): cnv_boolean,
((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef,
((TEXTNS,u'condition'), None): cnv_string,
((TEXTNS,u'condition'), None): cnv_formula,
((TEXTNS,u'connection-name'), None): cnv_string,
((TEXTNS,u'consecutive-numbering'), None): cnv_boolean,
((TEXTNS,u'continue-numbering'), None): cnv_boolean,
@ -1321,7 +1354,7 @@ attrconverters = {
((TEXTNS,u'first-row-start-column'), None): cnv_string,
((TEXTNS,u'fixed'), None): cnv_boolean,
((TEXTNS,u'footnotes-position'), None): cnv_string,
((TEXTNS,u'formula'), None): cnv_string,
((TEXTNS,u'formula'), None): cnv_formula,
((TEXTNS,u'global'), None): cnv_boolean,
((TEXTNS,u'howpublished'), None): cnv_string,
((TEXTNS,u'id'), None): cnv_ID,
@ -1437,7 +1470,10 @@ attrconverters = {
class AttrConverters:
def convert(self, attribute, value, element):
conversion = attrconverters.get((attribute,element), None)
""" Based on the element, figures out how to check/convert the attribute value
All values are converted to string
"""
conversion = attrconverters.get((attribute, element.qname), None)
if conversion is not None:
return conversion(attribute, value, element)
else:

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -112,6 +112,9 @@ class Node(xml.dom.Node):
return self.childNodes[-1]
def insertBefore(self, newChild, refChild):
""" Inserts the node newChild before the existing child node refChild.
If refChild is null, insert newChild at the end of the list of children.
"""
if newChild.nodeType not in self._child_node_types:
raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName)
if newChild.parentNode is not None:
@ -135,21 +138,26 @@ class Node(xml.dom.Node):
newChild.parentNode = self
return newChild
def appendChild(self, node):
if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
for c in tuple(node.childNodes):
def appendChild(self, newChild):
""" Adds the node newChild to the end of the list of children of this node.
If the newChild is already in the tree, it is first removed.
"""
if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
for c in tuple(newChild.childNodes):
self.appendChild(c)
### The DOM does not clearly specify what to return in this case
return node
if node.nodeType not in self._child_node_types:
raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName)
if node.parentNode is not None:
node.parentNode.removeChild(node)
_append_child(self, node)
node.nextSibling = None
return node
return newChild
if newChild.nodeType not in self._child_node_types:
raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName)
if newChild.parentNode is not None:
newChild.parentNode.removeChild(newChild)
_append_child(self, newChild)
newChild.nextSibling = None
return newChild
def removeChild(self, oldChild):
""" Removes the child node indicated by oldChild from the list of children, and returns it.
"""
#FIXME: update ownerDocument.element_dict or find other solution
try:
self.childNodes.remove(oldChild)
@ -191,8 +199,8 @@ def _append_child(self, node):
node.__dict__["parentNode"] = self
class Childless:
"""Mixin that makes childless-ness easy to implement and avoids
the complexity of the Node methods that deal with children.
""" Mixin that makes childless-ness easy to implement and avoids
the complexity of the Node methods that deal with children.
"""
attributes = None
@ -207,6 +215,7 @@ class Childless:
return None
def appendChild(self, node):
""" Raises an error """
raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes cannot have children")
@ -214,14 +223,17 @@ class Childless:
return False
def insertBefore(self, newChild, refChild):
""" Raises an error """
raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes do not have children")
def removeChild(self, oldChild):
""" Raises an error """
raise xml.dom.NotFoundErr(
self.tagName + " nodes do not have children")
def replaceChild(self, newChild, oldChild):
""" Raises an error """
raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes do not have children")
@ -247,8 +259,12 @@ class CDATASection(Childless, Text):
nodeType = Node.CDATA_SECTION_NODE
def toXml(self,level,f):
""" Generate XML output of the node. If the text contains "]]>", then
escape it by going out of CDATA mode (]]>), then write the string
and then go into CDATA mode again. (<![CDATA[)
"""
if self.data:
f.write('<![CDATA[%s]]>' % self.data)
f.write('<![CDATA[%s]]>' % self.data.replace(']]>',']]>]]><![CDATA['))
class Element(Node):
""" Creates a arbitrary element and is intended to be subclassed not used on its own.
@ -310,7 +326,19 @@ class Element(Node):
if self.getAttrNS(r[0],r[1]) is None:
raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)
def get_knownns(self, prefix):
    """ Resolve a namespace prefix to the namespace it stands for.

    Odfpy keeps a table of known namespaces (nsdict, namespace -> prefix).
    The table is searched for an entry whose prefix equals the given one.

    Returns the matching namespace, or None when the prefix is unknown.
    """
    candidates = [namespace for namespace, known in nsdict.items() if known == prefix]
    if candidates:
        return candidates[0]
    return None
def get_nsprefix(self, namespace):
""" Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
and needs to look up or assign the prefix for it.
"""
if namespace is None: namespace = ""
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
@ -339,6 +367,9 @@ class Element(Node):
self.ownerDocument.rebuild_caches(element)
def addText(self, text, check_grammar=True):
""" Adds text to an element
Setting check_grammar=False turns off grammar checking
"""
if check_grammar and self.qname not in grammar.allows_text:
raise IllegalText, "The <%s> element does not allow text" % self.tagName
else:
@ -346,6 +377,9 @@ class Element(Node):
self.appendChild(Text(text))
def addCDATA(self, cdata, check_grammar=True):
""" Adds CDATA to an element
Setting check_grammar=False turns off grammar checking
"""
if check_grammar and self.qname not in grammar.allows_text:
raise IllegalText, "The <%s> element does not allow text" % self.tagName
else:
@ -403,17 +437,18 @@ class Element(Node):
# if allowed_attrs and (namespace, localpart) not in allowed_attrs:
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
c = AttrConverters()
self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
def getAttrNS(self, namespace, localpart):
prefix = self.get_nsprefix(namespace)
return self.attributes.get(prefix + ":" + localpart)
return self.attributes.get((namespace, localpart))
def removeAttrNS(self, namespace, localpart):
prefix = self.get_nsprefix(namespace)
del self.attributes[prefix + ":" + localpart]
del self.attributes[(namespace, localpart)]
def getAttribute(self, attr):
""" Get an attribute value. The method knows which namespace the attribute is in
"""
allowed_attrs = self.allowed_attributes()
if allowed_attrs is None:
if type(attr) == type(()):
@ -432,8 +467,9 @@ class Element(Node):
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
for attkey in self.attributes.keys():
f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
for qname in self.attributes.keys():
prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
f.write('>')
def write_close_tag(self, level, f):
@ -445,8 +481,9 @@ class Element(Node):
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
for attkey in self.attributes.keys():
f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
for qname in self.attributes.keys():
prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
if self.childNodes:
f.write('>')
for element in self.childNodes:
@ -464,6 +501,7 @@ class Element(Node):
return accumulator
def getElementsByType(self, element):
""" Gets elements based on the type, which is function from text.py, draw.py etc. """
obj = element(check_grammar=False)
return self._getElementsByObj(obj,[])

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public

View File

@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler):
self.level = self.level + 1
# Add any accumulated text content
content = ''.join(self.data).strip()
if len(content) > 0:
content = ''.join(self.data)
if len(content.strip()) > 0:
self.parent.addText(content, check_grammar=False)
self.data = []
# Create the element

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -17,7 +17,7 @@
#
# Contributor(s):
#
TOOLSVERSION = u"ODFPY/0.9.2dev"
TOOLSVERSION = u"ODFPY/0.9.4dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@ -28,19 +28,23 @@ DCNS = u"http://purl.org/dc/elements/1.1/"
DOMNS = u"http://www.w3.org/2001/xml-events"
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0"
GRDDLNS = u"http://www.w3.org/2003/g/data-view#"
KOFFICENS = u"http://www.koffice.org/2005/"
MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
MATHNS = u"http://www.w3.org/1998/Math/MathML"
METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2"
OOONS = u"http://openoffice.org/2004/office"
OOOWNS = u"http://openoffice.org/2004/writer"
OOOCNS = u"http://openoffice.org/2004/calc"
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
RPTNS = u"http://openoffice.org/2005/report"
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
@ -50,7 +54,8 @@ TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
XFORMSNS = u"http://www.w3.org/2002/xforms"
XLINKNS = u"http://www.w3.org/1999/xlink"
XMLNS = u"http://www.w3.org/XML/1998/namespace"
XSDNS = u"http://www.w3.org/2001/XMLSchema"
XSINS = u"http://www.w3.org/2001/XMLSchema-instance"
nsdict = {
ANIMNS: u'anim',
@ -61,19 +66,23 @@ nsdict = {
DOMNS: u'dom',
DR3DNS: u'dr3d',
DRAWNS: u'draw',
FIELDNS: u'field',
FONS: u'fo',
FORMNS: u'form',
GRDDLNS: u'grddl',
KOFFICENS: u'koffice',
MANIFESTNS: u'manifest',
MATHNS: u'math',
METANS: u'meta',
NUMBERNS: u'number',
OFFICENS: u'office',
OFNS: u'of',
OOONS: u'ooo',
OOOWNS: u'ooow',
OOOCNS: u'oooc',
PRESENTATIONNS: u'presentation',
RDFANS: u'rdfa',
RPTNS: u'rpt',
SCRIPTNS: u'script',
SMILNS: u'smil',
STYLENS: u'style',
@ -83,4 +92,6 @@ nsdict = {
XFORMSNS: u'xforms',
XLINKNS: u'xlink',
XMLNS: u'xml',
XSDNS: u'xsd',
XSINS: u'xsi',
}

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -20,15 +20,18 @@
#
#import pdb
#pdb.set_trace()
import zipfile
from xml.sax import handler, expatreader
from xml.sax.xmlreader import InputSource
from xml.sax import handler
from xml.sax.saxutils import escape, quoteattr
from cStringIO import StringIO
from xml.dom import Node
from namespaces import DCNS, DRAWNS, FONS, \
METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \
STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
from opendocument import load
from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
if False: # Added by Kovid
DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS
# Handling of styles
#
@ -72,8 +75,8 @@ class StyleToCSS:
(FONS,u"border-left"): self.c_fo,
(FONS,u"border-right"): self.c_fo,
(FONS,u"border-top"): self.c_fo,
(FONS,u"break-after"): self.c_break,
(FONS,u"break-before"): self.c_break,
(FONS,u"break-after"): self.c_break, # Added by Kovid
(FONS,u"break-before"): self.c_break,# Added by Kovid
(FONS,u"color"): self.c_fo,
(FONS,u"font-family"): self.c_fo,
(FONS,u"font-size"): self.c_fo,
@ -136,7 +139,7 @@ class StyleToCSS:
selector = rule[1]
sdict[selector] = val
def c_break(self, ruleset, sdict, rule, val):
def c_break(self, ruleset, sdict, rule, val): # Added by Kovid
property = 'page-' + rule[1]
values = {'auto': 'auto', 'column': 'always', 'page': 'always',
'even-page': 'left', 'odd-page': 'right',
@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler):
self.elements = {
(DCNS, 'title'): (self.s_processcont, self.e_dc_title),
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
(DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
(DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
(DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
(DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
(DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
(DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
(DRAWNS, 'image'): (self.s_draw_image, None),
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
(DRAWNS, "layer-set"):(self.s_ignorexml, None),
(DRAWNS, 'object'): (self.s_draw_object, None),
(DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler):
(NUMBERNS, "date-style"):(self.s_ignorexml, None),
(NUMBERNS, "number-style"):(self.s_ignorexml, None),
(NUMBERNS, "text-style"):(self.s_ignorexml, None),
(OFFICENS, "annotation"):(self.s_ignorexml, None),
(OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
(OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "forms"):(self.s_ignorexml, None),
(OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler):
(OFFICENS, "styles"):(self.s_office_styles, None),
(OFFICENS, "text"):(self.s_office_text, self.e_office_text),
(OFFICENS, "scripts"):(self.s_ignorexml, None),
(OFFICENS, "settings"):(self.s_ignorexml, None),
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
(STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler):
# (STYLENS, "header-style"):(self.s_style_header_style, None),
(STYLENS, "master-page"):(self.s_style_master_page, None),
(STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
(STYLENS, "page-layout"):(self.s_ignorexml, None),
(STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
# (STYLENS, "page-layout"):(self.s_ignorexml, None),
(STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
(STYLENS, "style"):(self.s_style_style, self.e_style_style),
(STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
(TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
(TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
(TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
(TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
(TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
(TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'line-break'):(self.s_text_line_break, None),
@ -430,10 +443,66 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
}
if embedable:
self.elements[(OFFICENS, u"text")] = (None,None)
self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
self.elements[(OFFICENS, u"presentation")] = (None,None)
self.elements[(OFFICENS, u"document-content")] = (None,None)
self.make_embedable()
self._resetobject()
def set_plain(self):
""" Tell the parser to not generate CSS.
Clears the generate_css flag on this converter.
"""
self.generate_css = False
def set_embedable(self):
    """ Tells the converter to only output the parts inside the <body>.

    The handler pairs registered for the office:text, office:spreadsheet,
    office:presentation and office:document-content elements are replaced
    by (None, None).
    """
    for localname in (u"text", u"spreadsheet", u"presentation", u"document-content"):
        self.elements[(OFFICENS, localname)] = (None, None)

# The constructor code calls self.make_embedable() when embedable is true,
# so keep that spelling resolvable by aliasing it to set_embedable.
# NOTE(review): confirm no separate make_embedable is defined elsewhere.
make_embedable = set_embedable
def add_style_file(self, stylefilename, media=None):
    """ Add a link to an external style file.
    Also turns off the embedding of styles in the HTML.

    stylefilename -- value for the href attribute of the <link> element
    media         -- optional value for the media attribute; left out of
                     the <link> element when empty or None

    The generated <link> element is appended to self.metatags.
    """
    self.use_internal_css = False
    self.stylefilename = stylefilename
    # quoteattr() escapes &, < and > and picks a quote character that does
    # not occur in the value, so odd file names cannot break the markup.
    link_attrs = 'rel="stylesheet" type="text/css" href=%s' % quoteattr(stylefilename)
    if media:
        link_attrs += ' media=%s' % quoteattr(media)
    self.metatags.append('<link %s/>\n' % link_attrs)
def _resetfootnotes(self):
    """ Put the footnote/endnote bookkeeping back to its empty state:
    no note body text, note counter at zero, empty note dictionary.
    """
    self.notebody = ''
    self.currentnote = 0
    self.notedict = {}
def _resetobject(self):
    """ Reinitialise every per-document attribute of the converter.

    Output lines, collected text, the tag/HTML/processing stacks, the
    style tables, the footnote state and the meta tags are all replaced
    by fresh empty values, and output is routed to self._wlines again.
    """
    # Output buffer and the function used to write into it
    self.lines = []
    self._wfunc = self._wlines

    # String fields all start out empty
    self.xmlfile = self.title = self.language = self.creator = ''

    # Parsing state
    self.data = []
    self.tagstack = TagStack()
    self.htmlstack = []
    self.pstack = []
    self.processelem = self.processcont = True
    self.listtypes = {}
    self.headinglevels = [0] * 11 # level 0 to 10
    self.use_internal_css = True
    self.cs = StyleToCSS()
    self.anchors = {}

    # Style declarations
    self.stylestack = []
    self.styledict = {}
    self.currentstyle = None

    self._resetfootnotes()

    # Tags from meta.xml
    self.metatags = []
def writeout(self, s):
@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler):
def opentag(self, tag, attrs={}, block=False):
""" Create an open HTML tag """
self.htmlstack.append((tag,attrs,block))
a = []
for key,val in attrs.items():
a.append('''%s=%s''' % (key, quoteattr(val)))
@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler):
self.writeout("\n")
def closetag(self, tag, block=True):
""" Close an open HTML tag """
self.htmlstack.pop()
self.writeout("</%s>" % tag)
if block == True:
self.writeout("\n")
@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler):
a.append('''%s=%s''' % (key, quoteattr(val)))
self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
#--------------------------------------------------
# Interface to parser
#--------------------------------------------------
def characters(self, data):
if self.processelem and self.processcont:
self.data.append(data)
def handle_starttag(self, tag, method, attrs):
method(tag,attrs)
def handle_endtag(self, tag, attrs, method):
method(tag, attrs)
def startElementNS(self, tag, qname, attrs):
self.pstack.append( (self.processelem, self.processcont) )
if self.processelem:
@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler):
self.unknown_endtag(tag, attrs)
self.processelem, self.processcont = self.pstack.pop()
#--------------------------------------------------
def handle_starttag(self, tag, method, attrs):
method(tag,attrs)
def handle_endtag(self, tag, attrs, method):
method(tag, attrs)
def unknown_starttag(self, tag, attrs):
pass
@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler):
self.processelem = False
def s_ignorecont(self, tag, attrs):
""" Stop processing the text nodes """
self.processcont = False
def s_processcont(self, tag, attrs):
""" Start processing the text nodes """
self.processcont = True
def classname(self, attrs):
""" Generate a class name from a style name """
c = attrs[(TEXTNS,'style-name')]
c = attrs.get((TEXTNS,'style-name'),'')
c = c.replace(".","_")
return c
def get_anchor(self, name):
""" Create a unique anchor id for a href name """
if not self.anchors.has_key(name):
# Changed by Kovid
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler):
def e_dc_title(self, tag, attrs):
""" Get the title from the meta data and create a HTML <title>
"""
self.metatags.append('<title>%s</title>\n' % escape(''.join(self.data)))
self.title = ''.join(self.data)
#self.metatags.append('<title>%s</title>\n' % escape(self.title))
self.data = []
def e_dc_metatag(self, tag, attrs):
@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler):
def e_dc_contentlanguage(self, tag, attrs):
""" Set the content language. Identifies the targeted audience
"""
self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % ''.join(self.data))
self.language = ''.join(self.data)
self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % escape(self.language))
self.data = []
def e_dc_creator(self, tag, attrs):
    """ Set the content creator from the collected character data.

    The text gathered in self.data is stored in self.creator and a
    matching <meta> element is appended to self.metatags; self.data is
    then emptied for the next element.
    """
    self.creator = ''.join(self.data)
    # quoteattr() (unlike escape()) also copes with quote characters in
    # the name, so the content attribute always stays well-formed.
    self.metatags.append('<meta http-equiv="creator" content=%s/>\n' % quoteattr(self.creator))
    self.data = []
def s_custom_shape(self, tag, attrs):
    """ Open the <div> that represents a <draw:custom-shape>.

    The class name comes from the draw:style-name attribute (prefix "G-")
    or, when that is missing, from presentation:style-name (prefix "PR-").
    The inline style positions the <div> absolutely unless the shape is
    anchored as-char, and copies svg:width/height/x/y when present.
    With generate_css switched off a bare <div> is opened instead.
    """
    shape_class = "G-" + attrs.get( (DRAWNS,'style-name'), "")
    if shape_class == 'G-':
        shape_class = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
    shape_class = shape_class.replace(".","_")

    anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
    if anchor_type == 'as-char':
        css = ''
    elif anchor_type in ("paragraph", 'char'):
        css = 'position:absolute;'
    else:
        css = "position: absolute;"

    # svg attribute -> CSS property, in the order they are emitted
    geometry = (("width", "width"), ("height", "height"), ("x", "left"), ("y", "top"))
    for svg_name, css_property in geometry:
        if (SVGNS, svg_name) in attrs:
            css = css + css_property + ":" + attrs[(SVGNS, svg_name)] + ";"

    if not self.generate_css:
        self.opentag('div')
    else:
        self.opentag('div', {'class': shape_class, 'style': css})
def e_custom_shape(self, tag, attrs):
""" End the <draw:custom-shape> by closing its <div>
"""
self.closetag('div')
def s_draw_frame(self, tag, attrs):
""" A <draw:frame> is made into a <div> in HTML which is then styled
"""
anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
htmltag = 'div'
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
if name == 'G-':
@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler):
htmltag = 'div'
style = ''
else:
style = "position: absolute;"
style = "position:absolute;"
if attrs.has_key( (SVGNS,"width") ):
style = style + "width:" + attrs[(SVGNS,"width")] + ";"
if attrs.has_key( (SVGNS,"height") ):
@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler):
htmlattrs['style'] = "display: block;"
self.emptytag('img', htmlattrs)
def s_draw_object(self, tag, attrs):
""" A <draw:object> is an embedded object in the document (e.g. spreadsheet in presentation).
Handling is currently switched off: the unconditional return below
leaves the method before anything is processed.
"""
return # Added by Kovid
# NOTE(review): everything from here on is unreachable because of the
# return above. It would look up the xlink:href of the object and walk the
# top node of the child whose folder matches it.
objhref = attrs[(XLINKNS,"href")]
# Remove leading "./": from "./Object 1" to "Object 1"
# objhref = objhref [2:]
# Not using os.path.join since it fails to find the file on Windows.
# objcontentpath = '/'.join([objhref, 'content.xml'])
# NOTE(review): verify the attribute name 'childnodes' on the document
# before re-enabling this code -- TODO confirm.
for c in self.document.childnodes:
if c.folder == objhref:
self._walknode(c.topnode)
def s_draw_object_ole(self, tag, attrs):
    """ A <draw:object-ole> is an embedded OLE object in the document (e.g. MS Graph).

    Only objects whose draw:class-id identifies a Microsoft Graph 97 chart
    produce output: an empty <a name="object_ole_graph" class="ole-graph">
    placeholder. Objects with another class id, or none at all, are ignored.
    """
    # The attribute may be absent; .get() lets the test below handle that
    # instead of failing with a KeyError.
    class_id = attrs.get((DRAWNS,"class-id"))
    if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart
        tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
        self.opentag('a', tagattrs)
        # Second argument of closetag() is the block flag, not the tag
        # attributes; False keeps the output free of a trailing newline.
        self.closetag('a', False)
def s_draw_page(self, tag, attrs):
""" A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
Therefore if you convert a ODP file, you get a series of <fieldset>s.
@ -655,13 +801,9 @@ class ODF2XHTML(handler.ContentHandler):
def html_body(self, tag, attrs):
self.writedata()
if self.generate_css:
if self.generate_css and self.use_internal_css:
self.opentag('style', {'type':"text/css"}, True)
self.writeout('/*<![CDATA[*/\n')
self.writeout('\nimg { width: 100%; height: 100%; }\n')
self.writeout('* { padding: 0; margin: 0; background-color:white; }\n')
self.writeout('body { margin: 0 1em; }\n')
self.writeout('ol, ul { padding-left: 2em; }\n')
self.generate_stylesheet()
self.writeout('/*]]>*/\n')
self.closetag('style')
@ -669,6 +811,16 @@ class ODF2XHTML(handler.ContentHandler):
self.closetag('head')
self.opentag('body', block=True)
# background-color: white removed by Kovid for #9118
# Specifying an explicit bg color prevents ebook readers
# from successfully inverting colors
default_styles = """
img { width: 100%; height: 100%; }
* { padding: 0; margin: 0; }
body { margin: 0 1em; }
ol, ul { padding-left: 2em; }
"""
def generate_stylesheet(self):
for name in self.stylestack:
styles = self.styledict.get(name)
@ -688,6 +840,7 @@ class ODF2XHTML(handler.ContentHandler):
styles = parentstyle
self.styledict[name] = styles
# Write the styles to HTML
self.writeout(self.default_styles)
for name in self.stylestack:
styles = self.styledict.get(name)
css2 = self.cs.convert_styles(styles)
@ -729,6 +882,7 @@ class ODF2XHTML(handler.ContentHandler):
self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
for metaline in self.metatags:
self.writeout(metaline)
self.writeout('<title>%s</title>\n' % escape(self.title))
def e_office_document_content(self, tag, attrs):
""" Last tag """
@ -773,7 +927,7 @@ class ODF2XHTML(handler.ContentHandler):
""" Copy all attributes to a struct.
We will later convert them to CSS2
"""
if self.currentstyle is None:
if self.currentstyle is None: # Added by Kovid
return
for key,attr in attrs.items():
self.styledict[self.currentstyle][key] = attr
@ -799,7 +953,7 @@ class ODF2XHTML(handler.ContentHandler):
def s_style_font_face(self, tag, attrs):
""" It is possible that the HTML browser doesn't know how to
show a particular font. Luckily ODF provides generic fallbacks
Unluckily they are not the same as CSS2.
Unfortunately they are not the same as CSS2.
CSS2: serif, sans-serif, cursive, fantasy, monospace
ODF: roman, swiss, modern, decorative, script, system
"""
@ -850,7 +1004,7 @@ class ODF2XHTML(handler.ContentHandler):
"""
name = attrs[(STYLENS,'name')]
name = name.replace(".","_")
self.currentstyle = "@page " + name
self.currentstyle = ".PL-" + name
self.stylestack.append(self.currentstyle)
self.styledict[self.currentstyle] = {}
@ -881,7 +1035,7 @@ class ODF2XHTML(handler.ContentHandler):
self.s_ignorexml(tag, attrs)
# Short prefixes for class selectors
familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
_familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
'text':'S', 'section':'D',
'table':'T', 'table-cell':'TD', 'table-column':'TC',
'table-row':'TR', 'graphic':'G' }
@ -897,7 +1051,7 @@ class ODF2XHTML(handler.ContentHandler):
name = name.replace(".","_")
family = attrs[(STYLENS,'family')]
htmlfamily = self.familymap.get(family,'unknown')
sfamily = self.familyshort.get(family,'X')
sfamily = self._familyshort.get(family,'X')
name = "%s%s-%s" % (self.autoprefix, sfamily, name)
parent = attrs.get( (STYLENS,'parent-style-name') )
self.currentstyle = special_styles.get(name,"."+name)
@ -942,6 +1096,7 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def s_table_table_cell(self, tag, attrs):
""" Start a table cell """
#FIXME: number-columns-repeated § 8.1.3
#repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
htmlattrs = {}
@ -959,11 +1114,13 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def e_table_table_cell(self, tag, attrs):
""" End a table cell """
self.writedata()
self.closetag('td')
self.purgedata()
def s_table_table_column(self, tag, attrs):
""" Start a table column """
c = attrs.get( (TABLENS,'style-name'), None)
repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
htmlattrs = {}
@ -974,6 +1131,7 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def s_table_table_row(self, tag, attrs):
""" Start a table row """
#FIXME: table:number-rows-repeated
c = attrs.get( (TABLENS,'style-name'), None)
htmlattrs = {}
@ -983,6 +1141,7 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def e_table_table_row(self, tag, attrs):
""" End a table row """
self.writedata()
self.closetag('tr')
self.purgedata()
@ -997,10 +1156,28 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def e_text_a(self, tag, attrs):
""" End an anchor or bookmark reference """
self.writedata()
self.closetag('a', False)
self.purgedata()
def s_text_bookmark(self, tag, attrs):
    """ Bookmark definition: emit an empty <span> whose id is the anchor
    generated for the text:name of the bookmark.
    """
    anchor_id = self.get_anchor(attrs[(TEXTNS,'name')])
    # Flush pending text first so the marker lands at the right position
    self.writedata()
    self.opentag('span', dict(id=anchor_id))
    self.closetag('span', block=False)
    self.purgedata()
def s_text_bookmark_ref(self, tag, attrs):
    """ Bookmark reference: open an <a> whose href points at the anchor
    generated for the referenced bookmark (text:ref-name).
    The <a> element is left open by this method.
    """
    target = "#" + self.get_anchor(attrs[(TEXTNS,'ref-name')])
    # Flush pending text before the link starts
    self.writedata()
    self.opentag('a', dict(href=target))
    self.purgedata()
def s_text_h(self, tag, attrs):
""" Headings start """
level = int(attrs[(TEXTNS,'outline-level')])
@ -1018,13 +1195,19 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def e_text_h(self, tag, attrs):
""" Headings end """
""" Headings end
Side-effect: If there is no title in the metadata, then it is taken
from the first heading of any level.
"""
self.writedata()
level = int(attrs[(TEXTNS,'outline-level')])
if level > 6: level = 6 # Heading levels go only to 6 in XHTML
if level < 1: level = 1
lev = self.headinglevels[1:level+1]
outline = '.'.join(map(str,lev) )
heading = ''.join(self.data)
if self.title == '': self.title = heading
# Changed by Kovid
tail = ''.join(self.data)
anchor = self.get_anchor("%s.%s" % ( outline, tail))
anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506
@ -1036,12 +1219,14 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def s_text_line_break(self, tag, attrs):
""" Force a line break (<br/>) """
self.writedata()
self.emptytag('br')
self.purgedata()
def s_text_list(self, tag, attrs):
""" To know which level we're at, we have to count the number
""" Start a list (<ul> or <ol>)
To know which level we're at, we have to count the number
of <text:list> elements on the tagstack.
"""
name = attrs.get( (TEXTNS,'style-name') )
@ -1055,12 +1240,13 @@ class ODF2XHTML(handler.ContentHandler):
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
list_class = "%s_%d" % (name, level)
if self.generate_css:
self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class })
self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
else:
self.opentag('%s' % self.listtypes.get(list_class,'UL'))
self.opentag('%s' % self.listtypes.get(list_class,'ul'))
self.purgedata()
def e_text_list(self, tag, attrs):
""" End a list """
self.writedata()
name = attrs.get( (TEXTNS,'style-name') )
level = self.tagstack.count_tags(tag) + 1
@ -1072,14 +1258,16 @@ class ODF2XHTML(handler.ContentHandler):
# textbox itself may be nested within another list.
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
list_class = "%s_%d" % (name, level)
self.closetag(self.listtypes.get(list_class,'UL'))
self.closetag(self.listtypes.get(list_class,'ul'))
self.purgedata()
def s_text_list_item(self, tag, attrs):
""" Start list item """
self.opentag('li')
self.purgedata()
def e_text_list_item(self, tag, attrs):
""" End list item """
self.writedata()
self.closetag('li')
self.purgedata()
@ -1191,7 +1379,7 @@ class ODF2XHTML(handler.ContentHandler):
if specialtag is None:
specialtag = 'p'
self.writedata()
if not self.data:
if not self.data: # Added by Kovid
# Give substance to empty paragraphs, as rendered by OOo
self.writeout('&#160;')
self.closetag(specialtag)
@ -1254,55 +1442,30 @@ class ODF2XHTML(handler.ContentHandler):
#-----------------------------------------------------------------------------
def load(self, odffile):
self._odffile = odffile
""" Loads a document into the parser and parses it.
The argument can either be a filename or a document in memory.
"""
self.lines = []
self._wfunc = self._wlines
if isinstance(odffile, basestring) \
or hasattr(odffile, 'read'): # Added by Kovid
self.document = load(odffile)
else:
self.document = odffile
self._walknode(self.document.topnode)
def parseodf(self):
self.xmlfile = ''
self.title = ''
self.data = []
self.tagstack = TagStack()
self.pstack = []
self.processelem = True
self.processcont = True
self.listtypes = {}
self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
self.cs = StyleToCSS()
self.anchors = {}
def _walknode(self, node):
    """ Recursively replay a document node through the handler methods.

    Element nodes trigger startElementNS, then a walk over each child,
    then endElementNS. Text and CDATA nodes are passed to characters()
    as unicode. Nodes of any other type are ignored.
    """
    kind = node.nodeType
    if kind == Node.ELEMENT_NODE:
        self.startElementNS(node.qname, node.tagName, node.attributes)
        for child in node.childNodes:
            self._walknode(child)
        self.endElementNS(node.qname, node.tagName)
    elif kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        self.characters(unicode(node))
# Style declarations
self.stylestack = []
self.styledict = {}
self.currentstyle = None
# Footnotes and endnotes
self.notedict = {}
self.currentnote = 0
self.notebody = ''
# Tags from meta.xml
self.metatags = []
# Extract the interesting files
z = zipfile.ZipFile(self._odffile)
# For some reason Trac has trouble when xml.sax.make_parser() is used.
# Could it be because PyXML is installed, and therefore a different parser
# might be chosen? By calling expatreader directly we avoid this issue
parser = expatreader.create_parser()
parser.setFeature(handler.feature_namespaces, 1)
parser.setContentHandler(self)
parser.setErrorHandler(handler.ErrorHandler())
inpsrc = InputSource()
for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
self.xmlfile = xmlfile
content = z.read(xmlfile)
inpsrc.setByteStream(StringIO(content))
parser.parse(inpsrc)
z.close()
def odf2xhtml(self, odffile):
""" Load a file and return XHTML
""" Load a file and return the XHTML
"""
self.load(odffile)
return self.xhtml()
@ -1311,9 +1474,8 @@ class ODF2XHTML(handler.ContentHandler):
if s != '': self.lines.append(s)
def xhtml(self):
self.lines = []
self._wfunc = self._wlines
self.parseodf()
""" Returns the xhtml
"""
return ''.join(self.lines)
def _writecss(self, s):
@ -1323,11 +1485,127 @@ class ODF2XHTML(handler.ContentHandler):
pass
def css(self):
self._wfunc = self._writenothing
self.parseodf()
""" Returns the CSS content """
self._csslines = []
self._wfunc = self._writecss
self.generate_stylesheet()
res = ''.join(self._csslines)
self._wfunc = self._wlines
del self._csslines
return res
def save(self, outputfile, addsuffix=False):
    """ Save the HTML under the filename.
        If the filename is '-' then save to stdout
        We have the last style filename in self.stylefilename

        outputfile -- target path, or '-' for standard output
        addsuffix  -- when true, ".html" is appended to outputfile

        The document is encoded as us-ascii; characters outside that
        range are written as XML character references.
    """
    # Render first, so a failure in xhtml() does not leave an empty file.
    data = self.xhtml().encode('us-ascii', 'xmlcharrefreplace')
    if outputfile == '-':
        import sys # Added by Kovid
        # The encoded document is a byte string: use the binary stream
        # behind stdout when there is one. stdout is only flushed, never
        # closed, so the process can keep printing after the save.
        stream = getattr(sys.stdout, 'buffer', sys.stdout)
        stream.write(data)
        stream.flush()
        return
    if addsuffix:
        outputfile = outputfile + ".html"
    # Binary mode: the data is already encoded and must not be altered by
    # newline translation.
    outputfp = open(outputfile, "wb")
    try:
        outputfp.write(data)
    finally:
        outputfp.close()
class ODF2XHTMLembedded(ODF2XHTML):
# Subclass of ODF2XHTML that installs its own, reduced element dispatch table.
# NOTE(review): the docstring below names ODF2XHTML, the parent class, not
# this class -- presumably copied over; confirm the intended wording.
""" The ODF2XHTML parses an ODF file and produces XHTML"""
def __init__(self, lines, generate_css=True, embedable=False):
# lines        -- stored as self.lines after self._resetobject() has run
# generate_css -- stored as self.generate_css
# embedable    -- not referenced anywhere in this constructor
self._resetobject()
self.lines = lines
# Tags
self.generate_css = generate_css
# Dispatch table keyed by (namespace, local name). Each value is a pair of
# bound methods, the first named s_* and the second e_*; presumably these
# are the start-tag and end-tag handlers and None means "no handler" --
# verify against startElementNS/endElementNS.
# The entries for DCNS/METANS metadata, most OFFICENS containers and all
# STYLENS elements are commented out in this table.
self.elements = {
# (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
# (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
# (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
# (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
# (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
(DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
(DRAWNS, 'image'): (self.s_draw_image, None),
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
(DRAWNS, "layer-set"):(self.s_ignorexml, None),
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
(DRAWNS, 'object'): (self.s_draw_object, None),
(DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
# (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
# (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
# (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
# (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
(NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
(NUMBERNS, "currency-style"):(self.s_ignorexml, None),
(NUMBERNS, "date-style"):(self.s_ignorexml, None),
(NUMBERNS, "number-style"):(self.s_ignorexml, None),
(NUMBERNS, "text-style"):(self.s_ignorexml, None),
# (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
# (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "forms"):(self.s_ignorexml, None),
# (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
(OFFICENS, "meta"):(self.s_ignorecont, None),
# (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
# (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
# (OFFICENS, "styles"):(self.s_office_styles, None),
# (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
(OFFICENS, "scripts"):(self.s_ignorexml, None),
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
## (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
# (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
# (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
# (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "font-face"):(self.s_style_font_face, None),
## (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
## (STYLENS, "footer-style"):(self.s_style_footer_style, None),
# (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "handout-master"):(self.s_ignorexml, None),
## (STYLENS, "header"):(self.s_style_header, self.e_style_header),
## (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
## (STYLENS, "header-style"):(self.s_style_header_style, None),
# (STYLENS, "master-page"):(self.s_style_master_page, None),
# (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
## (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
# (STYLENS, "page-layout"):(self.s_ignorexml, None),
# (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "style"):(self.s_style_style, self.e_style_style),
# (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
(SVGNS, 'desc'): (self.s_ignorexml, None),
(TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
(TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
(TABLENS, 'table-column'): (self.s_table_table_column, None),
(TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
(TABLENS, 'table'): (self.s_table_table, self.e_table_table),
(TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
(TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
(TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
(TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'line-break'):(self.s_text_line_break, None),
(TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
(TEXTNS, "list"):(self.s_text_list, self.e_text_list),
(TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
(TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
(TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
(TEXTNS, "list-style"):(None, None),
(TEXTNS, "note"):(self.s_text_note, None),
(TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
(TEXTNS, "note-citation"):(None, self.e_text_note_citation),
(TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
(TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
(TEXTNS, 's'): (self.s_text_s, None),
(TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
(TEXTNS, 'tab'): (self.s_text_tab, None),
(TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "page-number"):(None, None),
}

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -41,7 +41,7 @@ IS_IMAGE = 1
# We need at least Python 2.2. The version is compared as a tuple: checking
# the major and minor numbers separately rejects releases such as 3.0 or 3.1.
assert sys.version_info >= (2, 2)

# NOTE: a recursion limit can only be set by *calling* sys.setrecursionlimit();
# assigning a number to that name replaces the function on the sys module and
# breaks every later caller, so no such assignment is made here.
#sys.setrecursionlimit(100)
#The recursion limit is set conservatively so mistakes like
# s=content() s.addElement(s) won't eat up too much processor time.
@ -128,12 +128,12 @@ class OpenDocument:
self.element_dict[element.qname] = []
self.element_dict[element.qname].append(element)
if element.qname == (STYLENS, u'style'):
self._register_stylename(element) # Add to style dictionary
self.__register_stylename(element) # Add to style dictionary
styleref = element.getAttrNS(TEXTNS,u'style-name')
if styleref is not None and self._styles_ooo_fix.has_key(styleref):
element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref])
def _register_stylename(self, element):
def __register_stylename(self, element):
''' Register a style. But there are three style dictionaries:
office:styles, office:automatic-styles and office:master-styles
Chapter 14
@ -165,7 +165,7 @@ class OpenDocument:
""" Generates the full document as an XML file
Always written as a bytestream in UTF-8 encoding
"""
self._replaceGenerator()
self.__replaceGenerator()
xml=StringIO()
xml.write(_XMLPROLOGUE)
self.topnode.toXml(0, xml)
@ -197,8 +197,10 @@ class OpenDocument:
x.write_close_tag(0, xml)
return xml.getvalue()
def manifestxml(self):
""" Generates the manifest.xml file """
def __manifestxml(self):
""" Generates the manifest.xml file
The self.manifest isn't available unless the document is being saved
"""
xml=StringIO()
xml.write(_XMLPROLOGUE)
self.manifest.toXml(0,xml)
@ -206,7 +208,7 @@ class OpenDocument:
def metaxml(self):
""" Generates the meta.xml file """
self._replaceGenerator()
self.__replaceGenerator()
x = DocumentMeta()
x.addElement(self.meta)
xml=StringIO()
@ -344,7 +346,7 @@ class OpenDocument:
self.thumbnail = filecontent
def addObject(self, document, objectname=None):
""" Add an object. The object must be an OpenDocument class
""" Adds an object (subdocument). The object must be an OpenDocument class
The return value will be the folder in the zipfile the object is stored in
"""
self.childobjects.append(document)
@ -367,15 +369,16 @@ class OpenDocument:
zi.compress_type = zipfile.ZIP_STORED
zi.external_attr = UNIXPERMS
self._z.writestr(zi, fileobj)
if hasPictures:
self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype=""))
# According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry
# if hasPictures:
# self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype=""))
# Look in subobjects
subobjectnum = 1
for subobject in object.childobjects:
self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum))
subobjectnum += 1
def _replaceGenerator(self):
def __replaceGenerator(self):
""" Section 3.1.1: The application MUST NOT export the original identifier
belonging to the application that created the document.
"""
@ -385,22 +388,29 @@ class OpenDocument:
self.meta.addElement(meta.Generator(text=TOOLSVERSION))
def save(self, outputfile, addsuffix=False):
# outputfile -- a filename, or '-' to write the ZIP archive to sys.stdout
# addsuffix  -- when true, an extension looked up in odmimetypes by
#               self.mimetype is appended to the filename ('.xxx' when the
#               mimetype has no entry)
# NOTE(review): two string literals follow; only the first can act as the
# docstring -- confirm which wording is intended.
""" Save the document under the filename """
""" Save the document under the filename.
If the filename is '-' then save to stdout
"""
if outputfile == '-':
outputfp = zipfile.ZipFile(sys.stdout,"w")
else:
if addsuffix:
outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx')
outputfp = zipfile.ZipFile(outputfile, "w")
# NOTE(review): the archive is written through both self._zipwrite and
# self.__zipwrite here; presumably only one of the calls should remain --
# confirm.
self._zipwrite(outputfp)
self.__zipwrite(outputfp)
outputfp.close()
def write(self, outputfp):
""" User API to write the ODF file to an open file descriptor
Writes the ZIP format
"""
# Wrap the caller's file object in a ZipFile opened for writing.
zipoutputfp = zipfile.ZipFile(outputfp,"w")
# NOTE(review): both self._zipwrite and self.__zipwrite are called here;
# presumably only one of the calls should remain -- confirm.
self._zipwrite(zipoutputfp)
self.__zipwrite(zipoutputfp)
# NOTE(review): zipoutputfp is never closed in this method, and a ZipFile
# writes its ending records only on close() -- confirm that this is taken
# care of elsewhere.
def _zipwrite(self, outputfp):
""" Write the document to an open file pointer """
def __zipwrite(self, outputfp):
""" Write the document to an open file pointer
This is where the real work is done
"""
self._z = outputfp
self._now = time.localtime()[:6]
self.manifest = manifest.Manifest()
@ -438,7 +448,7 @@ class OpenDocument:
zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now)
zi.compress_type = zipfile.ZIP_DEFLATED
zi.external_attr = UNIXPERMS
self._z.writestr(zi, self.manifestxml() )
self._z.writestr(zi, self.__manifestxml() )
del self._z
del self._now
del self.manifest
@ -464,8 +474,8 @@ class OpenDocument:
self._z.writestr(zi, object.contentxml() )
# Write settings
if self == object and self.settings.hasChildNodes():
self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml"))
if object.settings.hasChildNodes():
self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml"))
zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now)
zi.compress_type = zipfile.ZIP_DEFLATED
zi.external_attr = UNIXPERMS
@ -473,7 +483,7 @@ class OpenDocument:
# Write meta
if self == object:
self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml"))
self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml"))
zi = zipfile.ZipInfo("meta.xml", self._now)
zi.compress_type = zipfile.ZIP_DEFLATED
zi.external_attr = UNIXPERMS
@ -497,6 +507,7 @@ class OpenDocument:
return element.Text(data)
def createCDATASection(self, data):
    """ Method to create a CDATA section

        data is handed to element.CDATASection and the object built from
        it is returned.
    """
    # Pass the 'data' parameter through; there is no name 'cdata' in this
    # scope, so referring to it raises NameError on every call.
    return element.CDATASection(data)
def getMediaType(self):
@ -504,12 +515,14 @@ class OpenDocument:
return self.mimetype
def getStyleByName(self, name):
    """ Look up a style object by name.

        The name is normalised with make_NCName() before the lookup, and
        the caches are rebuilt first when the style dictionary is empty.
        Returns None when no style is registered under the name.
    """
    key = make_NCName(name)
    if self._styles_dict == {}:
        self.rebuild_caches()
    return self._styles_dict.get(key)
def getElementsByType(self, element):
""" Gets elements based on the type, which is function from text.py, draw.py etc. """
obj = element(check_grammar=False)
if self.element_dict == {}:
self.rebuild_caches()
@ -517,53 +530,59 @@ class OpenDocument:
# Convenience functions
def OpenDocumentChart():
    """ Build a new chart document.

        The Chart element is appended to the document body and is also
        available as the .chart attribute of the returned document.
    """
    chartdoc = OpenDocument('application/vnd.oasis.opendocument.chart')
    chart = Chart()
    chartdoc.chart = chart
    chartdoc.body.addElement(chart)
    return chartdoc
def OpenDocumentDrawing():
    """ Build a new drawing (graphics) document.

        The Drawing element is appended to the document body and is also
        available as the .drawing attribute of the returned document.
    """
    drawingdoc = OpenDocument('application/vnd.oasis.opendocument.graphics')
    drawing = Drawing()
    drawingdoc.drawing = drawing
    drawingdoc.body.addElement(drawing)
    return drawingdoc
def OpenDocumentImage():
    """ Build a new image document.

        The Image element is appended to the document body and is also
        available as the .image attribute of the returned document.
    """
    imagedoc = OpenDocument('application/vnd.oasis.opendocument.image')
    image = Image()
    imagedoc.image = image
    imagedoc.body.addElement(image)
    return imagedoc
def OpenDocumentPresentation():
    """ Build a new presentation document.

        The Presentation element is appended to the document body and is
        also available as the .presentation attribute of the returned
        document.
    """
    presentationdoc = OpenDocument('application/vnd.oasis.opendocument.presentation')
    presentation = Presentation()
    presentationdoc.presentation = presentation
    presentationdoc.body.addElement(presentation)
    return presentationdoc
def OpenDocumentSpreadsheet():
    """ Build a new spreadsheet document.

        The Spreadsheet element is appended to the document body and is
        also available as the .spreadsheet attribute of the returned
        document.
    """
    sheetdoc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet')
    spreadsheet = Spreadsheet()
    sheetdoc.spreadsheet = spreadsheet
    sheetdoc.body.addElement(spreadsheet)
    return sheetdoc
def OpenDocumentText():
    """ Build a new text document.

        The Text element is appended to the document body and is also
        available as the .text attribute of the returned document.
    """
    textdoc = OpenDocument('application/vnd.oasis.opendocument.text')
    text = Text()
    textdoc.text = text
    textdoc.body.addElement(text)
    return textdoc
def OpenDocumentTextMaster():
    """ Build a new text master document.

        The Text element is appended to the document body and is also
        available as the .text attribute of the returned document.
    """
    masterdoc = OpenDocument('application/vnd.oasis.opendocument.text-master')
    text = Text()
    masterdoc.text = text
    masterdoc.body.addElement(text)
    return masterdoc
def load(odffile):
def __loadxmlparts(z, manifest, doc, objectpath):
from load import LoadParser
from xml.sax import make_parser, handler
z = zipfile.ZipFile(odffile)
mimetype = z.read('mimetype')
doc = OpenDocument(mimetype, add_generator=False)
# Look in the manifest file to see which of the four files are present
manifestpart = z.read('META-INF/manifest.xml')
manifest = manifestlist(manifestpart)
for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'):
if not manifest.has_key(xmlfile):
continue
try:
@ -580,7 +599,19 @@ def load(odffile):
parser.parse(inpsrc)
del doc._parsing
except KeyError, v: pass
# FIXME: Add subobjects correctly here
def load(odffile):
""" Load an ODF file into memory
Returns a reference to the structure
"""
z = zipfile.ZipFile(odffile)
mimetype = z.read('mimetype')
doc = OpenDocument(mimetype, add_generator=False)
# Look in the manifest file to see which of the four files are present
manifestpart = z.read('META-INF/manifest.xml')
manifest = manifestlist(manifestpart)
__loadxmlparts(z, manifest, doc, '')
for mentry,mvalue in manifest.items():
if mentry[:9] == "Pictures/" and len(mentry) > 9:
doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
@ -588,6 +619,13 @@ def load(odffile):
doc.addThumbnail(z.read(mentry))
elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
pass
# Load subobjects into structure
elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/":
subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
doc.addObject(subdoc, "/" + mentry[:-1])
__loadxmlparts(z, manifest, subdoc, mentry)
elif mentry[:7] == "Object ":
pass # Don't load subobjects as opaque objects
else:
if mvalue['full-path'][-1] == '/':
doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
@ -612,4 +650,5 @@ def load(odffile):
elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula':
doc.formula = b[0].firstChild
return doc
# vim: set expandtab sw=4 :