diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 674302bf6d..e619d00a31 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -398,9 +398,10 @@ def relpath(target, base=os.curdir): Base can be a directory specified either as absolute or relative to current dir. """ - if not os.path.exists(target): - raise OSError, 'Target does not exist: '+target - + #if not os.path.exists(target): + # raise OSError, 'Target does not exist: '+target + if target == base: + raise ValueError('target and base are both: %s'%target) if not os.path.isdir(base): raise OSError, 'Base is not a directory or does not exist: '+base @@ -408,13 +409,13 @@ def relpath(target, base=os.curdir): target_list = (os.path.abspath(target)).split(os.sep) # On the windows platform the target may be on a completely different drive from the base. - if iswindows and base_list[0] <> target_list[0]: + if iswindows and base_list[0] != target_list[0]: raise OSError, 'Target is on a different drive to base. Target: '+target_list[0].upper()+', base: '+base_list[0].upper() # Starting from the filepath root, work out how much of the filepath is # shared by base and target. for i in range(min(len(base_list), len(target_list))): - if base_list[i] <> target_list[i]: break + if base_list[i] != target_list[i]: break else: # If we broke out of the loop, i is pointing to the first differing path elements. # If we didn't break out of the loop, i is pointing to identical path elements. 
diff --git a/src/calibre/ebooks/lrf/epub/convert_from.py b/src/calibre/ebooks/lrf/epub/convert_from.py index 4dc531f86b..701681243e 100644 --- a/src/calibre/ebooks/lrf/epub/convert_from.py +++ b/src/calibre/ebooks/lrf/epub/convert_from.py @@ -29,7 +29,7 @@ def generate_html(pathtoepub, logger): zip_extract(pathtoepub, tdir) except: if os.path.exists(tdir) and os.path.isdir(tdir): - shutil.rmtree(tdir) + shutil.rmtree(tdir) raise ConversionError, '.epub extraction failed' return tdir @@ -42,7 +42,7 @@ def process_file(path, options, logger=None): tdir = generate_html(epub, logger) try: ocf = OCFDirReader(tdir) - htmlfile = ocf.opf.spine.items().next().href + htmlfile = ocf.opf.spine[0].path options.opf = os.path.join(tdir, ocf.container[OPF.MIMETYPE]) if not options.output: ext = '.lrs' if options.lrs else '.lrf' @@ -61,7 +61,7 @@ def process_file(path, options, logger=None): def main(args=sys.argv, logger=None): parser = option_parser() options, args = parser.parse_args(args) - if len(args) != 2: + if len(args) != 2: parser.print_help() print print 'No epub file specified' diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index a69b47bab0..27bf152597 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -1969,7 +1969,7 @@ def try_opf(path, options, logger): continue if not getattr(options, 'cover', None) and orig_cover is not None: options.cover = orig_cover - options.spine = [i.href for i in opf.spine.items()] + options.spine = [i.path for i in opf.spine if i.path] if not getattr(options, 'toc', None): options.toc = opf.toc except Exception: diff --git a/src/calibre/ebooks/lrf/lit/convert_from.py b/src/calibre/ebooks/lrf/lit/convert_from.py index d93eaf9534..3d8a3f97b0 100644 --- a/src/calibre/ebooks/lrf/lit/convert_from.py +++ b/src/calibre/ebooks/lrf/lit/convert_from.py @@ -57,7 +57,7 @@ def process_file(path, options, logger=None): if opf: path = opf[0] 
opf = OPFReader(path) - htmlfile = opf.spine.items().next().href.replace('&', '%26') #convertlit replaces & with %26 + htmlfile = opf.spine[0].path.replace('&', '%26') #convertlit replaces & with %26 options.opf = path else: l = glob.glob(os.path.join(tdir, '*toc*.htm*')) diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index f744bb7f41..eacc4f04e0 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -1,14 +1,17 @@ +#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + """ -Provides metadata editing support for PDF and RTF files. For LRF metadata, use -the L{lrf.meta} module. +Provides abstraction for metadata reading.writing from a variety of ebook formats. """ -__docformat__ = "epytext" -__author__ = "Kovid Goyal " +import os, mimetypes +from urllib import unquote, quote +from urlparse import urlparse -from calibre import __version__ as VERSION +from calibre import __version__ as VERSION, relpath from calibre import OptionParser def get_parser(extension): @@ -24,6 +27,125 @@ def get_parser(extension): help=_('Set the comment')) return parser +class Resource(object): + ''' + Represents a resource (usually a file on the filesystem or a URL pointing + to the web. Such resources are commonly referred to in OPF files. 
+ + They have the interface: + + :member:`path` + :member:`mime_type` + :method:`href` + + ''' + + def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True): + self._href = None + self._basedir = None + self.path = None + self.fragment = '' + try: + self.mime_type = mimetypes.guess_type(href_or_path)[0] + except: + self.mime_type = None + if self.mime_type is None: + self.mime_type = 'application/octet-stream' + if is_path: + path = href_or_path + if not os.path.isabs(path): + path = os.path.abspath(os.path.join(path, basedir)) + self.path = path + else: + url = urlparse(href_or_path) + if url[0] not in ('', 'file'): + self._href = href_or_path + else: + self.path = os.path.abspath(os.path.join(basedir, unquote(url[2]).replace('/', os.sep))) + self.fragment = unquote(url[-1]) + + + def href(self, basedir=None): + ''' + Return a URL pointing to this resource. If it is a file on the filesystem + the URL is relative to `basedir`. + + `basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`). + If this resource has no basedir, then the current working directory is used as the basedir. 
+ ''' + if basedir is None: + if self._basedir: + basedir = self._basedir + else: + basedir = os.getcwd() + if self.path is None: + return self._href + frag = '#'+quote(self.fragment) if self.fragment else '' + if self.path == basedir: + return ''+frag + rpath = relpath(self.path, basedir) + + return quote(rpath.replace(os.sep, '/'))+frag + + def set_basedir(self, path): + self._basedir = path + + def basedir(self): + return self._basedir + + def __repr__(self): + return 'Resource(%s, %s)'%(repr(self.path), repr(self.href())) + + +class ResourceCollection(object): + + def __init__(self): + self._resources = [] + + def __iter__(self): + for r in self._resources: + yield r + + def __len__(self): + return len(self._resources) + + def __getitem__(self, index): + return self._resources[index] + + def __bool__(self): + return len(self._resources) > 0 + + def __str__(self): + resources = map(repr, self) + return '[%s]'%', '.join(resources) + + def __repr__(self): + return str(self) + + def append(self, resource): + if not isinstance(resource, Resource): + raise ValueError('Can only append objects of type Resource') + self._resources.append(resource) + + def remove(self, resource): + self._resources.remove(resource) + + @staticmethod + def from_directory_contents(top, topdown=True): + collection = ResourceCollection() + for spec in os.walk(top, topdown=topdown): + path = os.path.abspath(os.path.join(spec[0], spec[1])) + res = Resource.from_path(path) + res.set_basedir(top) + collection.append(res) + return collection + + def set_basedir(self, path): + for res in self: + res.set_basedir(path) + + + class MetaInformation(object): '''Convenient encapsulation of book metadata''' @@ -32,7 +154,7 @@ class MetaInformation(object): ans = MetaInformation(mi.title, mi.authors) for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', - 'isbn', 'tags', 'cover_data', 'application_id', + 'isbn', 'tags', 'cover_data', 
'application_id', 'guide', 'manifest', 'spine', 'toc', 'cover', 'language'): if hasattr(mi, attr): setattr(ans, attr, getattr(mi, attr)) @@ -70,6 +192,7 @@ class MetaInformation(object): self.manifest = getattr(mi, 'manifest', None) self.toc = getattr(mi, 'toc', None) self.spine = getattr(mi, 'spine', None) + self.guide = getattr(mi, 'guide', None) self.cover = getattr(mi, 'cover', None) def smart_update(self, mi): @@ -86,7 +209,7 @@ class MetaInformation(object): for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', 'isbn', 'application_id', 'manifest', 'spine', 'toc', - 'cover', 'language'): + 'cover', 'language', 'guide'): if hasattr(mi, attr): val = getattr(mi, attr) if val is not None: @@ -116,7 +239,7 @@ class MetaInformation(object): if self.tags: ans += u'Tags : ' +unicode(self.tags) + '\n' if self.series: - ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index + ans += u'Series : '+unicode(self.series) + ' #%d\n'%self.series_index if self.language: ans += u'Language : ' + unicode(self.language) + u'\n' return ans.strip() diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 892acec3f5..2b8c3a4b9f 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -5,7 +5,9 @@ Support for reading the metadata from a lit file. 
''' import sys, struct, cStringIO, os +from itertools import repeat +from calibre import relpath from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf import OPFReader @@ -188,7 +190,7 @@ class UnBinary(object): def write_spaces(self, depth): - self.buf.write(u' '.join(u'' for i in range(depth))) + self.buf.write(u''.join(repeat(' ', depth))) def item_path(self, internal_id): for i in self.manifest: @@ -692,6 +694,7 @@ class LitFile(object): try: self._stream.seek(self.content_offset + entry.offset) raw = self._stream.read(entry.size) + xml = \ '''\ @@ -721,9 +724,10 @@ def get_metadata(stream): try: litfile = LitFile(stream) src = litfile.meta.encode('utf-8') - mi = OPFReader(cStringIO.StringIO(src)) + mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) cover_url, cover_item = mi.cover, None if cover_url: + cover_url = relpath(cover_url, os.getcwd()) for item in litfile.manifest: if item.path == cover_url: cover_item = item.internal diff --git a/src/calibre/ebooks/metadata/opf.py b/src/calibre/ebooks/metadata/opf.py index 51b392c70f..df84bc59da 100644 --- a/src/calibre/ebooks/metadata/opf.py +++ b/src/calibre/ebooks/metadata/opf.py @@ -1,93 +1,205 @@ +import cStringIO __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' import uuid '''Read/Write metadata from Open Packaging Format (.opf) files.''' -import sys, re, os, mimetypes -from urllib import unquote -from urlparse import urlparse -import xml.dom.minidom as dom -from itertools import repeat +import sys, re, os, glob from calibre import __appname__ from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup +from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup from calibre.ebooks.lrf import entity_to_unicode -from calibre.ebooks.metadata import get_parser +from calibre.ebooks.metadata import get_parser, Resource, ResourceCollection from calibre.ebooks.metadata.toc import TOC -class 
ManifestItem(object): - def __init__(self, item, cwd): - self.id = item['id'] if item.has_key('id') else '' - self.href = urlparse(unquote(item['href']))[2] if item.has_key('href') else '' - if not os.path.isabs(self.href): - self.href = os.path.join(cwd, self.href) - self.href = os.path.normpath(self.href) - if not os.path.exists(self.href): # Bug in Baen OPF files - nhref = os.path.join(os.path.dirname(self.href), os.path.basename(self.href).replace('__p_.htm', '__c_.htm')) - if os.path.exists(nhref): - self.href = nhref - self.media_type = item['media-type'] if item.has_key('media-type') else '' +class OPFSoup(BeautifulStoneSoup): + + def __init__(self, raw): + BeautifulStoneSoup.__init__(self, raw, + convertEntities=BeautifulSoup.HTML_ENTITIES, + selfClosingTags=['item', 'itemref', 'reference']) + +class ManifestItem(Resource): + + @staticmethod + def from_opf_manifest_item(item, basedir): + if item.has_key('href'): + res = ManifestItem(item['href'], basedir=basedir, is_path=False) + mt = item.get('media-type', '').strip() + if mt: + res.mime_type = mt + return res + + @apply + def media_type(): + def fget(self): + return self.mime_type + def fset(self, val): + self.mime_type = val + return property(fget=fget, fset=fset) + def __unicode__(self): - return u''%(self.id, self.href, self.media_type) + return u''%(self.id, self.href(), self.media_type) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __repr__(self): + return unicode(self) + def __getitem__(self, index): if index == 0: - return self.href + return self.href() if index == 1: return self.media_type raise IndexError('%d out of bounds.'%index) -class Manifest(list): +class Manifest(ResourceCollection): - def __init__(self, soup, dir): - manifest = soup.find('manifest') - if manifest is not None: - for item in manifest.findAll('item'): - self.append(ManifestItem(item, dir)) + @staticmethod + def from_opf_manifest_element(manifest, dir): + m = Manifest() + for item in 
manifest.findAll('item'): + try: + m.append(ManifestItem.from_opf_manifest_item(item, dir)) + id = item.get('id', '') + if not id: + id = 'id%d'%m.next_id + m[-1].id = id + m.next_id += 1 + except ValueError: + continue + return m + + @staticmethod + def from_paths(entries): + ''' + `entries`: List of (path, mime-type) If mime-type is None it is autodetected + ''' + m = Manifest() + for path, mt in entries: + mi = ManifestItem(path, is_path=True) + if mt: + mi.mime_type = mt + mi.id = 'id%d'%m.next_id + m.next_id += 1 + m.append(mi) + return m + + def __init__(self): + ResourceCollection.__init__(self) + self.next_id = 1 + def item(self, id): for i in self: if i.id == id: - return i + return i + + def id_for_path(self, path): + path = os.path.normpath(os.path.abspath(path)) + for i in self: + if i.path and os.path.normpath(i.path) == path: + return i.id + + def path_for_id(self, id): + for i in self: + if i.id == id: + return i.path -class Spine(object): +class Spine(ResourceCollection): - def __init__(self, soup, manifest): + class Item(Resource): + + def __init__(self, idfunc, *args, **kwargs): + Resource.__init__(self, *args, **kwargs) + self.is_linear = True + self.id = idfunc(self.path) + + @staticmethod + def from_opf_spine_element(spine, manifest): + s = Spine(manifest) + for itemref in spine.findAll('itemref'): + if itemref.has_key('idref'): + r = Spine.Item(s.manifest.id_for_path, + s.manifest.path_for_id(itemref['idref']), is_path=True) + r.is_linear = itemref.get('linear', 'yes') == 'yes' + s.append(r) + return s + + @staticmethod + def from_paths(paths, manifest): + s = Spine(manifest) + for path in paths: + try: + s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True)) + except: + continue + return s + + + + def __init__(self, manifest): + ResourceCollection.__init__(self) self.manifest = manifest - self.linear_ids, self.nonlinear_ids = [], [] - spine = soup.find('spine') - if spine is not None: - for itemref in spine.findAll('itemref'): - if 
itemref.has_key('idref'): - if itemref.get('linear', 'yes') == 'yes': - self.linear_ids.append(itemref['idref']) - else: - self.nonlinear_ids.append(itemref['idref']) + def linear_items(self): - for id in self.linear_ids: - yield self.manifest.item(id) - + for r in self: + if r.is_linear: + yield r.path def nonlinear_items(self): - for id in self.nonlinear_ids: - yield self.manifest.item(id) - - + for r in self: + if not r.is_linear: + yield r.path + def items(self): - for i in self.linear_ids + self.nonlinear_ids: - mi = self.manifest.item(i) - if getattr(mi, 'href', None): - yield mi - - def __iter__(self): - for i in self.linear_ids + self.nonlinear_ids: - yield i - + for i in self: + yield i.path + +class Guide(ResourceCollection): + + class Reference(Resource): + + @staticmethod + def from_opf_resource_item(ref, basedir): + title, href, type = ref.get('title', ''), ref['href'], ref['type'] + res = Guide.Reference(href, basedir, is_path=False) + res.title = title + res.type = type + return res + + def __repr__(self): + ans = '' + + + @staticmethod + def from_opf_guide(guide_elem, base_dir=os.getcwdu()): + coll = Guide() + for ref in guide_elem.findAll('reference'): + try: + ref = Guide.Reference.from_opf_resource_item(ref, base_dir) + coll.append(ref) + except: + continue + return coll + + def set_cover(self, path): + map(self.remove, [i for i in self if 'cover' in i.type.lower()]) + for type in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'): + self.append(Guide.Reference(path, is_path=True)) + self[-1].type = type + self[-1].title = '' + class standard_field(object): @@ -97,8 +209,6 @@ class standard_field(object): def __get__(self, obj, typ=None): return getattr(obj, 'get_'+self.name)() - def __set__(self, obj, val): - getattr(obj, 'set_'+self.name)(val) class OPF(MetaInformation): @@ -109,6 +219,7 @@ class OPF(MetaInformation): application_id = standard_field('application_id') title = standard_field('title') authors = 
standard_field('authors') + language = standard_field('language') title_sort = standard_field('title_sort') author_sort = standard_field('author_sort') comments = standard_field('comments') @@ -121,93 +232,15 @@ class OPF(MetaInformation): rating = standard_field('rating') tags = standard_field('tags') - HEADER = '''\ - - -''' def __init__(self): raise NotImplementedError('Abstract base class') - def _initialize(self): - if not hasattr(self, 'soup'): - self.soup = BeautifulStoneSoup(u'''\ -%s - - - - - -'''%(__appname__, self.HEADER)) - - def _commit(self, doc): - self.soup = BeautifulStoneSoup(doc.toxml('utf-8'), fromEncoding='utf-8') - - def _find_element(self, package, name, attrs=[]): - tags = package.getElementsByTagName(name) - for tag in tags: - match = True - for attr, vattr in attrs: - if tag.getAttribute(attr) != vattr: - match = False - break - if match: - return tag - return None - - def _set_metadata_element(self, name, value, attrs=[], - type='dc-metadata', replace=False): - self._initialize() - if isinstance(value, basestring): - value = [value] - attrs = [attrs] - - doc = dom.parseString(self.soup.__str__('UTF-8').strip()) - package = doc.documentElement - metadata = package.getElementsByTagName('metadata')[0] - - dcms = metadata.getElementsByTagName(type) - if dcms: - dcm = dcms[0] - else: - dcm = doc.createElement(type) - metadata.appendChild(dcm) - metadata.appendChild(doc.createTextNode('\n')) - tags = dcm.getElementsByTagName(name) - if tags and not replace: - for tag in tags: - tag.parentNode.removeChild(tag) - tag.unlink() - - for val, vattrs in zip(value, attrs): - if replace: - el = self._find_element(package, name, vattrs) - if el: - el.parentNode.removeChild(el) - el.unlink() - el = doc.createElement(name) - el.appendChild(doc.createTextNode(val)) - for attr, vattr in vattrs: - el.setAttribute(attr, vattr) - dcm.appendChild(el) - dcm.appendChild(doc.createTextNode('\n')) - self._commit(doc) - - def get_title(self): title = 
self.soup.package.metadata.find('dc:title') if title: return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip() return self.default_title.strip() - def set_title(self, title): - if not title: - title = 'Unknown' - self._set_metadata_element('dc:title', title) - def get_authors(self): creators = self.soup.package.metadata.findAll('dc:creator') for elem in creators: @@ -225,12 +258,6 @@ class OPF(MetaInformation): return [a.strip() for a in ans] return [] - def set_authors(self, authors): - if not authors: - authors = ['Unknown'] - attrs = list(repeat([('role', 'aut')], len(authors))) - self._set_metadata_element('dc:creator', authors, attrs) - def get_author_sort(self): creators = self.soup.package.metadata.findAll('dc:creator') for elem in creators: @@ -242,18 +269,6 @@ class OPF(MetaInformation): return self.ENTITY_PATTERN.sub(entity_to_unicode, fa).strip() if fa else None return None - def set_author_sort(self, aus): - if not aus: - aus = '' - self._initialize() - if not self.authors: - self.set_authors([]) - doc = dom.parseString(self.soup.__str__('UTF-8')) - package = doc.documentElement - aut = package.getElementsByTagName('dc:creator')[0] - aut.setAttribute('file-as', aus) - self._commit(doc) - def get_title_sort(self): title = self.soup.package.find('dc:title') if title: @@ -261,61 +276,29 @@ class OPF(MetaInformation): return title['file-as'].strip() return None - def set_title_sort(self, title_sort): - if not title_sort: - title_sort = '' - self._initialize() - if not self.title: - self.title = None - doc = dom.parseString(self.soup.__str__('UTF-8')) - package = doc.documentElement - tit = package.getElementsByTagName('dc:title')[0] - tit.setAttribute('file-as', title_sort) - self._commit(doc) - def get_comments(self): comments = self.soup.find('dc:description') if comments: return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip() return None - def set_comments(self, comments): - if not comments: - comments = '' - 
self._set_metadata_element('dc:description', comments) - def get_uid(self): package = self.soup.find('package') if package.has_key('unique-identifier'): return package['unique-identifier'] - def set_uid(self, uid): - package = self.soup.find('package') - package['unique-identifier'] = str(uid) - def get_category(self): category = self.soup.find('dc:type') if category: return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string).strip() return None - def set_category(self, category): - if not category: - category = '' - self._set_metadata_element('dc:type', category) - def get_publisher(self): publisher = self.soup.find('dc:publisher') if publisher: return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string).strip() return None - def set_publisher(self, category): - if not category: - category = 'Unknown' - self._set_metadata_element('dc:publisher', category) - - def get_isbn(self): for item in self.soup.package.metadata.findAll('dc:identifier'): scheme = item.get('scheme') @@ -325,51 +308,27 @@ class OPF(MetaInformation): return str(item.string).strip() return None - def set_isbn(self, isbn): - if isbn: - self._set_metadata_element('dc:identifier', isbn, [('scheme', 'ISBN')], - replace=True) - + def get_language(self): + item = self.soup.package.metadata.find('dc:language') + if not item: + return _('Unknown') + return ''.join(item.findAll(text=True)).strip() + def get_application_id(self): for item in self.soup.package.metadata.findAll('dc:identifier'): if item.has_key('scheme') and item['scheme'] == __appname__: return str(item.string).strip() return None - def set_application_id(self, val): - if val: - self._set_metadata_element('dc:identifier', str(val), [('scheme', __appname__), ('id', __appname__+'_id')], - replace=True) - def get_cover(self): - guide = self.soup.package.find('guide') - if guide: - references = guide.findAll('reference') - for reference in references: - type = reference.get('type') - if not type: - continue - if type.lower() in 
['cover', 'other.ms-coverimage-standard', "other.ms-coverimage"]: - return reference.get('href') - return None - - def set_cover(self, path, type='cover'): - self._initialize() - doc = dom.parseString(self.soup.__str__('UTF-8')) - package = doc.documentElement - guide = package.getElementsByTagName('guide') - if guide: - guide = guide[0] - else: - guide = doc.createElement('guide') - package.appendChild(guide) - el = self._find_element(guide, 'reference', [('type', type)]) - if not el: - el = doc.createElement('reference') - guide.appendChild(el) - el.setAttribute('type', type) - el.setAttribute('href', path) - self._commit(doc) + guide = getattr(self, 'guide', []) + if not guide: + guide = [] + references = [ref for ref in guide if 'cover' in ref.type.lower()] + for candidate in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'): + matches = [r for r in references if r.type.lower() == candidate and r.path] + if matches: + return matches[0].path def possible_cover_prefixes(self): isbn, ans = [], [] @@ -388,11 +347,6 @@ class OPF(MetaInformation): return str(s.string).strip() return None - def set_series(self, val): - if not val: - val = '' - self._set_metadata_element('series', val, type='x-metadata') - def get_series_index(self): s = self.soup.package.metadata.find('series-index') if s: @@ -402,11 +356,6 @@ class OPF(MetaInformation): return None return None - def set_series_index(self, val): - if not val: - val = 1 - self._set_metadata_element('series-index', str(val), type='x-metadata') - def get_rating(self): xm = self.soup.package.metadata.find('x-metadata') if not xm: @@ -419,11 +368,6 @@ class OPF(MetaInformation): return None return None - def set_rating(self, val): - if not val: - val = 0 - self._set_metadata_element('rating', str(val), type='x-metadata') - def get_tags(self): ans = [] subs = self.soup.findAll('dc:subject') @@ -433,42 +377,7 @@ class OPF(MetaInformation): ans.append(val) return [unicode(a).strip() for a in ans] - def 
set_tags(self, tags): - self._set_metadata_element('dc:subject', tags) - - def write(self, stream): - from lxml import etree - root = etree.fromstring(unicode(self.soup)) - root.text = '\n%4s'%' ' - for child in root: - child.text = '\n%8s'%' ' - child.tail = '\n%4s'%' ' if child is not root[-1] else '\n' - for grandchild in child: - grandchild.tail = '\n%8s'%' ' if grandchild is not child[-1] else '\n%4s'%' ' - - metadata = root.find('metadata') - if metadata is not None: - for parent in ['dc-metadata', 'x-metadata']: - parent = metadata.find(parent) - if parent is None: - continue - parent.text = '\n%12s'%' ' - for child in parent: - child.tail = '\n%8s'%' ' if child is parent[-1] else '\n%12s'%' ' - - def fix_self_closing_tags(el): - ''' Makes tags that have only whitespace content self closing ''' - if len(el) == 0 and (el.text is None or el.text.strip() == ''): - el.text = None - for child in el: - fix_self_closing_tags(child) - - fix_self_closing_tags(root) - - raw = self.HEADER + etree.tostring(root, encoding='UTF-8') - - stream.write(raw+'\n') - + class OPFReader(OPF): def __init__(self, stream, dir=os.getcwdu()): @@ -480,15 +389,27 @@ class OPFReader(OPF): self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' if hasattr(stream, 'seek'): stream.seek(0) - self.soup = BeautifulStoneSoup(stream.read()) + self.soup = OPFSoup(stream.read()) if manage: stream.close() - self.manifest = Manifest(self.soup, dir) - self.spine = Spine(self.soup, self.manifest) + self.manifest = Manifest() + m = self.soup.find('manifest') + if m is not None: + self.manifest = Manifest.from_opf_manifest_element(m, dir) + self.spine = None + spine = self.soup.find('spine') + if spine is not None: + self.spine = Spine.from_opf_spine_element(spine, self.manifest) + self.toc = TOC(base_path=dir) self.toc.read_from_opf(self) + guide = self.soup.find('guide') + if guide is not None: + self.guide = Guide.from_opf_guide(guide, dir) + self.base_dir = dir self.cover_data = 
(None, None) + class OPFCreator(MetaInformation): def __init__(self, base_path, *args, **kwargs): @@ -502,47 +423,34 @@ class OPFCreator(MetaInformation): self.base_path = os.path.abspath(base_path) if self.application_id is None: self.application_id = str(uuid.uuid4()) - self.toc = None - if isinstance(self.manifest, Manifest): - manifest = [] - for path, mt in self.manifest: - if not path.startswith(self.base_path): - raise ValueError('Inavlid manifest item %s for base path %s'%(path, self.base_path)) - path = path[len(self.base_path)+1:] - manifest.append((path, mt)) - self.manifest = manifest + if not isinstance(self.toc, TOC): + self.toc = None if not self.authors: self.authors = [_('Unknown')] - + if self.guide is None: + self.guide = Guide() + if self.cover: + self.guide.set_cover(self.cover) + + def create_manifest(self, entries): ''' Create - @param entries: List of (path, mime-type) - @param base_path: It is used to convert each path into a path relative to itself - @type entries: list of 2-tuples + + `entries`: List of (path, mime-type) If mime-type is None it is autodetected ''' - rentries = [] - base_path = self.base_path - mimetypes.init() - for href, mt in entries: - href = os.path.abspath(href) - if not href.startswith(base_path): - raise ValueError('OPF should only refer to files below it. 
%s is above %s'%(href, base_path)) - href = href[len(base_path)+1:].replace(os.sep, '/') - if not mt: - mt = mimetypes.guess_type(href)[0] - if not mt: - mt = '' - rentries.append((href, mt)) - - self.manifest = rentries + entries = map(lambda x: x if os.path.isabs(x[0]) else + (os.path.abspath(os.path.join(self.base_path, x[0])), x[1]), + entries) + self.manifest = Manifest.from_paths(entries) + self.manifest.set_basedir(self.base_path) def create_manifest_from_files_in(self, files_and_dirs): - #self.base_path = os.path.commonprefix(files_and_dirs) entries = [] def dodir(dir): - for root, dirs, files in os.walk(dir): + for spec in os.walk(dir): + root, files = spec[0], spec[-1] for name in files: path = os.path.join(root, name) if os.path.isfile(path): @@ -558,47 +466,48 @@ class OPFCreator(MetaInformation): def create_spine(self, entries): ''' - Create the element. Must first call L{create_manifest}. - @param: List of paths - @type param: list of strings - ''' - self.spine = [] + Create the element. Must first call :method:`create_manifest`. - for path in entries: - if not os.path.isabs(path): - path = os.path.join(self.base_path, path) - if not path.startswith(self.base_path): - raise ValueError('Invalid entry %s for base path %s'%(path, self.base_path)) - href = path[len(self.base_path)+1:] - in_manifest = False - for i, m in enumerate(self.manifest): - if m[0] == href: - in_manifest = True - break - if not in_manifest: - raise ValueError('%s is not in the manifest. (%s)'%(href, path)) - self.spine.append(i) - - + `entries`: List of paths + ''' + entries = map(lambda x: x if os.path.isabs(x) else + os.path.abspath(os.path.join(self.base_path, x)), entries) + self.spine = Spine.from_paths(entries, self.manifest) def set_toc(self, toc): ''' - Set the toc. You must call L{create_spine} before calling this + Set the toc. You must call :method:`create_spine` before calling this method. 
- @param toc: A Table of Contents - @type toc: L{TOC} + + `toc`: A :class:`TOC` object ''' self.toc = toc + def create_guide(self, guide_element): + self.guide = Guide.from_opf_guide(guide_element, self.base_path) + self.guide.set_basedir(self.base_path) + def render(self, opf_stream, ncx_stream=None): from calibre.resources import opf_template - from genshi.template import MarkupTemplate + from calibre.utils.genshi.template import MarkupTemplate template = MarkupTemplate(opf_template) + if self.manifest: + self.manifest.set_basedir(self.base_path) + if not self.guide: + self.guide = Guide() + self.guide.set_basedir(self.base_path) + if self.cover: + cover = self.cover + if not os.path.isabs(cover): + cover = os.path.abspath(os.path.join(self.base_path, cover)) + self.guide.set_cover(cover) opf = template.generate(__appname__=__appname__, mi=self).render('xml') opf_stream.write(opf) + opf_stream.flush() toc = getattr(self, 'toc', None) if toc is not None and ncx_stream is not None: toc.render(ncx_stream, self.application_id) + ncx_stream.flush() def option_parser(): return get_parser('opf') @@ -609,18 +518,35 @@ def main(args=sys.argv): if len(args) != 2: parser.print_help() return 1 - mi = MetaInformation(OPFReader(open(args[1], 'rb'))) + mi = MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1])))) + write = False if opts.title is not None: mi.title = opts.title.replace('&', '&').replace('<', '<').replace('>', '>') + write = True if opts.authors is not None: aus = [i.strip().replace('&', '&').replace('<', '<').replace('>', '>') for i in opts.authors.split(',')] mi.authors = aus + write = True if opts.category is not None: mi.category = opts.category.replace('&', '&').replace('<', '<').replace('>', '>') + write = True if opts.comment is not None: mi.comments = opts.comment.replace('&', '&').replace('<', '<').replace('>', '>') - mo = OPFCreator(os.getcwd(), mi) - mo.render(open(args[1], 'wb')) + write = True + if write: + mo = 
OPFCreator(os.path.dirname(args[1]), mi) + ncx = cStringIO.StringIO() + mo.render(open(args[1], 'wb'), ncx) + ncx = ncx.getvalue() + if ncx: + f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx')) + if f: + f = open(f[0], 'wb') + else: + f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb') + f.write(ncx) + f.close() + print MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1])))) return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/metadata/opf.xml b/src/calibre/ebooks/metadata/opf.xml index b74c67e085..10623715ff 100644 --- a/src/calibre/ebooks/metadata/opf.xml +++ b/src/calibre/ebooks/metadata/opf.xml @@ -23,24 +23,23 @@ - - - - - - + + + + - - - - - - - - + + - + + + + + + + + diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index e63b5363ae..a966dd6fae 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -65,8 +65,8 @@ class TOC(list): toc = opfreader.soup.find('guide').find('reference', attrs={'type':'toc'})['href'] except: for item in opfreader.manifest: - if 'toc' in item.href.lower(): - toc = item.href + if 'toc' in item.href().lower(): + toc = item.href() break if toc is not None: @@ -120,6 +120,9 @@ class TOC(list): process_navpoint(c, nd) nm = soup.find('navmap') + if nm is None: + raise ValueError('NCX files must have a element.') + for elem in nm: if getattr(elem, 'name', None) == 'navpoint': process_navpoint(elem, self) @@ -138,7 +141,7 @@ class TOC(list): def render(self, stream, uid): from calibre.resources import ncx_template - from genshi.template import MarkupTemplate + from calibre.utils.genshi.template import MarkupTemplate doctype = ('ncx', "-//NISO//DTD ncx 2005-1//EN", "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd") template = MarkupTemplate(ncx_template) raw = template.generate(uid=uid, toc=self, __appname__=__appname__) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 
87e312ec23..f606a1e183 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -190,19 +190,11 @@ class MobiReader(object): open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) def cleanup(self): - self.processed_html = re.sub(r'
', '', self.processed_html) + self.processed_html = re.sub(r'
', '', self.processed_html) def create_opf(self, htmlfile, guide=None): mi = self.book_header.exth.mi opf = OPFCreator(os.path.dirname(htmlfile), mi) - guide_elements, toc = [], None - if guide: - for elem in guide.findAll('reference'): - if elem['type'] == 'toc': - toc = elem['href'] - continue - guide_elements.append((elem['title'], elem['type'], elem['href'])) - opf.extra_mobi_guide_elements = guide_elements if hasattr(self.book_header.exth, 'cover_offset'): opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1) manifest = [(htmlfile, 'text/x-oeb1-document')] @@ -212,7 +204,12 @@ class MobiReader(object): opf.create_manifest(manifest) opf.create_spine([os.path.basename(htmlfile)]) - + toc = None + if guide: + opf.create_guide(guide) + for ref in opf.guide: + if ref.type.lower() == 'toc': + toc = ref.href() if toc: index = self.processed_html.find('%s: %s'%exception msg += u'

Failed to perform job: '+description msg += u'

Detailed traceback:

'
+        if not isinstance(formatted_traceback, unicode):
+            formatted_traceback = formatted_traceback.decode(preferred_encoding, 'replace')
         msg += formatted_traceback + '
' msg += '

Log:

'
         if log:
diff --git a/src/calibre/gui2/main_window.py b/src/calibre/gui2/main_window.py
index b0ddc0a72d..c84ce915fe 100644
--- a/src/calibre/gui2/main_window.py
+++ b/src/calibre/gui2/main_window.py
@@ -40,8 +40,6 @@ class MainWindow(QMainWindow):
             self.__console_redirect = DebugWindow(self)
             sys.stdout = sys.stderr = self.__console_redirect
             self.__console_redirect.show()
-            print 'testing 1'
-            print 'testing 2'
     
     def unhandled_exception(self, type, value, tb):
         try:
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index fc35a54b78..a7999050bb 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -354,7 +354,7 @@ def install_man_pages(fatal_errors):
         prog = src[:src.index('=')].strip()
         if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta', 
                     'markdown-calibre', 'calibre-debug', 'fb2-meta',
-                    'calibre-fontconfig'):
+                    'calibre-fontconfig', 'calibre-parallel'):
             continue
         help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,
                     '--section', '1', '--no-info', '--include',
diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py
index fe66dad363..d22efcf0f0 100644
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@@ -37,7 +37,6 @@ class Distribution(object):
         ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
         ('convertlit', '1.8', 'convertlit', None, None),
         ('lxml', '1.3.3', 'lxml', 'python-lxml', 'python-lxml'),
-        ('genshi', '0.4.4', 'genshi', 'python-genshi', 'python-genshi'),
         ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
         ]
     
@@ -231,7 +230,7 @@ If not, head over to >> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.')
+>>> doc
+<Element "p">
+
+This produces an `Element` instance which can be further modified to add child
+nodes and attributes. This is done by "calling" the element: positional
+arguments are added as child nodes (alternatively, the `Element.append` method
+can be used for that purpose), whereas keywords arguments are added as
+attributes:
+
+>>> doc(tag.br)
+<Element "p">
+>>> print doc
+<p>Some text and <a href="http://example.org/">a link</a>.<br/></p>
+ +If an attribute name collides with a Python keyword, simply append an underscore +to the name: + +>>> doc(class_='intro') +<Element "p"> +>>> print doc +

<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>

+ +As shown above, an `Element` can easily be directly rendered to XML text by +printing it or using the Python ``str()`` function. This is basically a +shortcut for converting the `Element` to a stream and serializing that +stream: + +>>> stream = doc.generate() +>>> stream #doctest: +ELLIPSIS +<calibre.utils.genshi.core.Stream object at ...> +>>> print stream +

<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>

+ + +The `tag` object also allows creating "fragments", which are basically lists +of nodes (elements or text) that don't have a parent element. This can be useful +for creating snippets of markup that are attached to a parent element later (for +example in a template). Fragments are created by calling the `tag` object, which +returns an object of type `Fragment`: + +>>> fragment = tag('Hello, ', tag.em('world'), '!') +>>> fragment + +>>> print fragment +Hello, world! +""" + +try: + set +except NameError: + from sets import Set as set + +from calibre.utils.genshi.core import Attrs, Markup, Namespace, QName, Stream, \ + START, END, TEXT + +__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag'] +__docformat__ = 'restructuredtext en' + + +class Fragment(object): + """Represents a markup fragment, which is basically just a list of element + or text nodes. + """ + __slots__ = ['children'] + + def __init__(self): + """Create a new fragment.""" + self.children = [] + + def __add__(self, other): + return Fragment()(self, other) + + def __call__(self, *args): + """Append any positional arguments as child nodes. + + :see: `append` + """ + map(self.append, args) + return self + + def __iter__(self): + return self._generate() + + def __repr__(self): + return '<%s>' % self.__class__.__name__ + + def __str__(self): + return str(self.generate()) + + def __unicode__(self): + return unicode(self.generate()) + + def __html__(self): + return Markup(self.generate()) + + def append(self, node): + """Append an element or string as child node. 
+ + :param node: the node to append; can be an `Element`, `Fragment`, or a + `Stream`, or a Python string or number + """ + if isinstance(node, (Stream, Element, basestring, int, float, long)): + # For objects of a known/primitive type, we avoid the check for + # whether it is iterable for better performance + self.children.append(node) + elif isinstance(node, Fragment): + self.children.extend(node.children) + elif node is not None: + try: + map(self.append, iter(node)) + except TypeError: + self.children.append(node) + + def _generate(self): + for child in self.children: + if isinstance(child, Fragment): + for event in child._generate(): + yield event + elif isinstance(child, Stream): + for event in child: + yield event + else: + if not isinstance(child, basestring): + child = unicode(child) + yield TEXT, child, (None, -1, -1) + + def generate(self): + """Return a markup event stream for the fragment. + + :rtype: `Stream` + """ + return Stream(self._generate()) + + +def _kwargs_to_attrs(kwargs): + attrs = [] + names = set() + for name, value in kwargs.items(): + name = name.rstrip('_').replace('_', '-') + if value is not None and name not in names: + attrs.append((QName(name), unicode(value))) + names.add(name) + return Attrs(attrs) + + +class Element(Fragment): + """Simple XML output generator based on the builder pattern. + + Construct XML elements by passing the tag name to the constructor: + + >>> print Element('strong') + + + Attributes can be specified using keyword arguments. The values of the + arguments will be converted to strings and any special XML characters + escaped: + + >>> print Element('textarea', rows=10, cols=60) +