mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Implementing parsing of manifest, spine and toc in OPFReader
This commit is contained in:
parent
c1b6d4c136
commit
bea1ea2ef5
@ -14,22 +14,91 @@
|
|||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
'''Read/Write metadata from Open Packaging Format (.opf) files.'''
|
'''Read/Write metadata from Open Packaging Format (.opf) files.'''
|
||||||
|
|
||||||
import sys, re
|
import sys, re, os
|
||||||
|
from urllib import unquote
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
from libprs500.ebooks.metadata import MetaInformation
|
from libprs500.ebooks.metadata import MetaInformation
|
||||||
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
|
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup, NavigableString
|
||||||
from libprs500.ebooks.lrf import entity_to_unicode
|
from libprs500.ebooks.lrf import entity_to_unicode
|
||||||
|
|
||||||
|
class ManifestItem(object):
    '''
    A single ``<item>`` element of an OPF manifest.

    Exposes three attributes: ``id``, ``href`` and ``media_type``. Any
    attribute missing from the element is stored as the empty string
    before further processing.
    '''

    def __init__(self, item, cwd):
        '''
        :param item: Parsed ``<item>`` tag; must support ``has_key(name)``
                     and ``item[name]`` for attribute access.
        :param cwd:  Directory against which a relative href is resolved.
        '''
        def attribute(name):
            # Absent attributes fall back to the empty string
            if item.has_key(name):
                return item[name]
            return ''

        self.id = attribute('id')

        # Keep only the path component of the unquoted href
        if item.has_key('href'):
            path = urlparse(unquote(item['href']))[2]
        else:
            path = ''
        if not os.path.isabs(path):
            path = os.path.join(cwd, path)
        self.href = os.path.normpath(path)

        self.media_type = attribute('media-type')

    def __unicode__(self):
        '''Return the item serialized as an XML ``<item />`` element.'''
        template = u'<item id="%s" href="%s" media-type="%s" />'
        return template % (self.id, self.href, self.media_type)
|
||||||
|
|
||||||
|
class Manifest(list):
    '''
    The OPF manifest: a list of :class:`ManifestItem` objects, one per
    ``<item>`` found inside the ``<manifest>`` element.
    '''

    def __init__(self, soup, dir):
        '''
        :param soup: Parsed OPF document supporting ``find``/``findAll``.
        :param dir:  Directory used to resolve relative item hrefs.
        '''
        container = soup.find('manifest')
        if container is None:
            # No <manifest> element: the list simply stays empty
            return
        self.extend([ManifestItem(tag, dir) for tag in container.findAll('item')])

    def item(self, id):
        '''
        Return the first item whose ``id`` attribute equals `id`, or
        ``None`` when there is no such item.
        '''
        for candidate in self:
            if candidate.id == id:
                return candidate
        return None
|
||||||
|
|
||||||
|
class Spine(list):
    '''
    The OPF spine: a list of the ``idref`` values of the ``<itemref>``
    elements found inside ``<spine>``, in document order.
    '''

    def __init__(self, soup, manifest):
        '''
        :param soup:     Parsed OPF document supporting ``find``/``findAll``.
        :param manifest: Object whose ``item(id)`` method is used by
                         :meth:`items` to look up each idref.
        '''
        self.manifest = manifest
        container = soup.find('spine')
        if container is None:
            return
        # itemrefs that carry no idref attribute are skipped
        self.extend([ref['idref'] for ref in container.findAll('itemref')
                     if ref.has_key('idref')])

    def items(self):
        '''
        Lazily yield ``self.manifest.item(idref)`` for every idref in the
        spine, in order.
        '''
        for idref in self:
            entry = self.manifest.item(idref)
            yield entry
|
||||||
|
|
||||||
|
class TOC(list):
    '''
    Table of contents of an OPF package, stored as a list of
    ``(href, text)`` tuples, one per ``<a href=...>`` element found in the
    TOC document.

    The ``toc`` attribute holds the path of the TOC document that was
    parsed, or ``None`` when the package does not identify one (in which
    case the list is empty).
    '''

    def __init__(self, opfreader, cwd):
        '''
        :param opfreader: Object providing ``soup`` (the parsed OPF
                          document) and ``manifest`` (an iterable of items
                          that have an ``href`` attribute).
        :param cwd:       Directory against which relative paths (both the
                          TOC document itself and the links inside it) are
                          resolved.
        :raises IOError:  If a TOC document is identified but cannot be read.
        '''
        toc = None
        try:
            toc = opfreader.soup.find('guide').find('reference', attrs={'type':'toc'})['href']
        except (AttributeError, KeyError, TypeError):
            # No <guide>, no toc <reference>, or the reference has no href:
            # fall back to the first manifest item whose href mentions "toc".
            for item in opfreader.manifest:
                if 'toc' in item.href.lower():
                    toc = item.href
                    break

        self.toc = None
        if not toc:
            # The package does not identify a TOC document; stay empty
            # instead of failing on an unbound name.
            return

        # Only the path component of the (unquoted) reference is used
        toc = urlparse(unquote(toc))[2]
        if not os.path.isabs(toc):
            toc = os.path.join(cwd, toc)
        self.toc = toc

        # try/finally rather than ``with`` so the handle is always closed
        # without requiring a __future__ import on older interpreters
        stream = open(toc, 'rb')
        try:
            raw = stream.read()
        finally:
            stream.close()

        soup = BeautifulSoup(raw, convertEntities=BeautifulSoup.HTML_ENTITIES)
        for a in soup.findAll('a'):
            if not a.has_key('href'):
                continue
            href = urlparse(unquote(a['href']))[2]
            if not os.path.isabs(href):
                href = os.path.join(cwd, href)
            # Link text is the concatenation of all stripped text nodes
            txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
            self.append((href, txt))
|
||||||
|
|
||||||
|
|
||||||
class OPFReader(MetaInformation):
|
class OPFReader(MetaInformation):
|
||||||
|
|
||||||
ENTITY_PATTERN = re.compile(r'&(\S+);')
|
ENTITY_PATTERN = re.compile(r'&(\S+);')
|
||||||
|
|
||||||
def __init__(self, stream):
|
def __init__(self, stream, dir=os.getcwd()):
|
||||||
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
|
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
|
||||||
if hasattr(stream, 'seek'):
|
if hasattr(stream, 'seek'):
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
self.soup = BeautifulStoneSoup(stream.read())
|
self.soup = BeautifulStoneSoup(stream.read())
|
||||||
self.series = self.series_index = self.rating = None
|
self.series = self.series_index = self.rating = None
|
||||||
|
self.manifest = Manifest(self.soup, dir)
|
||||||
|
self.spine = Spine(self.soup, self.manifest)
|
||||||
|
self.toc = TOC(self, dir)
|
||||||
|
|
||||||
@apply
|
@apply
|
||||||
def title():
|
def title():
|
||||||
@ -153,6 +222,7 @@ class OPFReader(MetaInformation):
|
|||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
|
r = OPFReader(open(args[1], 'rb'))
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user