This commit is contained in:
Kovid Goyal 2007-11-18 18:58:54 +00:00
parent bc83e1f996
commit 33efccb6ad

View File

@ -108,121 +108,99 @@ class OPFReader(MetaInformation):
self.soup = BeautifulStoneSoup(stream.read())
if manage:
stream.close()
self.title = self.get_title()
self.authors = self.get_authors()
self.title_sort = self.get_title_sort()
self.author_sort = self.get_author_sort()
self.comments = self.get_comments()
self.category = self.get_category()
self.publisher = self.get_publisher()
self.isbn = self.get_isbn()
self.series = self.series_index = self.rating = None
self.manifest = Manifest(self.soup, dir)
self.spine = Spine(self.soup, self.manifest)
self.toc = TOC(self, dir)
self.cover = self.get_cover()
@property
def title(self):
    '''title'''
    # Look for a dc:title element under package.metadata; fall back to
    # self.default_title when none is found.
    node = self.soup.package.metadata.find('dc:title')
    if not node:
        return self.default_title
    # Entity references in the element text are rewritten through
    # entity_to_unicode.
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
def get_title(self):
    """Return the text of the dc:title element, or ``self.default_title``.

    The element is looked up under ``self.soup.package.metadata``. When it
    is found, its string is passed through ``self.ENTITY_PATTERN.sub`` with
    ``entity_to_unicode`` as the replacement callback.
    """
    node = self.soup.package.metadata.find('dc:title')
    if not node:
        return self.default_title
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
def get_authors(self):
    """Return the list of author names from the first author dc:creator.

    Each dc:creator under ``self.soup.package.metadata`` is examined in
    order. Its role is read from the ``role`` attribute, then ``opf:role``,
    and defaults to ``'aut'`` when neither is set. The first creator whose
    role is ``'aut'`` supplies the result: its entity-decoded text is split
    on ``,`` and ``&``.

    Names are stripped of surrounding whitespace and empty fragments are
    dropped, so ``"A & B"`` yields ``['A', 'B']`` rather than
    ``['A ', ' B']``.

    Returns an empty list when no author creator exists.
    """
    for elem in self.soup.package.metadata.findAll('dc:creator'):
        # A creator without any role attribute is treated as an author.
        role = elem.get('role') or elem.get('opf:role') or 'aut'
        if role != 'aut':
            continue
        raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
        names = []
        for chunk in raw.split(','):
            names.extend(chunk.split('&'))
        # Remove the padding left around the separators and skip fragments
        # that are empty once stripped (e.g. from a trailing comma).
        stripped = [name.strip() for name in names]
        return [name for name in stripped if name]
    return []
@property
def authors(self):
    '''authors'''
    # Walk the dc:creator elements in order; the first one whose role is
    # 'aut' supplies the author list. The role comes from ``role``, then
    # ``opf:role``, and defaults to 'aut' when neither attribute is set.
    for elem in self.soup.package.metadata.findAll('dc:creator'):
        role = elem.get('role') or elem.get('opf:role') or 'aut'
        if role != 'aut':
            continue
        raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
        names = []
        for chunk in raw.split(','):
            names.extend(chunk.split('&'))
        # Strip the whitespace left around ',' and '&' separators and drop
        # fragments that are empty once stripped, so "A & B" gives
        # ['A', 'B'] instead of ['A ', ' B'].
        stripped = [name.strip() for name in names]
        return [name for name in stripped if name]
    # No author creator present.
    return None
@property
def author_sort(self):
    '''author sort'''
    # Only a creator explicitly marked with role 'aut' (via ``role`` or
    # ``opf:role``) is considered; the first such creator decides the
    # result.
    for elem in self.soup.package.metadata.findAll('dc:creator'):
        role = elem.get('role') or elem.get('opf:role')
        if role != 'aut':
            continue
        file_as = elem.get('file-as')
        if not file_as:
            return None
        return self.ENTITY_PATTERN.sub(entity_to_unicode, file_as)
def get_author_sort(self):
    """Return the ``file-as`` value of the first creator with role 'aut'.

    The role is read from the ``role`` attribute, falling back to
    ``opf:role``. A non-empty ``file-as`` value is entity-decoded through
    ``self.ENTITY_PATTERN`` / ``entity_to_unicode``. ``None`` is returned
    when that creator has no ``file-as`` or when no such creator exists.
    """
    for elem in self.soup.package.metadata.findAll('dc:creator'):
        role = elem.get('role') or elem.get('opf:role')
        if role != 'aut':
            continue
        file_as = elem.get('file-as')
        if not file_as:
            return None
        return self.ENTITY_PATTERN.sub(entity_to_unicode, file_as)
    return None
def get_title_sort(self):
    """Return the title sort value; this implementation always gives None."""
    return None
@property
def title_sort(self):
    '''title sort'''
    # No title sort value is derived here; the property is always None.
    return None
@property
def comments(self):
    '''comments'''
    # The dc:description element is searched for from the document root.
    node = self.soup.find('dc:description')
    if not node:
        return None
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
def get_comments(self):
    """Return the entity-decoded text of dc:description, or None.

    The element is searched for via ``self.soup.find``. When it is found,
    its string is passed through ``self.ENTITY_PATTERN.sub`` with
    ``entity_to_unicode`` as the replacement callback.
    """
    node = self.soup.find('dc:description')
    if not node:
        return None
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
@property
def category(self):
    '''category'''
    # The category is taken from the dc:type element, searched for from
    # the document root.
    node = self.soup.find('dc:type')
    if not node:
        return None
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
def get_category(self):
    """Return the entity-decoded text of dc:type, or None.

    The element is searched for via ``self.soup.find``. When it is found,
    its string is passed through ``self.ENTITY_PATTERN.sub`` with
    ``entity_to_unicode`` as the replacement callback.
    """
    node = self.soup.find('dc:type')
    if not node:
        return None
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
@property
def publisher(self):
    '''publisher'''
    # The dc:publisher element is searched for from the document root.
    node = self.soup.find('dc:publisher')
    if not node:
        return None
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
def get_publisher(self):
    """Return the entity-decoded text of dc:publisher, or None.

    The element is searched for via ``self.soup.find``. When it is found,
    its string is passed through ``self.ENTITY_PATTERN.sub`` with
    ``entity_to_unicode`` as the replacement callback.
    """
    node = self.soup.find('dc:publisher')
    if not node:
        return None
    return self.ENTITY_PATTERN.sub(entity_to_unicode, node.string)
@property
def isbn(self):
    '''ISBN number'''
    # Scan the dc:identifier elements under package.metadata and return the
    # string of the first one whose scheme (``scheme`` attribute, falling
    # back to ``opf:scheme``) equals 'isbn', compared case-insensitively.
    for item in self.soup.package.metadata.findAll('dc:identifier'):
        scheme = item.get('scheme') or item.get('opf:scheme')
        # An identifier with no scheme attribute gives None here; skip it
        # rather than calling .lower() on None, which raised
        # AttributeError.
        if scheme and scheme.lower() == 'isbn':
            return item.string
    return None
@property
def cover(self):
    '''cover'''
    # The cover is declared as a <reference> inside the package's <guide>.
    guide = self.soup.package.find('guide')
    if not guide:
        return None
    for ref in guide.findAll('reference'):
        ref_type = ref.get('type')
        # References without a type attribute cannot identify a cover.
        if not ref_type:
            continue
        if ref_type.lower() in ('cover', 'other.ms-coverimage-standard'):
            return ref.get('href')
    return None
def get_isbn(self):
    """Return the string of the first dc:identifier whose scheme is ISBN.

    The dc:identifier elements under ``self.soup.package.metadata`` are
    scanned in order. The scheme is read from the ``scheme`` attribute,
    falling back to ``opf:scheme``, and compared case-insensitively with
    ``'isbn'``. Identifiers that carry no scheme attribute at all are
    skipped.

    Returns ``None`` when no ISBN identifier is present.
    """
    for item in self.soup.package.metadata.findAll('dc:identifier'):
        scheme = item.get('scheme') or item.get('opf:scheme')
        # Without this guard a scheme-less identifier made scheme None and
        # the .lower() call raised AttributeError.
        if scheme and scheme.lower() == 'isbn':
            return item.string
    return None
def get_cover(self):
    """Return the ``href`` of the cover reference in the guide, or None.

    The ``guide`` element is looked up under ``self.soup.package``. Its
    ``reference`` children are scanned in order, and the first one whose
    ``type`` attribute, lower-cased, is ``'cover'`` or
    ``'other.ms-coverimage-standard'`` supplies the result. References
    without a ``type`` attribute are ignored.
    """
    guide = self.soup.package.find('guide')
    if not guide:
        return None
    for ref in guide.findAll('reference'):
        ref_type = ref.get('type')
        if not ref_type:
            continue
        if ref_type.lower() in ('cover', 'other.ms-coverimage-standard'):
            return ref.get('href')
    return None
def possible_cover_prefixes(self):
isbn, ans = [], []
@ -237,7 +215,7 @@ class OPFReader(MetaInformation):
def main(args=sys.argv):
    """Read the OPF file named by ``args[1]`` and print the parsed reader.

    The file is opened once in binary mode and is always closed, even if
    parsing or printing raises. Returns 0.
    """
    stream = open(args[1], 'rb')
    try:
        # Parenthesised single-argument print behaves the same as the
        # print statement on Python 2.
        print(OPFReader(stream))
    finally:
        # Closing again is harmless if the reader already closed the stream.
        stream.close()
    return 0
if __name__ == '__main__':