mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Implement #518
This commit is contained in:
parent
3df02e65e1
commit
ac0c54d15c
@ -43,11 +43,12 @@ class MetaInformation(object):
|
||||
@staticmethod
def copy(mi):
    '''
    Create a new MetaInformation object carrying the same metadata as C{mi}.

    The title and authors are passed to the constructor; every other known
    attribute that C{mi} actually has is then assigned onto the new object.
    Values are assigned by reference, not deep-copied, so mutable values
    (e.g. the tags list) are shared between C{mi} and the returned object.

    @param mi: The object to copy from.
    @return: The newly created MetaInformation object.
    '''
    ans = MetaInformation(mi.title, mi.authors)
    for attr in ('author_sort', 'title_sort', 'comments', 'category',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn', 'tags', 'cover_data'):
        # hasattr guard: tolerate source objects lacking optional fields.
        if hasattr(mi, attr):
            setattr(ans, attr, getattr(mi, attr))
    # Without this return the copy was built and then silently discarded.
    return ans
|
||||
|
||||
|
||||
def __init__(self, title, authors):
|
||||
'''
|
||||
@ -77,6 +78,32 @@ class MetaInformation(object):
|
||||
self.cover_data = (None, None) if not mi else mi.cover_data #(extension, data)
|
||||
|
||||
|
||||
def smart_update(self, mi):
    '''
    Merge the information in C{mi} into self. In case of conflicts, the information
    in C{mi} takes precedence, unless the information in mi is NULL.

    A title or first author equal to 'unknown' (case-insensitive) is treated
    as NULL. Tags are merged (union, first-seen order, duplicates removed)
    rather than replaced, and the cover is only replaced when C{mi} has one.

    @param mi: The object whose metadata should be merged into self.
    '''
    if mi.title and mi.title.lower() != 'unknown':
        self.title = mi.title

    # The first author may itself be None or empty (a reader that found no
    # author can hand over [None]); check it before calling .lower() on it.
    if mi.authors and mi.authors[0] and mi.authors[0].lower() != 'unknown':
        self.authors = mi.authors

    for attr in ('author_sort', 'title_sort', 'comments', 'category',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn'):
        val = getattr(mi, attr, None)
        if val is not None:
            setattr(self, attr, val)

    # Build a fresh list instead of using +=, which would mutate a list
    # object that may be shared with another metadata object. Keeping
    # first-seen order makes the result deterministic.
    merged, seen = [], set()
    for tag in list(self.tags or []) + list(mi.tags or []):
        if tag not in seen:
            seen.add(tag)
            merged.append(tag)
    self.tags = merged

    # cover_data is an (extension, data) pair; (None, None) means no cover.
    if mi.cover_data[0] is not None:
        self.cover_data = mi.cover_data
|
||||
|
||||
|
||||
def __str__(self):
|
||||
ans = u''
|
||||
ans += u'Title : ' + unicode(self.title) + u'\n'
|
||||
|
62
src/libprs500/ebooks/metadata/html.py
Normal file
62
src/libprs500/ebooks/metadata/html.py
Normal file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
'''
|
||||
Try to read metadata from an HTML file.
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from libprs500.ebooks.metadata import MetaInformation
|
||||
|
||||
def get_metadata(stream):
    '''
    Read metadata from an HTML file.

    The whole stream is searched for HTML comments that contain fields of
    the form C{TITLE="..."}, C{AUTHOR="..."}, C{PUBLISHER="..."} and
    C{ISBN="..."} (single or double quotes). The first match of each field
    is used.

    @param stream: A file-like object; its entire content is read.
    @return: A L{MetaInformation} object. Fields that are not found are
             left at whatever the constructor sets them to.
    '''
    src = stream.read()

    def field(name):
        # All fields share one comment syntax; only the key differs.
        pat = re.compile(r'<!--.*?' + name + r'=[\'"]([^"\']+)[\'"].*?-->',
                         re.DOTALL)
        match = pat.search(src)
        return match.group(1) if match else None

    title = field('TITLE')

    author = field('AUTHOR')
    if author:
        author = author.replace(',', ';')

    # When no author is found pass None instead of [None]: a one-element
    # list holding None is truthy and breaks code that lowercases the
    # first author. NOTE(review): assumes the constructor accepts None for
    # authors, as MetaInformation(None, None) is used by the callers.
    mi = MetaInformation(title, [author] if author else None)

    publisher = field('PUBLISHER')
    if publisher:
        mi.publisher = publisher

    isbn = field('ISBN')
    if isbn:
        # Keep only digits and the X check character.
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    return mi
|
||||
|
||||
|
@ -13,11 +13,14 @@
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
import os, re
|
||||
|
||||
from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata
|
||||
from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata
|
||||
from libprs500.ebooks.metadata.pdf import get_metadata as pdf_metadata
|
||||
from libprs500.ebooks.metadata.lit import get_metadata as lit_metadata
|
||||
from libprs500.ebooks.metadata.epub import get_metadata as epub_metadata
|
||||
from libprs500.ebooks.metadata.html import get_metadata as html_metadata
|
||||
from libprs500.ebooks.metadata.rtf import set_metadata as set_rtf_metadata
|
||||
from libprs500.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
||||
|
||||
@ -25,17 +28,23 @@ from libprs500.ebooks.metadata import MetaInformation
|
||||
|
||||
def get_metadata(stream, stream_type='lrf'):
    '''
    Read metadata from C{stream}, falling back to its file name.

    A base record is first derived from the stream's file name (when the
    stream has a C{name} attribute); missing title/authors default to the
    file name / 'Unknown'. The metadata read by the format specific reader
    is then merged on top of it with C{smart_update}.

    @param stream: A file-like object positioned at the start of the file.
    @param stream_type: The file type/extension, case-insensitive. Unknown
                        or empty types yield filename-only metadata.
    @return: A L{MetaInformation} object.
    '''
    if stream_type:
        stream_type = stream_type.lower()
    # All (X)HTML extensions are handled by the same reader.
    if stream_type in ('html', 'htm', 'xhtml', 'xhtm'):
        stream_type = 'html'

    # Explicit dispatch table instead of eval() on a caller supplied
    # string: no arbitrary name lookup, no TypeError for a None type, and
    # errors raised inside a reader are no longer mistaken for "unknown
    # format".
    readers = {
        'rtf': rtf_metadata,
        'lrf': lrf_metadata,
        'pdf': pdf_metadata,
        'lit': lit_metadata,
        'epub': epub_metadata,
        'html': html_metadata,
    }
    reader = readers.get(stream_type)
    if reader is None:
        mi = MetaInformation(None, None)
    else:
        mi = reader(stream)

    name = os.path.basename(stream.name) if hasattr(stream, 'name') else ''
    base = metadata_from_filename(name)
    if not base.title:
        base.title = name if name else 'Unknown'
    if not base.authors:
        base.authors = ['Unknown']
    base.smart_update(mi)
    return base
|
||||
|
||||
def set_metadata(stream, mi, stream_type='lrf'):
|
||||
if stream_type: stream_type = stream_type.lower()
|
||||
@ -44,3 +53,29 @@ def set_metadata(stream, mi, stream_type='lrf'):
|
||||
elif stream_type == 'rtf':
|
||||
set_rtf_metadata(stream, mi)
|
||||
|
||||
# Pattern used by metadata_from_filename to split a file name of the form
# "<title> - <author>"; the author part stops at the first underscore.
_filename_pat = re.compile(r'(?P<title>.+) - (?P<author>[^_]+)')
|
||||
|
||||
def metadata_from_filename(name):
    '''
    Guess metadata from a file name.

    The extension is stripped and the remainder is searched with the module
    level C{_filename_pat}. Each of the named groups C{title}, C{author} and
    C{authors} is optional in the pattern: a group the pattern does not
    define is simply skipped. An C{authors} group is split on ',' and '&'
    and overrides a single C{author}.

    @param name: The file name (without directory components).
    @return: A L{MetaInformation} object; empty if the pattern does not match.
    '''
    stem = os.path.splitext(name)[0]
    info = MetaInformation(None, None)
    found = _filename_pat.search(stem)
    if not found:
        return info

    # group() raises IndexError for a group name the pattern lacks.
    try:
        info.title = found.group('title')
    except IndexError:
        pass

    try:
        info.authors = [found.group('author')]
    except IndexError:
        pass

    try:
        combined = found.group('authors')
        info.authors = [piece
                        for chunk in combined.split(',')
                        for piece in chunk.split('&')]
    except IndexError:
        pass

    return info
|
||||
|
Loading…
x
Reference in New Issue
Block a user