mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Implement #518
This commit is contained in:
parent
3df02e65e1
commit
ac0c54d15c
@ -43,11 +43,12 @@ class MetaInformation(object):
|
|||||||
@staticmethod
def copy(mi):
    '''
    Return a new C{MetaInformation} carrying the same metadata as C{mi}.

    The new object is built from C{mi.title} and C{mi.authors}; every other
    known attribute that C{mi} actually has is then copied across. Values
    are copied by reference (no deep copy is made).

    @param mi: The object whose metadata is to be duplicated.
    @return: The newly created C{MetaInformation}.
    '''
    ans = MetaInformation(mi.title, mi.authors)
    for attr in ('author_sort', 'title_sort', 'comments', 'category',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn', 'tags', 'cover_data'):
        # Only copy what the source object really defines, so partially
        # populated objects can be copied as well.
        if hasattr(mi, attr):
            setattr(ans, attr, getattr(mi, attr))
    # The copy used to be built and then dropped, so callers received None.
    return ans
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, title, authors):
|
def __init__(self, title, authors):
|
||||||
'''
|
'''
|
||||||
@ -76,7 +77,33 @@ class MetaInformation(object):
|
|||||||
self.tags = [] if not mi else mi.tags
|
self.tags = [] if not mi else mi.tags
|
||||||
self.cover_data = (None, None) if not mi else mi.cover_data #(extension, data)
|
self.cover_data = (None, None) if not mi else mi.cover_data #(extension, data)
|
||||||
|
|
||||||
|
|
||||||
|
def smart_update(self, mi):
    '''
    Merge the information in C{mi} into self. In case of conflicts, the information
    in C{mi} takes precedence, unless the information in mi is NULL.

    A title or first author equal to 'unknown' (case insensitive) is treated
    as NULL. Tags from both objects are combined, without duplicates and
    keeping first-seen order. The cover is only replaced when C{mi} carries
    one (its extension is not None).

    @param mi: The object whose metadata is merged into self.
    '''
    title = getattr(mi, 'title', None)
    if title and title.lower() != 'unknown':
        self.title = title

    authors = getattr(mi, 'authors', None)
    # Guard the first entry too: a list such as [None] is truthy but has
    # no usable author in it.
    if authors and authors[0] and authors[0].lower() != 'unknown':
        self.authors = authors

    for attr in ('author_sort', 'title_sort', 'comments', 'category',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn'):
        val = getattr(mi, attr, None)
        if val is not None:
            setattr(self, attr, val)

    # Build a fresh list instead of extending self.tags in place: the list
    # may be shared with another object, which must not be modified here.
    merged = []
    seen = set()
    own_tags = getattr(self, 'tags', None) or []
    new_tags = getattr(mi, 'tags', None) or []
    for tag in list(own_tags) + list(new_tags):
        if tag not in seen:
            seen.add(tag)
            merged.append(tag)
    self.tags = merged

    cover = getattr(mi, 'cover_data', None)
    if cover and cover[0] is not None:
        self.cover_data = cover
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
ans = u''
|
ans = u''
|
||||||
ans += u'Title : ' + unicode(self.title) + u'\n'
|
ans += u'Title : ' + unicode(self.title) + u'\n'
|
||||||
|
62
src/libprs500/ebooks/metadata/html.py
Normal file
62
src/libprs500/ebooks/metadata/html.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
'''
|
||||||
|
Try to read metadata from an HTML file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from libprs500.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
def _comment_field(src, name):
    # Return the value of a NAME="value" (or NAME='value') pair found
    # inside an HTML comment in src, or None when there is no such pair.
    pat = re.compile(r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % name, re.DOTALL)
    match = pat.search(src)
    return match.group(1) if match else None


def get_metadata(stream):
    '''
    Read metadata from the HTML document in C{stream}.

    The document is searched for HTML comments containing TITLE, AUTHOR,
    PUBLISHER and ISBN entries of the form NAME="value".

    @param stream: An object with a C{read()} method returning the HTML source.
    @return: A C{MetaInformation} built from whatever entries were found.
    '''
    src = stream.read()

    title = _comment_field(src, 'TITLE')

    author = _comment_field(src, 'AUTHOR')
    if author is not None:
        author = author.replace(',', ';')

    # Pass None rather than [None] when no author was found, so that
    # consumers never see a list holding a non-string entry.
    mi = MetaInformation(title, [author] if author else None)

    publisher = _comment_field(src, 'PUBLISHER')
    if publisher is not None:
        mi.publisher = publisher

    isbn = _comment_field(src, 'ISBN')
    if isbn is not None:
        # Keep only the characters that can be part of an ISBN.
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    return mi
|
||||||
|
|
||||||
|
|
@ -13,11 +13,14 @@
|
|||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
import os, re
|
||||||
|
|
||||||
from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata
|
from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata
|
||||||
from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata
|
from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata
|
||||||
from libprs500.ebooks.metadata.pdf import get_metadata as pdf_metadata
|
from libprs500.ebooks.metadata.pdf import get_metadata as pdf_metadata
|
||||||
from libprs500.ebooks.metadata.lit import get_metadata as lit_metadata
|
from libprs500.ebooks.metadata.lit import get_metadata as lit_metadata
|
||||||
from libprs500.ebooks.metadata.epub import get_metadata as epub_metadata
|
from libprs500.ebooks.metadata.epub import get_metadata as epub_metadata
|
||||||
|
from libprs500.ebooks.metadata.html import get_metadata as html_metadata
|
||||||
from libprs500.ebooks.metadata.rtf import set_metadata as set_rtf_metadata
|
from libprs500.ebooks.metadata.rtf import set_metadata as set_rtf_metadata
|
||||||
from libprs500.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
from libprs500.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
||||||
|
|
||||||
@ -25,17 +28,23 @@ from libprs500.ebooks.metadata import MetaInformation
|
|||||||
|
|
||||||
def get_metadata(stream, stream_type='lrf'):
    '''
    Read metadata from C{stream}, combined with a guess from its file name.

    The format specific reader named C{<stream_type>_metadata} in this
    module is used when there is one; otherwise an empty C{MetaInformation}
    is used. The result starts from the metadata guessed from the file name
    (falling back to the file name as title and 'Unknown' as author) and is
    then updated with whatever the reader found.

    @param stream: A file like object. Its C{name} attribute, when present,
                   is used for the file name based guess.
    @param stream_type: The file type/extension, case insensitive. May be None.
    @return: The merged C{MetaInformation}.
    '''
    if stream_type:
        stream_type = stream_type.lower()
    # All HTML flavours share one reader.
    if stream_type in ('html', 'htm', 'xhtml', 'xhtm'):
        stream_type = 'html'

    # Look the reader up by name instead of eval()ing a string built from
    # caller input. Unknown (or None) types fall back to empty metadata, and
    # errors raised inside a reader are no longer mistaken for a missing one.
    func = globals().get('%s_metadata' % stream_type)
    if callable(func):
        mi = func(stream)
    else:
        mi = MetaInformation(None, None)

    name = os.path.basename(stream.name) if hasattr(stream, 'name') else ''
    base = metadata_from_filename(name)
    if not base.title:
        base.title = name if name else 'Unknown'
    if not base.authors:
        base.authors = ['Unknown']
    base.smart_update(mi)
    return base
|
||||||
|
|
||||||
def set_metadata(stream, mi, stream_type='lrf'):
|
def set_metadata(stream, mi, stream_type='lrf'):
|
||||||
if stream_type: stream_type = stream_type.lower()
|
if stream_type: stream_type = stream_type.lower()
|
||||||
@ -43,4 +52,30 @@ def set_metadata(stream, mi, stream_type='lrf'):
|
|||||||
set_lrf_metadata(stream, mi)
|
set_lrf_metadata(stream, mi)
|
||||||
elif stream_type == 'rtf':
|
elif stream_type == 'rtf':
|
||||||
set_rtf_metadata(stream, mi)
|
set_rtf_metadata(stream, mi)
|
||||||
|
|
||||||
|
# Pattern used to guess metadata from file names of the form
# "Title - Author". The author part may not contain underscores.
_filename_pat = re.compile(r'(?P<title>.+) - (?P<author>[^_]+)')
|
||||||
|
|
||||||
|
def metadata_from_filename(name):
    '''
    Guess metadata from a file name.

    The extension is removed from C{name} and the remainder is searched with
    C{_filename_pat}. The named groups 'title', 'author' and 'authors' are
    used when the pattern defines them; an 'authors' value is split on ','
    and '&' into a list of authors.

    @param name: The file name (without directory) to examine.
    @return: A C{MetaInformation}; fields that could not be guessed are left
             as created by C{MetaInformation(None, None)}.
    '''
    stem = os.path.splitext(name)[0]
    mi = MetaInformation(None, None)
    found = _filename_pat.search(stem)
    if not found:
        return mi

    # A group missing from the pattern raises IndexError; such fields are
    # simply left untouched.
    try:
        mi.title = found.group('title')
    except IndexError:
        pass

    try:
        mi.authors = [found.group('author')]
    except IndexError:
        pass

    try:
        combined = found.group('authors')
        mi.authors = [part
                      for chunk in combined.split(',')
                      for part in chunk.split('&')]
    except IndexError:
        pass

    return mi
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user