mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Bug fixes and auto import of metadata from .opf files
This commit is contained in:
parent
dba36da690
commit
5961fe6fc7
@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
|
|||||||
suit your distribution.
|
suit your distribution.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "0.3.28"
|
__version__ = "0.3.29"
|
||||||
__docformat__ = "epytext"
|
__docformat__ = "epytext"
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
|
@ -14,13 +14,14 @@
|
|||||||
## You should have received a copy of the GNU General Public License along
|
## You should have received a copy of the GNU General Public License along
|
||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
from libprs500.lrf.html.BeautifulSoup import BeautifulStoneSoup
|
||||||
"""
|
"""
|
||||||
Code to convert HTML ebooks into LRF ebooks.
|
Code to convert HTML ebooks into LRF ebooks.
|
||||||
|
|
||||||
I am indebted to esperanc for the initial CSS->Xylog Style conversion routines
|
I am indebted to esperanc for the initial CSS->Xylog Style conversion routines
|
||||||
and to Falstaff for pylrs.
|
and to Falstaff for pylrs.
|
||||||
"""
|
"""
|
||||||
import os, re, sys, shutil, traceback, copy, codecs
|
import os, re, sys, shutil, traceback, copy, glob
|
||||||
from htmlentitydefs import name2codepoint
|
from htmlentitydefs import name2codepoint
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
@ -32,7 +33,7 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
import Image as PILImage
|
import Image as PILImage
|
||||||
|
|
||||||
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
|
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Comment, Tag, \
|
||||||
NavigableString, Declaration, ProcessingInstruction
|
NavigableString, Declaration, ProcessingInstruction
|
||||||
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
|
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
|
||||||
ImageBlock, JumpButton, CharButton, \
|
ImageBlock, JumpButton, CharButton, \
|
||||||
@ -997,9 +998,19 @@ class HTMLConverter(object):
|
|||||||
def process_file(path, options):
|
def process_file(path, options):
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
dirpath = None
|
dirpath = None
|
||||||
|
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
||||||
try:
|
try:
|
||||||
dirpath, path = get_path(path)
|
dirpath, path = get_path(path)
|
||||||
cpath, tpath = '', ''
|
cpath, tpath = '', ''
|
||||||
|
isbn = try_opf(path, options)
|
||||||
|
if not options.cover and isbn:
|
||||||
|
for item in isbn:
|
||||||
|
matches = glob.glob(re.sub('-', '', item[1])+'.*')
|
||||||
|
for match in matches:
|
||||||
|
if match.lower().endswith('.jpeg') or match.lower().endswith('.jpg') or \
|
||||||
|
match.lower().endswith('.gif') or match.lower().endswith('.bmp'):
|
||||||
|
options.cover = match
|
||||||
|
break
|
||||||
if options.cover:
|
if options.cover:
|
||||||
options.cover = os.path.abspath(os.path.expanduser(options.cover))
|
options.cover = os.path.abspath(os.path.expanduser(options.cover))
|
||||||
cpath = options.cover
|
cpath = options.cover
|
||||||
@ -1021,6 +1032,10 @@ def process_file(path, options):
|
|||||||
tpath = tf.name
|
tpath = tf.name
|
||||||
else:
|
else:
|
||||||
raise ConversionError, 'Cannot read from: %s', (options.cover,)
|
raise ConversionError, 'Cannot read from: %s', (options.cover,)
|
||||||
|
|
||||||
|
|
||||||
|
if not options.title:
|
||||||
|
options.title = default_title
|
||||||
title = (options.title, options.title_sort)
|
title = (options.title, options.title_sort)
|
||||||
author = (options.author, options.author_sort)
|
author = (options.author, options.author_sort)
|
||||||
args = dict(font_delta=options.font_delta, title=title, \
|
args = dict(font_delta=options.font_delta, title=title, \
|
||||||
@ -1051,7 +1066,7 @@ def process_file(path, options):
|
|||||||
link_exclude=re.compile(le), page_break=pb,
|
link_exclude=re.compile(le), page_break=pb,
|
||||||
hide_broken_links=not options.show_broken_links)
|
hide_broken_links=not options.show_broken_links)
|
||||||
conv.process_links()
|
conv.process_links()
|
||||||
oname = options.output
|
oname = options.output
|
||||||
if not oname:
|
if not oname:
|
||||||
suffix = '.lrs' if options.lrs else '.lrf'
|
suffix = '.lrs' if options.lrs else '.lrf'
|
||||||
name = os.path.splitext(os.path.basename(path))[0] + suffix
|
name = os.path.splitext(os.path.basename(path))[0] + suffix
|
||||||
@ -1064,7 +1079,42 @@ def process_file(path, options):
|
|||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
if dirpath:
|
if dirpath:
|
||||||
shutil.rmtree(dirpath, True)
|
shutil.rmtree(dirpath, True)
|
||||||
|
|
||||||
|
def try_opf(path, options):
|
||||||
|
try:
|
||||||
|
opf = glob.glob('*.opf')[0]
|
||||||
|
except IndexError:
|
||||||
|
return
|
||||||
|
soup = BeautifulStoneSoup(open(opf).read())
|
||||||
|
try:
|
||||||
|
title = soup.package.metadata.find('dc:title')
|
||||||
|
if title and not options.title:
|
||||||
|
options.title = title.string
|
||||||
|
creators = soup.package.metadata.findAll('dc:creator')
|
||||||
|
if options.author == 'Unknown':
|
||||||
|
for author in creators:
|
||||||
|
role = author.get('role')
|
||||||
|
if not role:
|
||||||
|
role = author.get('opf:role')
|
||||||
|
if role == 'aut':
|
||||||
|
options.author = author.string
|
||||||
|
fa = author.get('file-as')
|
||||||
|
if fa:
|
||||||
|
options.author_sort = fa
|
||||||
|
isbn = []
|
||||||
|
for item in soup.package.metadata.findAll('dc:identifier'):
|
||||||
|
scheme = item.get('scheme')
|
||||||
|
if not scheme:
|
||||||
|
scheme = item.get('opf:scheme')
|
||||||
|
isbn.append((scheme, item.string))
|
||||||
|
return isbn
|
||||||
|
except Exception, err:
|
||||||
|
if options.verbose:
|
||||||
|
print >>sys.stderr, 'Failed to process opf file', err
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_options(argv=None, cli=True):
|
def parse_options(argv=None, cli=True):
|
||||||
""" CLI for html -> lrf conversions """
|
""" CLI for html -> lrf conversions """
|
||||||
if not argv:
|
if not argv:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user