mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-31 14:33:54 -04:00

Implemented any2lrf

parent 709dd81a08
commit 9c82e833ac
1 setup.py
@@ -31,6 +31,7 @@ entry_points = {
         'web2disk = libprs500.web.fetch.simple:main',\
         'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',\
         'pdf2lrf = libprs500.ebooks.lrf.pdf.convert_from:main',\
+        'any2lrf = libprs500.ebooks.lrf.any.convert_from:main',\
         'libprs500-beta = libprs500.gui2.main:main',\
       ],
   'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main']
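(Not part of the diff, for orientation only: a console_scripts entry like the one added above makes setuptools generate an any2lrf command at install time. A hypothetical sketch of that generated wrapper, assuming standard setuptools behaviour:)

# Hypothetical sketch of the wrapper script setuptools generates for the
# 'any2lrf' entry point added above; it is produced at install time and
# is not part of this commit.
import sys
from libprs500.ebooks.lrf.any.convert_from import main

if __name__ == '__main__':
    sys.exit(main())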
@@ -64,14 +64,13 @@ def filename_to_utf8(name):
     return name.decode(codec, 'replace').encode('utf8')
 
 def extract(path, dir):
-    import os
     ext = os.path.splitext(path)[1][1:].lower()
     extractor = None
     if ext == 'zip':
         from libprs500.libunzip import extract
         extractor = extract
     elif ext == 'rar':
-        from libprs500.libunrar import extract
+        from libprs500.libunrar import extract # In case the dll is not found
         extractor = extract
     if not extractor:
         raise Exception('Unknown archive type')
@@ -131,7 +131,7 @@ class Device(object):
         keys C{title}, C{authors}, C{cover}, C{tags}. The value of the C{cover}
         element can be None or a three element tuple (width, height, data)
         where data is the image data in JPEG format as a string. C{tags} must be
-        a possibly empty list of strings.
+        a possibly empty list of strings. C{authors} must be a string.
         @param booklists: A tuple containing the result of calls to
                           (L{books}(oncard=False), L{books}(oncard=True)).
         '''
14 src/libprs500/ebooks/lrf/any/__init__.py (new file)
@@ -0,0 +1,14 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
136 src/libprs500/ebooks/lrf/any/convert_from.py (new file)
@@ -0,0 +1,136 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''Convert any ebook file into a LRF file.'''
+
+import sys, os, logging, shutil, tempfile, glob
+
+from libprs500.ebooks.lrf import option_parser
+from libprs500 import __appname__, setup_cli_handlers, extract
+from libprs500.ebooks.lrf.lit.convert_from import process_file as lit2lrf
+from libprs500.ebooks.lrf.pdf.convert_from import process_file as pdf2lrf
+from libprs500.ebooks.lrf.rtf.convert_from import process_file as rtf2lrf
+from libprs500.ebooks.lrf.txt.convert_from import process_file as txt2lrf
+from libprs500.ebooks.lrf.html.convert_from import process_file as html2lrf
+
+def largest_file(files):
+    maxsize, file = 0, None
+    for f in files:
+        size = os.stat(f).st_size
+        if size > maxsize:
+            maxsize = size
+            file = f
+    return file
+
+def find_htmlfile(dir):
+    for pair in (('*toc*.htm*', '*toc*.xhtm*'), ('*.htm*', '*.xhtm*')):
+        files = glob.glob(os.path.join(dir, pair[0]))
+        files += glob.glob(os.path.join(dir, pair[1]))
+        file = largest_file(files)
+        if file:
+            return file
+
+
+def handle_archive(path):
+    tdir = tempfile.mkdtemp(prefix=__appname__+'_')
+    extract(path, tdir)
+    files = []
+    cdir = tdir
+    temp = os.listdir(tdir)
+    file = None
+    if len(temp) == 1 and os.path.isdir(os.path.join(tdir, temp[0])):
+        cdir = os.path.join(tdir, temp[0])
+    for ext in ('lit', 'rtf', 'pdf', 'txt'):
+        pat = os.path.join(cdir, '*.'+ext)
+        files.extend(glob.glob(pat))
+    file = largest_file(files)
+    if file:
+        return tdir, file
+    file = find_htmlfile(cdir)
+    return tdir, file
+
+def process_file(path, options, logger=None):
+    path = os.path.abspath(os.path.expanduser(path))
+    tdir = None
+    if logger is None:
+        level = logging.DEBUG if options.verbose else logging.INFO
+        logger = logging.getLogger('any2lrf')
+        setup_cli_handlers(logger, level)
+    if not os.access(path, os.R_OK):
+        logger.critical('Cannot read from %s', path)
+        return 1
+    ext = os.path.splitext(path)[1]
+    if not ext or ext == '.':
+        logger.critical('Unknown file type: %s', path)
+        return 1
+    ext = ext[1:].lower()
+    cwd = os.getcwd()
+    if not options.output:
+        fmt = '.lrs' if options.lrs else '.lrf'
+        options.output = os.path.splitext(os.path.basename(path))[0] + fmt
+    options.output = os.path.abspath(os.path.expanduser(options.output))
+    if ext in ['zip', 'rar']:
+        newpath = None
+        try:
+            tdir, newpath = handle_archive(path)
+        except:
+            logger.exception(' ')
+        if not newpath:
+            logger.critical('Could not find ebook in archive')
+            return 1
+        path = newpath
+        logger.info('Found ebook in archive: %s', path)
+    try:
+        ext = os.path.splitext(path)[1][1:].lower()
+        convertor = None
+        if 'htm' in ext:
+            convertor = html2lrf
+        elif 'lit' == ext:
+            convertor = lit2lrf
+        elif 'pdf' == ext:
+            convertor = pdf2lrf
+        elif 'rtf' == ext:
+            convertor = rtf2lrf
+        elif 'txt' == ext:
+            convertor = txt2lrf
+        convertor(path, options, logger)
+    finally:
+        os.chdir(cwd)
+        if tdir and os.path.exists(tdir):
+            shutil.rmtree(tdir)
+
+
+def main(args=sys.argv, logger=None):
+    parser = option_parser('''\
+any2lrf myfile
+
+Convert any ebook format into LRF. Supported formats are:
+LIT, RTF, TXT, HTML and PDF. any2lrf will also process a RAR or
+ZIP archive.
+''')
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        print 'No file to convert specified.'
+        return 1
+
+    process_file(args[1], options, logger)
+
+
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
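(Illustration only, not part of the commit: a minimal sketch of driving the new module from Python; 'mybook.zip' is a placeholder file name.)

# Exercises the new any2lrf module added above.
import sys
from libprs500.ebooks.lrf.any.convert_from import main

# Equivalent to running `any2lrf mybook.zip` from the shell: the archive is
# extracted, the largest LIT/RTF/PDF/TXT (or, failing that, HTML) file inside
# is located, and the matching *2lrf converter is invoked on it.
sys.exit(main(['any2lrf', 'mybook.zip']))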
@@ -20,20 +20,18 @@ Code to convert HTML ebooks into LRF ebooks.
 I am indebted to esperanc for the initial CSS->Xylog Style conversion code
 and to Falstaff for pylrs.
 """
-import os, re, sys, shutil, copy, glob, logging
+import os, re, sys, copy, glob, logging
 from htmlentitydefs import name2codepoint
 from urllib import unquote
 from urlparse import urlparse
-from tempfile import mkdtemp
-from operator import itemgetter
 from math import ceil, floor
 try:
     from PIL import Image as PILImage
 except ImportError:
     import Image as PILImage
 
-from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
-                 Comment, Tag, NavigableString, Declaration, ProcessingInstruction
+from libprs500.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \
+                 NavigableString, Declaration, ProcessingInstruction
 from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
                 TextBlock, ImageBlock, JumpButton, CharButton, \
                 Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
@@ -43,8 +41,9 @@ from libprs500.ebooks.lrf import Book
 from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks import ConversionError
 from libprs500.ebooks.lrf.html.table import Table
-from libprs500 import extract, filename_to_utf8, setup_cli_handlers
+from libprs500 import filename_to_utf8, setup_cli_handlers, __appname__
 from libprs500.ptempfile import PersistentTemporaryFile
+from libprs500.ebooks.metadata.opf import OPFReader
 
 class Span(_Span):
     replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ]
@@ -643,7 +642,7 @@ class HTMLConverter(object):
             except Exception:
                 self.logger.warning('Unable to process %s', path)
                 if self.verbose:
-                    self.logger.exception('')
+                    self.logger.exception(' ')
                 continue
             finally:
                 os.chdir(cwd)
@@ -1291,15 +1290,13 @@ def process_file(path, options, logger=None):
         logger = logging.getLogger('html2lrf')
         setup_cli_handlers(logger, level)
     cwd = os.getcwd()
-    dirpath = None
     default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
+    dirpath = os.path.dirname(path)
     try:
-        dirpath, path = get_path(path)
         cpath, tpath = '', ''
         try_opf(path, options, logger)
         if options.cover:
-            dp = dirpath if dirpath else os.path.dirname(path)
-            cpath = os.path.join(dp, os.path.basename(options.cover))
+            cpath = os.path.join(dirpath, os.path.basename(options.cover))
             if not os.path.exists(cpath):
                 cpath = os.path.abspath(os.path.expanduser(options.cover))
             options.cover = cpath
@@ -1309,7 +1306,7 @@ def process_file(path, options, logger=None):
                 cim = im.resize((options.profile.screen_width,
                                  options.profile.screen_height),
                                  PILImage.BICUBIC).convert('RGB')
-                cf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg")
+                cf = PersistentTemporaryFile(prefix=__appname__+"_", suffix=".jpg")
                 cf.close()
                 cim.save(cf.name)
                 cpath = cf.name
@@ -1376,70 +1373,57 @@ def process_file(path, options, logger=None):
         return oname
     finally:
         os.chdir(cwd)
-        if dirpath:
-            shutil.rmtree(dirpath, True)
 
 def try_opf(path, options, logger):
     try:
         opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
     except IndexError:
         return
-    soup = BeautifulStoneSoup(open(opf).read())
+    opf = OPFReader(open(opf, 'rb'))
    try:
-        title = soup.package.metadata.find('dc:title')
+        title = opf.title
         if title and not options.title:
-            options.title = title.string
-        creators = soup.package.metadata.findAll('dc:creator')
+            options.title = title
         if options.author == 'Unknown':
-            for author in creators:
-                role = author.get('role')
-                if not role:
-                    role = author.get('opf:role')
-                if role == 'aut':
-                    options.author = author.string
-                    fa = author.get('file-as')
-                    if fa:
-                        options.author_sort = fa
+            if opf.authors:
+                options.author = ', '.join(opf.authors)
+            if opf.author_sort:
+                options.author_sort = opf.author_sort
         if options.publisher == 'Unknown':
-            publisher = soup.package.metadata.find('dc:publisher')
+            publisher = opf.publisher
             if publisher:
-                options.publisher = publisher.string
-        if not options.category.strip():
-            category = soup.package.metadata.find('dc:type')
+                options.publisher = publisher
+        if not options.category:
+            category = opf.category
             if category:
-                options.category = category.string
-        isbn = []
-        for item in soup.package.metadata.findAll('dc:identifier'):
-            scheme = item.get('scheme')
-            if not scheme:
-                scheme = item.get('opf:scheme')
-            isbn.append((scheme, item.string))
-        if not options.cover:
-            for item in isbn:
-                src = item[1].replace('-', '')
-                matches = glob.glob(os.path.join(os.path.dirname(path), src+'.*'))
-                for match in matches:
-                    test = os.path.splitext(match)[1].lower()
-                    if test in ['.jpeg', '.jpg', '.gif', '.png']:
-                        options.cover = match
-                        break
-
-
+                options.category = category
         if not options.cover:
-            # Search for cover image in opf as created by convertlit
-            ref = soup.package.find('reference', {'type':'other.ms-coverimage-standard'})
-            if ref:
-                try:
-                    options.cover = os.path.join(os.path.dirname(path), ref.get('href'))
-                    if not os.access(options.cover, os.R_OK):
-                        options.cover = None
-                except:
-                    logger.exception('Could not load cover')
+            cover = opf.cover
+            if cover:
+                cover = os.path.join(os.path.dirname(path), cover)
+                if os.access(cover, os.R_OK):
+                    try:
+                        PILImage.open(cover)
+                        options.cover = cover
+                    except:
+                        pass
+        if not options.cover:
+            for prefix in opf.possible_cover_prefixes():
+                if options.cover:
+                    break
+                for suffix in ['.jpg', '.jpeg', '.gif', '.png', '.bmp']:
+                    cpath = os.path.join(os.path.dirname(path), prefix+suffix)
+                    try:
+                        PILImage.open(cpath)
+                        options.cover = cpath
+                        break
+                    except:
+                        continue
     except Exception:
         logger.exception('Failed to process opf file')
 
 def option_parser():
-    return lrf_option_parser('''Usage: %prog [options] mybook.[html|rar|zip]\n\n'''
+    return lrf_option_parser('''Usage: %prog [options] mybook.html\n\n'''
            '''%prog converts mybook.html to mybook.lrf''')
 
 def main(args=sys.argv):
@@ -1461,66 +1445,6 @@ def main(args=sys.argv):
         process_file(src, options)
     return 0
 
-def console_query(dirpath, candidate, docs):
-    if len(docs) == 1:
-        return 0
-    try:
-        import readline
-    except ImportError:
-        pass
-    i = 0
-    for doc in docs:
-        prefix = '>' if i == candidate else ''
-        print prefix+str(i)+'.\t', doc[0]
-        i += 1
-    print
-    while True:
-        try:
-            choice = raw_input('Choose file to convert (0-'+str(i-1) + \
-                               '). Current choice is ['+ str(candidate) + ']:')
-            if not choice:
-                return candidate
-            choice = int(choice)
-            if choice < 0 or choice >= i:
-                continue
-            candidate = choice
-        except EOFError, KeyboardInterrupt:
-            sys.exit()
-        except:
-            continue
-        break
-    return candidate
-
-
-def get_path(path, query=console_query):
-    path = os.path.abspath(os.path.expanduser(path))
-    ext = os.path.splitext(path)[1][1:].lower()
-    if ext in ['htm', 'html', 'xhtml', 'php']:
-        return None, path
-    dirpath = mkdtemp('','html2lrf')
-    extract(path, dirpath)
-    candidate, docs = None, []
-    for root, dirs, files in os.walk(dirpath):
-        for name in files:
-            ext = os.path.splitext(name)[1][1:].lower()
-            if ext not in ['html', 'xhtml', 'htm', 'xhtm']:
-                continue
-            docs.append((name, root, os.stat(os.path.join(root, name)).st_size))
-            if 'toc' in name.lower():
-                candidate = name
-    docs.sort(key=itemgetter(2))
-    if candidate:
-        for i in range(len(docs)):
-            if docs[i][0] == candidate:
-                candidate = i
-                break
-    else:
-        candidate = len(docs) - 1
-    if len(docs) == 0:
-        raise ConversionError('No suitable files found in archive')
-    if len(docs) > 0:
-        candidate = query(dirpath, candidate, docs)
-    return dirpath, os.path.join(docs[candidate][1], docs[candidate][0])
 
 
 if __name__ == '__main__':
@@ -12,13 +12,13 @@
 ## You should have received a copy of the GNU General Public License along
 ## with this program; if not, write to the Free Software Foundation, Inc.,
 ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import os, sys, shutil, glob
+import os, sys, shutil, glob, logging
 from tempfile import mkdtemp
 from subprocess import Popen, PIPE
 from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks import ConversionError
-from libprs500.ebooks.lrf.html.convert_from import process_file
-from libprs500 import isosx, __appname__
+from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
+from libprs500 import isosx, __appname__, setup_cli_handlers
 CLIT = 'clit'
 if isosx and hasattr(sys, 'frameworks_dir'):
     CLIT = os.path.join(sys.frameworks_dir, CLIT)
@@ -29,29 +29,27 @@ def option_parser():
         '''%prog converts mybook.lit to mybook.lrf'''
         )
 
-def generate_html(pathtolit):
+def generate_html(pathtolit, logger):
     if not os.access(pathtolit, os.R_OK):
         raise ConversionError, 'Cannot read from ' + pathtolit
     tdir = mkdtemp(prefix=__appname__+'_')
     cmd = ' '.join([CLIT, '"'+pathtolit+'"', tdir])
-    p = Popen(cmd, shell=True, stderr=PIPE)
+    p = Popen(cmd, shell=True, stderr=PIPE, stdout=PIPE)
     ret = p.wait()
+    logger.info(p.stdout.read())
     if ret != 0:
         shutil.rmtree(tdir)
         err = p.stderr.read()
         raise ConversionError, err
     return tdir
 
-def main(args=sys.argv):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No lit file specified'
-        return 1
-    lit = os.path.abspath(os.path.expanduser(args[1]))
-    tdir = generate_html(lit)
+def process_file(path, options, logger=None):
+    if logger is None:
+        level = logging.DEBUG if options.verbose else logging.INFO
+        logger = logging.getLogger('lit2lrf')
+        setup_cli_handlers(logger, level)
+    lit = os.path.abspath(os.path.expanduser(path))
+    tdir = generate_html(lit, logger)
     try:
         l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
         if not l:
@@ -61,7 +59,9 @@ def main(args=sys.argv):
         if not l:
             l = glob.glob(os.path.join(tdir, '*.htm*'))
             if not l:
-                raise ConversionError, 'Conversion of lit to html failed. Cannot find html file.'
+                l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
+                if not l:
+                    raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
         maxsize, htmlfile = 0, None
         for c in l:
             sz = os.path.getsize(c)
@@ -71,13 +71,24 @@ def main(args=sys.argv):
                 htmlfile = l[0]
         if not options.output:
             ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
-        else:
-            options.output = os.path.abspath(options.output)
-        process_file(htmlfile, options)
+            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+        options.output = os.path.abspath(os.path.expanduser(options.output))
+        html_process_file(htmlfile, options, logger=logger)
     finally:
         shutil.rmtree(tdir)
 
+
+def main(args=sys.argv, logger=None):
+    parser = option_parser()
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        print 'No lit file specified'
+        return 1
+    process_file(options, args[1], logger)
+    return 0
+
 
 if __name__ == '__main__':
     sys.exit(main())
@@ -254,19 +254,35 @@ def get_metadata(stream):
     L{MetaInformation} object.
     """
     lrf = LRFMetaFile(stream)
-    mi = MetaInformation(lrf.title.strip(), lrf.author.strip())
+    au = lrf.author.strip().split(',')
+    authors = []
+    for i in au:
+        authors.extend(i.split('&'))
+    mi = MetaInformation(lrf.title.strip(), authors)
+    mi.author = lrf.author.strip()
     mi.comments = lrf.free_text.strip()
-    mi.category = lrf.category.strip()
-    mi.classification = lrf.classification.strip()
+    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
     mi.publisher = lrf.publisher.strip()
+    try:
+        mi.title_sort = lrf.title_reading.strip()
+        if not mi.title_sort:
+            mi.title_sort = None
+    except:
+        pass
+    try:
+        mi.author_sort = lrf.author_reading.strip()
+        if not mi.author_sort:
+            mi.author_sort = None
+    except:
+        pass
     if not mi.title or 'unknown' in mi.title.lower():
         mi.title = None
+    if not mi.authors:
+        mi.authors = None
     if not mi.author or 'unknown' in mi.author.lower():
         mi.author = None
     if not mi.category or 'unknown' in mi.category.lower():
         mi.category = None
-    if not mi.classification or 'unknown' in mi.classification.lower():
-        mi.classification = None
     if not mi.publisher or 'unknown' in mi.publisher.lower() or \
             'some publisher' in mi.publisher.lower():
         mi.publisher = None
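(Illustration only, not from the commit: the splitting added above turns the single LRF author field into a list, splitting first on ',' and then on '&'; surrounding whitespace is kept. The field value below is an assumed example.)

# Worked example of the splitting logic in the hunk above.
author_field = 'Larry Niven & Jerry Pournelle'   # example value
authors = []
for i in author_field.split(','):
    authors.extend(i.split('&'))
# authors == ['Larry Niven ', ' Jerry Pournelle']  (whitespace is not stripped)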
@@ -15,19 +15,19 @@
 from libprs500 import filename_to_utf8
 ''''''
 
-import sys, os, subprocess
-from libprs500 import isosx
+import sys, os, subprocess, logging
+from libprs500 import isosx, setup_cli_handlers
 from libprs500.ebooks import ConversionError
 from libprs500.ptempfile import PersistentTemporaryFile
 from libprs500.ebooks.lrf import option_parser as lrf_option_parser
-from libprs500.ebooks.lrf.html.convert_from import process_file
+from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
 
 PDFTOHTML = 'pdftohtml'
 if isosx and hasattr(sys, 'frameworks_dir'):
     PDFTOHTML = os.path.join(sys.frameworks_dir, PDFTOHTML)
 
 
-def generate_html(pathtopdf):
+def generate_html(pathtopdf, logger):
     '''
     Convert the pdf into html.
     @return: A closed PersistentTemporaryFile.
@@ -41,8 +41,10 @@ def generate_html(pathtopdf):
     cwd = os.getcwd()
     try:
         os.chdir(os.path.dirname(pf.name))
-        p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
+        p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE,
+                             stdout=subprocess.PIPE)
         ret = p.wait()
+        logger.info(p.stdout.read())
         if ret != 0:
             err = p.stderr.read()
             raise ConversionError, err
@@ -56,8 +58,25 @@ def option_parser():
             '''%prog converts mybook.pdf to mybook.lrf\n\n'''
             )
 
+def process_file(path, options, logger=None):
+    if logger is None:
+        level = logging.DEBUG if options.verbose else logging.INFO
+        logger = logging.getLogger('pdf2lrf')
+        setup_cli_handlers(logger, level)
+    pdf = os.path.abspath(os.path.expanduser(path))
+    htmlfile = generate_html(pdf, logger)
+    if not options.output:
+        ext = '.lrs' if options.lrs else '.lrf'
+        options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+    else:
+        options.output = os.path.abspath(options.output)
+    options.pdftohtml = True
+    if not options.title:
+        options.title = filename_to_utf8(os.path.splitext(os.path.basename(options.output))[0])
+    html_process_file(htmlfile.name, options, logger)
+
-def main(args=sys.argv):
+def main(args=sys.argv, logger=None):
     parser = option_parser()
     options, args = parser.parse_args(args)
     if len(args) != 2:
@@ -65,17 +84,7 @@ def main(args=sys.argv):
         print
         print 'No pdf file specified'
         return 1
-    pdf = os.path.abspath(os.path.expanduser(args[1]))
-    htmlfile = generate_html(pdf)
-    if not options.output:
-        ext = '.lrs' if options.lrs else '.lrf'
-        options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
-    else:
-        options.output = os.path.abspath(options.output)
-    options.pdftohtml = True
-    if not options.title:
-        options.title = filename_to_utf8(os.path.splitext(os.path.basename(options.output))[0])
-    process_file(htmlfile.name, options)
+    process_file(args[1], options, logger)
     return 0
 
 if __name__ == '__main__':
@@ -12,13 +12,13 @@
 ## You should have received a copy of the GNU General Public License along
 ## with this program; if not, write to the Free Software Foundation, Inc.,
 ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import os, sys, tempfile, subprocess, shutil
+import os, sys, tempfile, subprocess, shutil, logging
 
 from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks.metadata.meta import get_metadata
-from libprs500.ebooks.lrf.html.convert_from import process_file
+from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
 from libprs500.ebooks import ConversionError
-from libprs500 import isosx
+from libprs500 import isosx, setup_cli_handlers
 
 UNRTF = 'unrtf'
 if isosx and hasattr(sys, 'frameworks_dir'):
@@ -30,50 +30,47 @@ def option_parser():
         '''%prog converts mybook.rtf to mybook.lrf'''
         )
 
-def generate_html(rtfpath):
+def generate_html(rtfpath, logger):
     tdir = tempfile.mkdtemp(prefix='rtf2lrf_')
     cwd = os.path.abspath(os.getcwd())
     os.chdir(tdir)
     try:
-        print 'Converting to HTML...',
+        logger.info('Converting to HTML...')
         sys.stdout.flush()
         handle, path = tempfile.mkstemp(dir=tdir, suffix='.html')
         file = os.fdopen(handle, 'wb')
         cmd = ' '.join([UNRTF, '"'+rtfpath+'"'])
-        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
         file.write(p.stdout.read())
         ret = p.wait()
         if ret != 0:
             if isosx and ret == -11: #unrtf segfaults on OSX but seems to convert most of the file.
                 file.write('</body>\n</html>')
             else:
+                logger.critical(p.stderr.read())
                 raise ConversionError, 'unrtf failed with error code: %d'%(ret,)
-        print 'done'
         file.close()
         return path
     finally:
         os.chdir(cwd)
 
-def main(args=sys.argv):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No rtf file specified'
-        return 1
-    rtf = os.path.abspath(os.path.expanduser(args[1]))
+def process_file(path, options, logger=None):
+    if logger is None:
+        level = logging.DEBUG if options.verbose else logging.INFO
+        logger = logging.getLogger('pdf2lrf')
+        setup_cli_handlers(logger, level)
+    rtf = os.path.abspath(os.path.expanduser(path))
     f = open(rtf, 'rb')
     mi = get_metadata(f, 'rtf')
     f.close()
-    html = generate_html(rtf)
+    html = generate_html(rtf, logger)
     tdir = os.path.dirname(html)
     try:
         if not options.output:
             ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
-        else:
-            options.output = os.path.abspath(options.output)
+            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+        options.output = os.path.abspath(os.path.expanduser(options.output))
         if (not options.title or options.title == 'Unknown') and mi.title:
             sys.argv.append('-t')
             sys.argv.append('"'+mi.title+'"')
|
|||||||
if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
|
if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
|
||||||
sys.argv.append('--comment')
|
sys.argv.append('--comment')
|
||||||
sys.argv.append('"'+mi.comments+'"')
|
sys.argv.append('"'+mi.comments+'"')
|
||||||
process_file(html, options)
|
html_process_file(html, options, logger)
|
||||||
finally:
|
finally:
|
||||||
shutil.rmtree(tdir)
|
shutil.rmtree(tdir)
|
||||||
|
|
||||||
|
def main(args=sys.argv, logger=None):
|
||||||
|
parser = option_parser()
|
||||||
|
options, args = parser.parse_args(args)
|
||||||
|
if len(args) != 2:
|
||||||
|
parser.print_help()
|
||||||
|
print
|
||||||
|
print 'No rtf file specified'
|
||||||
|
return 1
|
||||||
|
process_file(args[1], options, logger)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@@ -15,14 +15,14 @@
 """
 Convert .txt files to .lrf
 """
-import os, sys, codecs
+import os, sys, codecs, logging
 
-from libprs500 import iswindows
 from libprs500.ptempfile import PersistentTemporaryFile
 from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks import ConversionError
-from libprs500.ebooks.lrf.html.convert_from import process_file
+from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
 from libprs500.ebooks.markdown import markdown
+from libprs500 import setup_cli_handlers
 
 def option_parser():
     parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
@@ -65,7 +65,24 @@ def generate_html(txtfile, encoding):
     codecs.open(p.name, 'wb', enc).write(html)
     return p
 
-def main(args=sys.argv):
+def process_file(path, options, logger=None):
+    if logger is None:
+        level = logging.DEBUG if options.verbose else logging.INFO
+        logger = logging.getLogger('txt2lrf')
+        setup_cli_handlers(logger, level)
+    txt = os.path.abspath(os.path.expanduser(path))
+    if not hasattr(options, 'encoding'):
+        options.encoding = None
+    htmlfile = generate_html(txt, options.encoding)
+    options.force_page_break = 'h2'
+    if not options.output:
+        ext = '.lrs' if options.lrs else '.lrf'
+        options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+    options.output = os.path.abspath(os.path.expanduser(options.output))
+
+    html_process_file(htmlfile.name, options, logger)
+
+def main(args=sys.argv, logger=None):
     parser = option_parser()
     options, args = parser.parse_args(args)
     if len(args) != 2:
@@ -73,16 +90,8 @@ def main(args=sys.argv):
         print
         print 'No txt file specified'
         return 1
-    txt = os.path.abspath(os.path.expanduser(args[1]))
-    htmlfile = generate_html(txt, options.encoding)
-    options.force_page_break = 'h2'
-    if not options.output:
-        ext = '.lrs' if options.lrs else '.lrf'
-        options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
-    else:
-        options.output = os.path.abspath(options.output)
-
-    process_file(htmlfile.name, options)
+    process_file(args[1], options, logger)
+    return 0
 
 if __name__ == '__main__':
     sys.exit(main())
@@ -40,12 +40,20 @@ def get_parser(extension):
 class MetaInformation(object):
     '''Convenient encapsulation of book metadata'''
 
-    def __init__(self, title, author):
+    def __init__(self, title, authors):
+        '''
+        @param title: title or "Unknonw"
+        @param authors: List of strings or []
+        '''
         self.title = title
-        self.author = author
+        self.author = authors # Needed for backward compatibility
+        #: List of strings or []
+        self.authors = authors
+        #: Sort text for author
+        self.author_sort = None
+        self.title_sort = None
         self.comments = None
         self.category = None
-        self.classification = None
         self.publisher = None
         self.series = None
         self.series_index = None
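(For illustration only, not part of the diff: after this change MetaInformation is constructed with a list of authors, while the old author attribute is kept pointing at the same value for backward compatibility. The title and author strings below are example values.)

# Sketch of the new constructor signature shown above.
from libprs500.ebooks.metadata import MetaInformation

mi = MetaInformation('Some Title', ['First Author', 'Second Author'])
# mi.authors == ['First Author', 'Second Author']
# mi.author is the same list, retained only for backward compatibility
# mi.author_sort and mi.title_sort start out as None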
155 src/libprs500/ebooks/metadata/opf.py (new file)
@@ -0,0 +1,155 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''Read/Write metadata from Open Packaging Format (.opf) files.'''
+
+import sys
+
+from libprs500.ebooks.metadata import MetaInformation
+from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
+
+class OPFReader(MetaInformation):
+
+    def __init__(self, stream):
+        self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
+        if hasattr(stream, 'seek'):
+            stream.seek(0)
+        self.soup = BeautifulStoneSoup(stream.read())
+        self.series = self.series_index = self.rating = None
+
+    @apply
+    def title():
+        doc = '''title'''
+        def fget(self):
+            title = self.soup.package.metadata.find('dc:title')
+            if title:
+                return title.string
+            return self.default_title
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def authors():
+        doc = '''authors'''
+        def fget(self):
+            creators = self.soup.package.metadata.findAll('dc:creator')
+            for elem in creators:
+                role = elem.get('role')
+                if not role:
+                    role = elem.get('opf:role')
+                if role == 'aut':
+                    au = elem.string.split(',')
+                    ans = []
+                    for i in au:
+                        ans.extend(i.split('&'))
+                    return ans
+            return None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def author_sort():
+        doc = '''author sort'''
+        def fget(self):
+            creators = self.soup.package.metadata.findAll('dc:creator')
+            for elem in creators:
+                role = elem.get('role')
+                if not role:
+                    role = elem.get('opf:role')
+                if role == 'aut':
+                    fa = elem.get('file-as')
+                    return fa if fa else None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def title_sort():
+        doc = 'title sort'
+        def fget(self):
+            return None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def comments():
+        doc = 'comments'
+        def fget(self):
+            comments = self.soup.find('dc:description')
+            if comments:
+                return comments.string
+            return None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def category():
+        doc = 'category'
+        def fget(self):
+            category = self.soup.find('dc:type')
+            if category:
+                return category.string
+            return None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def publisher():
+        doc = 'publisher'
+        def fget(self):
+            publisher = self.soup.find('dc:publisher')
+            if publisher:
+                return publisher.string
+            return None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def isbn():
+        doc = 'ISBN number'
+        def fget(self):
+            for item in self.soup.package.metadata.findAll('dc:identifier'):
+                scheme = item.get('scheme')
+                if not scheme:
+                    scheme = item.get('opf:scheme')
+                if scheme.lower() == 'isbn':
+                    return item.string
+            return None
+        return property(doc=doc, fget=fget)
+
+    @apply
+    def cover():
+        doc = 'cover'
+        def fget(self):
+            guide = self.soup.package.find('guide')
+            if guide:
+                references = guide.findAll('reference')
+                for reference in references:
+                    type = reference.get('type')
+                    if not type:
+                        continue
+                    if type.lower() in ['cover', 'other.ms-coverimage-standard']:
+                        return reference.get('href')
+            return None
+        return property(doc=doc, fget=fget)
+
+    def possible_cover_prefixes(self):
+        isbn, ans = [], []
+        for item in self.soup.package.metadata.findAll('dc:identifier'):
+            scheme = item.get('scheme')
+            if not scheme:
+                scheme = item.get('opf:scheme')
+            isbn.append((scheme, item.string))
+        for item in isbn:
+            ans.append(item[1].replace('-', ''))
+        return ans
+
+
+def main(args=sys.argv):
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
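(Illustration only, not part of the commit: OPFReader subclasses MetaInformation and exposes the OPF metadata as read-only properties, so a caller such as try_opf above can do roughly the following; 'metadata.opf' is a placeholder path.)

# Minimal sketch of reading an OPF file with the new class.
from libprs500.ebooks.metadata.opf import OPFReader

opf = OPFReader(open('metadata.opf', 'rb'))
print opf.title      # falls back to the stream name when no dc:title is found
print opf.authors    # 'aut' creators split on ',' and '&', or None
print opf.cover      # href of a cover/other.ms-coverimage-standard reference, or None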
@@ -86,6 +86,11 @@ def get_metadata(stream):
     if category_match:
         category = category_match.group(1).strip()
     mi = MetaInformation(title, author)
+    if author:
+        au = author.split(',')
+        mi.authors = []
+        for i in au:
+            mi.authors.extend(i.split('&'))
     mi.comments = comment
     mi.category = category
     return mi
@@ -68,8 +68,6 @@ class LibraryDatabase(object):
             mi.title = title
         if mi.category:
             tags.append(mi.category)
-        if mi.classification:
-            tags.append(mi.classification)
         if tags:
             tags = ', '.join(tags)
         else:
@@ -23,6 +23,7 @@ class Dialog(QObject):
         self.dialog = QDialog(window)
         self.accept = self.dialog.accept
         self.reject = self.dialog.reject
+        self._close_event = self.dialog.closeEvent
         self.dialog.closeEvent = self.close_event
         self.window = window
         self.isVisible = self.dialog.isVisible
@@ -265,7 +265,7 @@ class Main(QObject, Ui_MainWindow):
                 formats.append(format)
                 metadata.append(mi)
                 names.append(os.path.basename(book))
-                infos.append({'title':mi.title, 'authors':mi.author,
+                infos.append({'title':mi.title, 'authors':', '.join(mi.authors),
                               'cover':self.default_thumbnail, 'tags':[]})
 
             if not to_device:
@@ -81,7 +81,9 @@ def setup_completion():
         f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
         f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
         f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
-        f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
+        f.write(opts_and_exts('any2lrf', htmlop,
+            ['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'txt', 'lit', 'rtf', 'pdf']))
+        f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
         f.write('''
         _prs500_ls()
         {