Implemented any2lrf

This commit is contained in:
Kovid Goyal 2007-08-13 22:13:45 +00:00
parent 709dd81a08
commit 9c82e833ac
18 changed files with 506 additions and 208 deletions

View File

@ -31,6 +31,7 @@ entry_points = {
'web2disk = libprs500.web.fetch.simple:main',\
'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',\
'pdf2lrf = libprs500.ebooks.lrf.pdf.convert_from:main',\
'any2lrf = libprs500.ebooks.lrf.any.convert_from:main',\
'libprs500-beta = libprs500.gui2.main:main',\
],
'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main']

View File

@ -64,14 +64,13 @@ def filename_to_utf8(name):
return name.decode(codec, 'replace').encode('utf8')
def extract(path, dir):
import os
ext = os.path.splitext(path)[1][1:].lower()
extractor = None
if ext == 'zip':
from libprs500.libunzip import extract
extractor = extract
elif ext == 'rar':
from libprs500.libunrar import extract
from libprs500.libunrar import extract # In case the dll is not found
extractor = extract
if not extractor:
raise Exception('Unknown archive type')

View File

@ -131,7 +131,7 @@ class Device(object):
keys C{title}, C{authors}, C{cover}, C{tags}. The value of the C{cover}
element can be None or a three element tuple (width, height, data)
where data is the image data in JPEG format as a string. C{tags} must be
a possibly empty list of strings.
a possibly empty list of strings. C{authors} must be a string.
@param booklists: A tuple containing the result of calls to
(L{books}(oncard=False), L{books}(oncard=True)).
'''

View File

@ -0,0 +1,14 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

View File

@ -0,0 +1,136 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Convert any ebook file into a LRF file.'''
import sys, os, logging, shutil, tempfile, glob
from libprs500.ebooks.lrf import option_parser
from libprs500 import __appname__, setup_cli_handlers, extract
from libprs500.ebooks.lrf.lit.convert_from import process_file as lit2lrf
from libprs500.ebooks.lrf.pdf.convert_from import process_file as pdf2lrf
from libprs500.ebooks.lrf.rtf.convert_from import process_file as rtf2lrf
from libprs500.ebooks.lrf.txt.convert_from import process_file as txt2lrf
from libprs500.ebooks.lrf.html.convert_from import process_file as html2lrf
def largest_file(files):
    '''
    Return the path in C{files} that occupies the most bytes on disk.

    Zero-length files are never selected. When several files share the
    greatest size, the one listed first is returned.

    @param files: Iterable of paths; each one is passed to C{os.stat}.
    @return: The chosen path, or C{None} when C{files} is empty or holds
    only zero-length files.
    '''
    best_path, best_size = None, 0
    for candidate in files:
        candidate_size = os.stat(candidate).st_size
        # Strict comparison: ties keep the earlier entry, empty files lose.
        if candidate_size <= best_size:
            continue
        best_path, best_size = candidate, candidate_size
    return best_path
def find_htmlfile(dir):
    '''
    Pick the HTML file in C{dir} that should be used as the conversion root.

    Files whose name contains C{toc} are preferred; only if none of those is
    usable are all HTML/XHTML files considered. Within each group the
    largest file (see L{largest_file}) wins. Only C{dir} itself is searched,
    not its subdirectories.

    @param dir: Directory to search.
    @return: Path of the chosen file, or C{None} if nothing suitable exists.
    '''
    pattern_groups = (
        ('*toc*.htm*', '*toc*.xhtm*'),
        ('*.htm*', '*.xhtm*'),
    )
    for html_pattern, xhtml_pattern in pattern_groups:
        matches = glob.glob(os.path.join(dir, html_pattern)) + \
                  glob.glob(os.path.join(dir, xhtml_pattern))
        winner = largest_file(matches)
        if winner:
            return winner
    return None
def handle_archive(path):
    '''
    Unpack the archive at C{path} into a fresh temporary directory and pick
    the file inside it that looks like the ebook.

    If the archive unpacks to a single directory, that directory is searched
    instead of the extraction root. The largest LIT/RTF/PDF/TXT file is
    preferred; failing that an HTML file is chosen via L{find_htmlfile}.
    Only the top level of the searched directory is examined.

    @param path: Path to the archive, handed to L{extract}.
    @return: A tuple C{(tdir, file)}. C{tdir} is the temporary directory,
    which the caller must delete even when C{file} is C{None} (no ebook
    found).
    @raise Exception: Whatever L{extract} or the directory scan raises; the
    temporary directory is removed before the exception propagates.
    '''
    tdir = tempfile.mkdtemp(prefix=__appname__+'_')
    try:
        extract(path, tdir)
        cdir = tdir
        entries = os.listdir(tdir)
        if len(entries) == 1 and os.path.isdir(os.path.join(tdir, entries[0])):
            # Archive wraps everything in one top-level folder; look in there.
            cdir = os.path.join(tdir, entries[0])
        files = []
        for ext in ('lit', 'rtf', 'pdf', 'txt'):
            files.extend(glob.glob(os.path.join(cdir, '*.'+ext)))
        ebook = largest_file(files)
        if not ebook:
            ebook = find_htmlfile(cdir)
    except:
        # The caller never learns tdir when we fail, so it cannot clean up;
        # remove the directory here (ignoring errors) and re-raise unchanged.
        shutil.rmtree(tdir, True)
        raise
    return tdir, ebook
def process_file(path, options, logger=None):
    '''
    Convert the ebook at C{path} to LRF, choosing the converter from the
    file extension. ZIP and RAR archives are unpacked into a temporary
    directory first and the ebook found inside is converted instead.

    @param path: Path to the ebook or archive; a leading C{~} is expanded.
    @param options: Parsed options object. C{verbose}, C{lrs} and C{output}
    are read here. C{output} is always rewritten as an absolute path and,
    when unset, defaults to the input's base name with a C{.lrf}/C{.lrs}
    extension in the current directory.
    @param logger: Logger to report to. When C{None}, the C{any2lrf} logger
    is fetched and configured through L{setup_cli_handlers}.
    @return: 1 if the input cannot be read, has no recognised type, or is an
    archive without a usable ebook; C{None} after the converter has run.
    '''
    path = os.path.abspath(os.path.expanduser(path))
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('any2lrf')
        setup_cli_handlers(logger, level)
    if not os.access(path, os.R_OK):
        logger.critical('Cannot read from %s', path)
        return 1
    ext = os.path.splitext(path)[1]
    if not ext or ext == '.':
        logger.critical('Unknown file type: %s', path)
        return 1
    ext = ext[1:].lower()
    cwd = os.getcwd()
    if not options.output:
        fmt = '.lrs' if options.lrs else '.lrf'
        options.output = os.path.splitext(os.path.basename(path))[0] + fmt
    options.output = os.path.abspath(os.path.expanduser(options.output))
    tdir = None
    # Everything that can create tdir happens inside the try block so that
    # the finally clause removes it on every exit path, early returns included.
    try:
        if ext in ('zip', 'rar'):
            newpath = None
            try:
                tdir, newpath = handle_archive(path)
            except Exception:
                # Log the traceback; the generic message below is the
                # user-facing failure report.
                logger.exception(' ')
            if not newpath:
                logger.critical('Could not find ebook in archive')
                return 1
            path = newpath
            logger.info('Found ebook in archive: %s', path)
            ext = os.path.splitext(path)[1][1:].lower()
        if 'htm' in ext:
            # Substring test on purpose: covers htm, html, xhtm and xhtml.
            convertor = html2lrf
        else:
            convertor = {'lit': lit2lrf, 'pdf': pdf2lrf,
                         'rtf': rtf2lrf, 'txt': txt2lrf}.get(ext)
        if convertor is None:
            logger.critical('Unknown file type: %s', path)
            return 1
        convertor(path, options, logger)
    finally:
        # NOTE(review): presumably a converter may leave the process in a
        # different working directory; restore the one we started in.
        os.chdir(cwd)
        if tdir and os.path.exists(tdir):
            shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
    '''
    Command-line entry point for any2lrf.

    @param args: Argument list including the program name; exactly one
    positional argument (the file to convert) is expected after option
    parsing.
    @param logger: Optional logger passed through to L{process_file}.
    @return: Process exit status: 0 on success, 1 if no file was given or
    L{process_file} reported a failure.
    '''
    parser = option_parser('''\
any2lrf myfile
Convert any ebook format into LRF. Supported formats are:
LIT, RTF, TXT, HTML and PDF. any2lrf will also process a RAR or
ZIP archive.
''')
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        # Parenthesised so the statements parse under Python 2 and 3 alike.
        print('')
        print('No file to convert specified.')
        return 1
    # process_file returns 1 on failure and None on success; propagate the
    # failure as the exit status instead of always reporting success.
    return process_file(args[1], options, logger) or 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -20,20 +20,18 @@ Code to convert HTML ebooks into LRF ebooks.
I am indebted to esperanc for the initial CSS->Xylog Style conversion code
and to Falstaff for pylrs.
"""
import os, re, sys, shutil, copy, glob, logging
import os, re, sys, copy, glob, logging
from htmlentitydefs import name2codepoint
from urllib import unquote
from urlparse import urlparse
from tempfile import mkdtemp
from operator import itemgetter
from math import ceil, floor
try:
from PIL import Image as PILImage
except ImportError:
import Image as PILImage
from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
Comment, Tag, NavigableString, Declaration, ProcessingInstruction
from libprs500.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \
NavigableString, Declaration, ProcessingInstruction
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
TextBlock, ImageBlock, JumpButton, CharButton, \
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
@ -43,8 +41,9 @@ from libprs500.ebooks.lrf import Book
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks import ConversionError
from libprs500.ebooks.lrf.html.table import Table
from libprs500 import extract, filename_to_utf8, setup_cli_handlers
from libprs500 import filename_to_utf8, setup_cli_handlers, __appname__
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.metadata.opf import OPFReader
class Span(_Span):
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ]
@ -643,7 +642,7 @@ class HTMLConverter(object):
except Exception:
self.logger.warning('Unable to process %s', path)
if self.verbose:
self.logger.exception('')
self.logger.exception(' ')
continue
finally:
os.chdir(cwd)
@ -1291,15 +1290,13 @@ def process_file(path, options, logger=None):
logger = logging.getLogger('html2lrf')
setup_cli_handlers(logger, level)
cwd = os.getcwd()
dirpath = None
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
dirpath = os.path.dirname(path)
try:
dirpath, path = get_path(path)
cpath, tpath = '', ''
try_opf(path, options, logger)
if options.cover:
dp = dirpath if dirpath else os.path.dirname(path)
cpath = os.path.join(dp, os.path.basename(options.cover))
if options.cover:
cpath = os.path.join(dirpath, os.path.basename(options.cover))
if not os.path.exists(cpath):
cpath = os.path.abspath(os.path.expanduser(options.cover))
options.cover = cpath
@ -1309,7 +1306,7 @@ def process_file(path, options, logger=None):
cim = im.resize((options.profile.screen_width,
options.profile.screen_height),
PILImage.BICUBIC).convert('RGB')
cf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg")
cf = PersistentTemporaryFile(prefix=__appname__+"_", suffix=".jpg")
cf.close()
cim.save(cf.name)
cpath = cf.name
@ -1376,70 +1373,57 @@ def process_file(path, options, logger=None):
return oname
finally:
os.chdir(cwd)
if dirpath:
shutil.rmtree(dirpath, True)
def try_opf(path, options, logger):
try:
opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
except IndexError:
return
soup = BeautifulStoneSoup(open(opf).read())
opf = OPFReader(open(opf, 'rb'))
try:
title = soup.package.metadata.find('dc:title')
title = opf.title
if title and not options.title:
options.title = title.string
creators = soup.package.metadata.findAll('dc:creator')
options.title = title
if options.author == 'Unknown':
for author in creators:
role = author.get('role')
if not role:
role = author.get('opf:role')
if role == 'aut':
options.author = author.string
fa = author.get('file-as')
if fa:
options.author_sort = fa
if opf.authors:
options.author = ', '.join(opf.authors)
if opf.author_sort:
options.author_sort = opf.author_sort
if options.publisher == 'Unknown':
publisher = soup.package.metadata.find('dc:publisher')
publisher = opf.publisher
if publisher:
options.publisher = publisher.string
if not options.category.strip():
category = soup.package.metadata.find('dc:type')
options.publisher = publisher
if not options.category:
category = opf.category
if category:
options.category = category.string
isbn = []
for item in soup.package.metadata.findAll('dc:identifier'):
scheme = item.get('scheme')
if not scheme:
scheme = item.get('opf:scheme')
isbn.append((scheme, item.string))
if not options.cover:
for item in isbn:
src = item[1].replace('-', '')
matches = glob.glob(os.path.join(os.path.dirname(path), src+'.*'))
for match in matches:
test = os.path.splitext(match)[1].lower()
if test in ['.jpeg', '.jpg', '.gif', '.png']:
options.cover = match
break
options.category = category
if not options.cover:
# Search for cover image in opf as created by convertlit
ref = soup.package.find('reference', {'type':'other.ms-coverimage-standard'})
if ref:
try:
options.cover = os.path.join(os.path.dirname(path), ref.get('href'))
if not os.access(options.cover, os.R_OK):
options.cover = None
except:
logger.exception('Could not load cover')
cover = opf.cover
if cover:
cover = os.path.join(os.path.dirname(path), cover)
if os.access(cover, os.R_OK):
try:
PILImage.open(cover)
options.cover = cover
except:
pass
if not options.cover:
for prefix in opf.possible_cover_prefixes():
if options.cover:
break
for suffix in ['.jpg', '.jpeg', '.gif', '.png', '.bmp']:
cpath = os.path.join(os.path.dirname(path), prefix+suffix)
try:
PILImage.open(cpath)
options.cover = cpath
break
except:
continue
except Exception:
logger.exception('Failed to process opf file')
def option_parser():
return lrf_option_parser('''Usage: %prog [options] mybook.[html|rar|zip]\n\n'''
return lrf_option_parser('''Usage: %prog [options] mybook.html\n\n'''
'''%prog converts mybook.html to mybook.lrf''')
def main(args=sys.argv):
@ -1461,66 +1445,6 @@ def main(args=sys.argv):
process_file(src, options)
return 0
def console_query(dirpath, candidate, docs):
if len(docs) == 1:
return 0
try:
import readline
except ImportError:
pass
i = 0
for doc in docs:
prefix = '>' if i == candidate else ''
print prefix+str(i)+'.\t', doc[0]
i += 1
print
while True:
try:
choice = raw_input('Choose file to convert (0-'+str(i-1) + \
'). Current choice is ['+ str(candidate) + ']:')
if not choice:
return candidate
choice = int(choice)
if choice < 0 or choice >= i:
continue
candidate = choice
except EOFError, KeyboardInterrupt:
sys.exit()
except:
continue
break
return candidate
def get_path(path, query=console_query):
path = os.path.abspath(os.path.expanduser(path))
ext = os.path.splitext(path)[1][1:].lower()
if ext in ['htm', 'html', 'xhtml', 'php']:
return None, path
dirpath = mkdtemp('','html2lrf')
extract(path, dirpath)
candidate, docs = None, []
for root, dirs, files in os.walk(dirpath):
for name in files:
ext = os.path.splitext(name)[1][1:].lower()
if ext not in ['html', 'xhtml', 'htm', 'xhtm']:
continue
docs.append((name, root, os.stat(os.path.join(root, name)).st_size))
if 'toc' in name.lower():
candidate = name
docs.sort(key=itemgetter(2))
if candidate:
for i in range(len(docs)):
if docs[i][0] == candidate:
candidate = i
break
else:
candidate = len(docs) - 1
if len(docs) == 0:
raise ConversionError('No suitable files found in archive')
if len(docs) > 0:
candidate = query(dirpath, candidate, docs)
return dirpath, os.path.join(docs[candidate][1], docs[candidate][0])
if __name__ == '__main__':

View File

@ -12,13 +12,13 @@
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import os, sys, shutil, glob
import os, sys, shutil, glob, logging
from tempfile import mkdtemp
from subprocess import Popen, PIPE
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks import ConversionError
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500 import isosx, __appname__
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
from libprs500 import isosx, __appname__, setup_cli_handlers
CLIT = 'clit'
if isosx and hasattr(sys, 'frameworks_dir'):
CLIT = os.path.join(sys.frameworks_dir, CLIT)
@ -29,29 +29,27 @@ def option_parser():
'''%prog converts mybook.lit to mybook.lrf'''
)
def generate_html(pathtolit):
def generate_html(pathtolit, logger):
if not os.access(pathtolit, os.R_OK):
raise ConversionError, 'Cannot read from ' + pathtolit
tdir = mkdtemp(prefix=__appname__+'_')
cmd = ' '.join([CLIT, '"'+pathtolit+'"', tdir])
p = Popen(cmd, shell=True, stderr=PIPE)
p = Popen(cmd, shell=True, stderr=PIPE, stdout=PIPE)
ret = p.wait()
logger.info(p.stdout.read())
if ret != 0:
shutil.rmtree(tdir)
err = p.stderr.read()
raise ConversionError, err
return tdir
def main(args=sys.argv):
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
print 'No lit file specified'
return 1
lit = os.path.abspath(os.path.expanduser(args[1]))
tdir = generate_html(lit)
def process_file(path, options, logger=None):
if logger is None:
level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('lit2lrf')
setup_cli_handlers(logger, level)
lit = os.path.abspath(os.path.expanduser(path))
tdir = generate_html(lit, logger)
try:
l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
if not l:
@ -61,7 +59,9 @@ def main(args=sys.argv):
if not l:
l = glob.glob(os.path.join(tdir, '*.htm*'))
if not l:
raise ConversionError, 'Conversion of lit to html failed. Cannot find html file.'
l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
if not l:
raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
maxsize, htmlfile = 0, None
for c in l:
sz = os.path.getsize(c)
@ -71,13 +71,24 @@ def main(args=sys.argv):
htmlfile = l[0]
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
else:
options.output = os.path.abspath(options.output)
process_file(htmlfile, options)
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
html_process_file(htmlfile, options, logger=logger)
finally:
shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
    '''
    Command-line entry point for lit2lrf.

    @param args: Argument list including the program name; exactly one
    positional argument (the LIT file) is expected after option parsing.
    @param logger: Optional logger passed through to L{process_file}.
    @return: 0 after a conversion has run, 1 if no LIT file was specified.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        # Parenthesised so the statements parse under Python 2 and 3 alike.
        print('')
        print('No lit file specified')
        return 1
    # process_file takes (path, options, logger): the path comes first.
    process_file(args[1], options, logger)
    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -254,19 +254,35 @@ def get_metadata(stream):
L{MetaInformation} object.
"""
lrf = LRFMetaFile(stream)
mi = MetaInformation(lrf.title.strip(), lrf.author.strip())
au = lrf.author.strip().split(',')
authors = []
for i in au:
authors.extend(i.split('&'))
mi = MetaInformation(lrf.title.strip(), authors)
mi.author = lrf.author.strip()
mi.comments = lrf.free_text.strip()
mi.category = lrf.category.strip()
mi.classification = lrf.classification.strip()
mi.category = lrf.category.strip()+', '+lrf.classification.strip()
mi.publisher = lrf.publisher.strip()
try:
mi.title_sort = lrf.title_reading.strip()
if not mi.title_sort:
mi.title_sort = None
except:
pass
try:
mi.author_sort = lrf.author_reading.strip()
if not mi.author_sort:
mi.author_sort = None
except:
pass
if not mi.title or 'unknown' in mi.title.lower():
mi.title = None
if not mi.authors:
mi.authors = None
if not mi.author or 'unknown' in mi.author.lower():
mi.author = None
if not mi.category or 'unknown' in mi.category.lower():
mi.category = None
if not mi.classification or 'unknown' in mi.classification.lower():
mi.classification = None
if not mi.publisher or 'unknown' in mi.publisher.lower() or \
'some publisher' in mi.publisher.lower():
mi.publisher = None

View File

@ -15,19 +15,19 @@
from libprs500 import filename_to_utf8
''''''
import sys, os, subprocess
from libprs500 import isosx
import sys, os, subprocess, logging
from libprs500 import isosx, setup_cli_handlers
from libprs500.ebooks import ConversionError
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
PDFTOHTML = 'pdftohtml'
if isosx and hasattr(sys, 'frameworks_dir'):
PDFTOHTML = os.path.join(sys.frameworks_dir, PDFTOHTML)
def generate_html(pathtopdf):
def generate_html(pathtopdf, logger):
'''
Convert the pdf into html.
@return: A closed PersistentTemporaryFile.
@ -41,8 +41,10 @@ def generate_html(pathtopdf):
cwd = os.getcwd()
try:
os.chdir(os.path.dirname(pf.name))
p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
ret = p.wait()
logger.info(p.stdout.read())
if ret != 0:
err = p.stderr.read()
raise ConversionError, err
@ -56,8 +58,25 @@ def option_parser():
'''%prog converts mybook.pdf to mybook.lrf\n\n'''
)
def process_file(path, options, logger=None):
    '''
    Convert the PDF at C{path} to LRF by first turning it into HTML with
    L{generate_html} and then running the HTML converter on the result.

    @param path: Path to the PDF file; a leading C{~} is expanded.
    @param options: Parsed options object. C{verbose}, C{lrs}, C{output} and
    C{title} are read; C{output}, C{pdftohtml} and (when empty) C{title} are
    set here before the HTML converter is invoked.
    @param logger: Logger to report to. When C{None}, the C{pdf2lrf} logger
    is fetched and configured through L{setup_cli_handlers}.
    '''
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('pdf2lrf')
        setup_cli_handlers(logger, level)
    pdf = os.path.abspath(os.path.expanduser(path))
    htmlfile = generate_html(pdf, logger)
    if not options.output:
        ext = '.lrs' if options.lrs else '.lrf'
        # Default: input base name with the new extension, in the current dir.
        options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
    # Expand ~ like the lit/rtf/txt converters do; a no-op for the default.
    options.output = os.path.abspath(os.path.expanduser(options.output))
    options.pdftohtml = True
    if not options.title:
        # Without an explicit title, fall back to the output file's base name.
        options.title = filename_to_utf8(os.path.splitext(os.path.basename(options.output))[0])
    html_process_file(htmlfile.name, options, logger)
def main(args=sys.argv):
def main(args=sys.argv, logger=None):
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
@ -65,17 +84,7 @@ def main(args=sys.argv):
print
print 'No pdf file specified'
return 1
pdf = os.path.abspath(os.path.expanduser(args[1]))
htmlfile = generate_html(pdf)
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
else:
options.output = os.path.abspath(options.output)
options.pdftohtml = True
if not options.title:
options.title = filename_to_utf8(os.path.splitext(os.path.basename(options.output))[0])
process_file(htmlfile.name, options)
process_file(args[1], options, logger)
return 0
if __name__ == '__main__':

View File

@ -12,13 +12,13 @@
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import os, sys, tempfile, subprocess, shutil
import os, sys, tempfile, subprocess, shutil, logging
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks.metadata.meta import get_metadata
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
from libprs500.ebooks import ConversionError
from libprs500 import isosx
from libprs500 import isosx, setup_cli_handlers
UNRTF = 'unrtf'
if isosx and hasattr(sys, 'frameworks_dir'):
@ -30,50 +30,47 @@ def option_parser():
'''%prog converts mybook.rtf to mybook.lrf'''
)
def generate_html(rtfpath):
def generate_html(rtfpath, logger):
tdir = tempfile.mkdtemp(prefix='rtf2lrf_')
cwd = os.path.abspath(os.getcwd())
os.chdir(tdir)
try:
print 'Converting to HTML...',
logger.info('Converting to HTML...')
sys.stdout.flush()
handle, path = tempfile.mkstemp(dir=tdir, suffix='.html')
file = os.fdopen(handle, 'wb')
cmd = ' '.join([UNRTF, '"'+rtfpath+'"'])
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
file.write(p.stdout.read())
ret = p.wait()
if ret != 0:
if isosx and ret == -11: #unrtf segfaults on OSX but seems to convert most of the file.
file.write('</body>\n</html>')
else:
logger.critical(p.stderr.read())
raise ConversionError, 'unrtf failed with error code: %d'%(ret,)
print 'done'
file.close()
return path
finally:
os.chdir(cwd)
def main(args=sys.argv):
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
print 'No rtf file specified'
return 1
rtf = os.path.abspath(os.path.expanduser(args[1]))
def process_file(path, options, logger=None):
if logger is None:
level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('pdf2lrf')
setup_cli_handlers(logger, level)
rtf = os.path.abspath(os.path.expanduser(path))
f = open(rtf, 'rb')
mi = get_metadata(f, 'rtf')
f.close()
html = generate_html(rtf)
html = generate_html(rtf, logger)
tdir = os.path.dirname(html)
try:
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
else:
options.output = os.path.abspath(options.output)
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
if (not options.title or options.title == 'Unknown') and mi.title:
sys.argv.append('-t')
sys.argv.append('"'+mi.title+'"')
@ -86,9 +83,22 @@ def main(args=sys.argv):
if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
sys.argv.append('--comment')
sys.argv.append('"'+mi.comments+'"')
process_file(html, options)
html_process_file(html, options, logger)
finally:
shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
    '''
    Command-line entry point for rtf2lrf.

    @param args: Argument list including the program name; exactly one
    positional argument (the RTF file) is expected after option parsing.
    @param logger: Optional logger passed through to L{process_file}.
    @return: 0 after a conversion has run, 1 if no RTF file was specified.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) == 2:
        process_file(args[1], options, logger)
        return 0
    # Wrong number of positional arguments: show usage and fail.
    parser.print_help()
    print('')
    print('No rtf file specified')
    return 1
if __name__ == '__main__':
sys.exit(main())

View File

@ -15,14 +15,14 @@
"""
Convert .txt files to .lrf
"""
import os, sys, codecs
import os, sys, codecs, logging
from libprs500 import iswindows
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks import ConversionError
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
from libprs500.ebooks.markdown import markdown
from libprs500 import setup_cli_handlers
def option_parser():
parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
@ -65,7 +65,24 @@ def generate_html(txtfile, encoding):
codecs.open(p.name, 'wb', enc).write(html)
return p
def main(args=sys.argv):
def process_file(path, options, logger=None):
    '''
    Convert the text file at C{path} to LRF by rendering it to HTML with
    L{generate_html} and handing the result to the HTML converter.

    @param path: Path to the text file; a leading C{~} is expanded.
    @param options: Parsed options object. C{verbose}, C{lrs} and C{output}
    are read; C{encoding} is set to C{None} if the object lacks it;
    C{force_page_break} is set to C{'h2'} and C{output} is made absolute.
    @param logger: Logger to report to. When C{None}, the C{txt2lrf} logger
    is fetched and configured through L{setup_cli_handlers}.
    '''
    if logger is None:
        logger = logging.getLogger('txt2lrf')
        setup_cli_handlers(logger,
                           logging.DEBUG if options.verbose else logging.INFO)
    source = os.path.abspath(os.path.expanduser(path))
    # Callers other than txt2lrf's own parser may not define this option.
    options.encoding = getattr(options, 'encoding', None)
    rendered = generate_html(source, options.encoding)
    options.force_page_break = 'h2'
    if not options.output:
        stem = os.path.basename(os.path.splitext(path)[0])
        suffix = '.lrs' if options.lrs else '.lrf'
        options.output = os.path.abspath(stem + suffix)
    options.output = os.path.abspath(os.path.expanduser(options.output))
    html_process_file(rendered.name, options, logger)
def main(args=sys.argv, logger=None):
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
@ -73,16 +90,8 @@ def main(args=sys.argv):
print
print 'No txt file specified'
return 1
txt = os.path.abspath(os.path.expanduser(args[1]))
htmlfile = generate_html(txt, options.encoding)
options.force_page_break = 'h2'
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(args[1])[0]) + ext)
else:
options.output = os.path.abspath(options.output)
process_file(htmlfile.name, options)
process_file(args[1], options, logger)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -40,12 +40,20 @@ def get_parser(extension):
class MetaInformation(object):
'''Convenient encapsulation of book metadata'''
def __init__(self, title, author):
def __init__(self, title, authors):
'''
@param title: title or "Unknown"
@param authors: List of strings or []
'''
self.title = title
self.author = author
self.author = authors # Needed for backward compatibility
#: List of strings or []
self.authors = authors
#: Sort text for author
self.author_sort = None
self.title_sort = None
self.comments = None
self.category = None
self.classification = None
self.publisher = None
self.series = None
self.series_index = None

View File

@ -0,0 +1,155 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Read/Write metadata from Open Packaging Format (.opf) files.'''
import sys
from libprs500.ebooks.metadata import MetaInformation
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
class OPFReader(MetaInformation):
    '''
    Read-only view of the metadata stored in an OPF document.

    The document is parsed once in the constructor; every metadata field is
    a property that queries the parsed tree on access.
    '''

    def __init__(self, stream):
        '''
        @param stream: File-like object positioned anywhere in an OPF
        document. It is rewound if it supports C{seek}; its C{name}
        attribute, when present, becomes the fallback title.
        '''
        # NOTE(review): MetaInformation.__init__ is not called, presumably
        # on purpose: it assigns title/authors/..., which are read-only
        # properties on this class.
        self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
        if hasattr(stream, 'seek'):
            stream.seek(0)
        self.soup = BeautifulStoneSoup(stream.read())
        self.series = self.series_index = self.rating = None

    def _author_element(self):
        '''Return the first dc:creator element whose role is "aut", or None.'''
        for elem in self.soup.package.metadata.findAll('dc:creator'):
            role = elem.get('role')
            if not role:
                role = elem.get('opf:role')
            if role == 'aut':
                return elem
        return None

    def _text_of(self, tag_name):
        '''Return the string of the first C{tag_name} element, or None.'''
        elem = self.soup.find(tag_name)
        if elem:
            return elem.string
        return None

    @property
    def title(self):
        '''Title from dc:title, falling back to the stream name or "Unknown".'''
        title = self.soup.package.metadata.find('dc:title')
        if title:
            return title.string
        return self.default_title

    @property
    def authors(self):
        '''
        List of author names taken from the first dc:creator with role
        "aut", split on "," and "&" with surrounding whitespace removed.
        C{None} if there is no such element or it yields no names.
        '''
        elem = self._author_element()
        if elem is None or not elem.string:
            # No author element, or one without plain text content.
            return None
        names = []
        for chunk in elem.string.split(','):
            names.extend(chunk.split('&'))
        names = [name.strip() for name in names]
        names = [name for name in names if name]
        return names or None

    @property
    def author_sort(self):
        '''The file-as attribute of the author element, or None.'''
        elem = self._author_element()
        if elem is None:
            return None
        return elem.get('file-as') or None

    @property
    def title_sort(self):
        '''Always None; no title sort value is read from the document.'''
        return None

    @property
    def comments(self):
        '''String of the first dc:description element, or None.'''
        return self._text_of('dc:description')

    @property
    def category(self):
        '''String of the first dc:type element, or None.'''
        return self._text_of('dc:type')

    @property
    def publisher(self):
        '''String of the first dc:publisher element, or None.'''
        return self._text_of('dc:publisher')

    @property
    def isbn(self):
        '''String of the first dc:identifier whose scheme is ISBN, or None.'''
        for item in self.soup.package.metadata.findAll('dc:identifier'):
            scheme = item.get('scheme')
            if not scheme:
                scheme = item.get('opf:scheme')
            # Identifiers without any scheme attribute are skipped rather
            # than crashing on None.lower().
            if scheme and scheme.lower() == 'isbn':
                return item.string
        return None

    @property
    def cover(self):
        '''
        The href of the first guide reference whose type is "cover" or
        "other.ms-coverimage-standard" (case-insensitive), or None.
        '''
        guide = self.soup.package.find('guide')
        if guide:
            for reference in guide.findAll('reference'):
                ref_type = reference.get('type')
                if not ref_type:
                    continue
                if ref_type.lower() in ['cover', 'other.ms-coverimage-standard']:
                    return reference.get('href')
        return None

    def possible_cover_prefixes(self):
        '''
        Return the text of every dc:identifier with dashes removed, for use
        as candidate base names of a cover image file.

        @return: Possibly empty list of strings, in document order.
        '''
        prefixes = []
        for item in self.soup.package.metadata.findAll('dc:identifier'):
            text = item.string
            if text:
                # Identifiers without plain text content cannot name a file.
                prefixes.append(text.replace('-', ''))
        return prefixes
def main(args=sys.argv):
    '''
    Command-line entry point; currently a placeholder that ignores C{args}.

    @return: Always 0.
    '''
    exit_status = 0
    return exit_status
if __name__ == '__main__':
sys.exit(main())

View File

@ -86,6 +86,11 @@ def get_metadata(stream):
if category_match:
category = category_match.group(1).strip()
mi = MetaInformation(title, author)
if author:
au = author.split(',')
mi.authors = []
for i in au:
mi.authors.extend(i.split('&'))
mi.comments = comment
mi.category = category
return mi

View File

@ -68,8 +68,6 @@ class LibraryDatabase(object):
mi.title = title
if mi.category:
tags.append(mi.category)
if mi.classification:
tags.append(mi.classification)
if tags:
tags = ', '.join(tags)
else:

View File

@ -23,6 +23,7 @@ class Dialog(QObject):
self.dialog = QDialog(window)
self.accept = self.dialog.accept
self.reject = self.dialog.reject
self._close_event = self.dialog.closeEvent
self.dialog.closeEvent = self.close_event
self.window = window
self.isVisible = self.dialog.isVisible

View File

@ -265,7 +265,7 @@ class Main(QObject, Ui_MainWindow):
formats.append(format)
metadata.append(mi)
names.append(os.path.basename(book))
infos.append({'title':mi.title, 'authors':mi.author,
infos.append({'title':mi.title, 'authors':', '.join(mi.authors),
'cover':self.default_thumbnail, 'tags':[]})
if not to_device:

View File

@ -81,7 +81,9 @@ def setup_completion():
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write(opts_and_exts('any2lrf', htmlop,
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'txt', 'lit', 'rtf', 'pdf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write('''
_prs500_ls()
{