Mirror of https://github.com/kovidgoyal/calibre.git
Synced 2025-07-31 14:33:54 -04:00

Commit b1a02af907 (parent bc4e05a417)
Added rar,zip support to html2lrf and squashed a few more html processing bugs. Version bump.
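
In outline, the conversion path this commit adds works as sketched below. This is a rough sketch based on the hunks that follow; 'book.rar' is a hypothetical input file.

    from tempfile import mkdtemp
    from libprs500 import extract            # new zip/rar dispatcher (added below)

    tmpdir = mkdtemp('', 'html2lrf')         # html2lrf's new get_path() uses the same pattern
    extract('book.rar', tmpdir)              # delegates to libunrar.extract or libunzip.extract
    # get_path() then walks tmpdir, prefers a file whose name contains 'toc',
    # and hands the chosen HTML file to process_file(), which writes the LRF
    # and removes tmpdir when it is done.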
Makefile (14 deleted lines; file removed)

@@ -1,14 +0,0 @@
-APIDOCS=/var/www/libprs500.kovidgoyal.net/htdocs/apidocs
-targets:
-	@echo Targets are: installer doc
-
-installer:
-	@scp dist/libprs500-*.exe castalia:/var/www/vhosts/kovidgoyal.net/subdomains/libprs500/httpdocs/downloads/
-	@ssh castalia chmod a+r /var/www/vhosts/kovidgoyal.net/subdomains/libprs500/httpdocs/downloads/\*
-	@echo Update link on the libprs500 wiki
-
-doc:
-	epydoc --config epydoc.conf
-	cp -r docs/html ${APIDOCS}/
-	epydoc -v --config epydoc-pdf.conf
-	cp docs/pdf/api.pdf ${APIDOCS}/
@@ -15,10 +15,11 @@ Var MUI_TEMP
 !define PRODUCT_NAME "libprs500"
 !define XPUI_BRANDINGTEXT "${PRODUCT_NAME} created by Kovid Goyal"
-!define PRODUCT_VERSION "0.3.12"
+!define PRODUCT_VERSION "0.3.13"
 !define WEBSITE "https://libprs500.kovidgoyal.net"
 !define PY2EXE_DIR "C:\libprs500"
 !define LIBUSB_DIR "C:\libusb-prs500"
+!define LIBUNRAR_DIR "C:\Program Files\UnrarDLL"
 !define QT_DIR "C:\Qt\4.2.3\bin"
 
 ;------------------------------------------------------------------------------------------------------
@@ -98,6 +99,7 @@ Section "libprs500" Seclibprs500
 SetOutPath "$SYSDIR"
 File "${LIBUSB_DIR}\libusb0.dll"
+File "${LIBUNRAR_DIR}\unrar.dll"
 DetailPrint " "
 
 DetailPrint "Installing USB driver (this may take a few seconds) ..."
setup.py (58 changed lines)

@@ -21,6 +21,8 @@ import ez_setup
 ez_setup.use_setuptools()
 from setuptools import setup, find_packages
 
+
+################################# py2exe #######################################
 py2exe_options = {}
 if sys.argv[1] == 'py2exe':
     py2exe_dir = 'C:\libprs500'
@@ -33,10 +35,13 @@ if sys.argv[1] == 'py2exe':
     f.close()
     try:
         import py2exe
-        console = [{
-            'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500',
-            'script' : 'src/libprs500/lrf/html/convert_from.py', 'dest_base':'html2lrf'
-        }]
+        console = [
+            {'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'},
+            {'script' : 'src/libprs500/lrf/html/convert_from.py', 'dest_base':'html2lrf'},
+            {'script' : 'src/libprs500/lrf/txt/convert_from.py', 'dest_base':'txt2lrf'},
+            {'script' : 'src/libprs500/lrf/meta.py', 'dest_base':'lrf-meta'},
+            {'script' : 'src/libprs500/metadata/rtf.py', 'dest_base':'rtf-meta'},
+        ]
         windows = [{'script' : 'src/libprs500/gui/main.py', 'dest_base':'prs500-gui',
                     'icon_resources':[(1,'icons/library.ico')]}]
         excludes = ["Tkconstants", "Tkinter", "tcl", "_imagingtk",
@@ -50,29 +55,9 @@ if sys.argv[1] == 'py2exe':
     except ImportError:
         print >>sys.stderr, 'Must be in Windows to run py2exe'
         sys.exit(1)
+################################################################################
 
 
-# Try to install the Python imaging library as the package name (PIL) doesn't
-# match the distribution file name, thus declaring itas a dependency is useless
-from setuptools.command.easy_install import main as easy_install
-try:
-    try:
-        import Image
-    except ImportError:
-        if sys.platform.lower()[:5] not in ['win32', 'darwin']:
-            print "Trying to install the Python Imaging Library"
-            easy_install(["-f", "http://www.pythonware.com/products/pil/", "Imaging"])
-        else:
-            raise Exception('Please install the Python Imaging library manually from '\
-                            'http://www.pythonware.com/products/pil/')
-except Exception, e:
-    print >> sys.stderr, e
-    print >> sys.stderr, \
-        "WARNING: Could not install the Python Imaging Library.", \
-        "Some functionality will be unavailable"
-
-
 if sys.hexversion < 0x2050000:
     print >> sys.stderr, "You must use python >= 2.5 Try invoking this script as python2.5 setup.py."
     print >> sys.stderr, "If you are using easy_install, try easy_install-2.5"
@@ -110,20 +95,17 @@ setup(
      """,
      long_description =
      """
-     libprs500 is library to interface with the
-     `SONY Portable Reader`_ over USB_.
-     It provides methods to list the contents of the file system on the device,
-     as well as copy files from and to the device.
-     It also provides a command line and a graphical user interface via
-     the commands prs500 and
-     prs500-gui. The graphical user interface is designed to
-     manage an ebook library and allows for easy
-     syncing between the library and the ebook reader.
-     In addition libprs500 has a utility to read/write the metadata
-     from LRF files (unencrypted books in the SONY BBeB format). A command line
-     interface to this is provided via the command lrf-meta.
+     libprs500 is a ebook management application. It maintains an ebook library
+     and allows for easy transfer of books from the library to an ebook reader.
+     At the moment, it supports the `SONY Portable Reader`_.
 
-     A windows installer is available from https://libprs500.kovidgoyal.net
+     It can also convert various popular ebook formats into LRF, the native
+     ebook format of the SONY Reader.
+
+     For screenshots: https://libprs500.kovidgoyal.net/wiki/Screenshots
+
+     For installation/usage instructions please see
+     https://libprs500.kovidgoyal.net/wiki/WikiStart#Installation
 
      For SVN access: svn co https://svn.kovidgoyal.net/code/libprs500
 
@@ -33,10 +33,24 @@ You may have to adjust the GROUP and the location of the rules file to
 suit your distribution.
 """
 
-__version__   = "0.3.12"
+__version__   = "0.3.13"
 __docformat__ = "epytext"
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 
 import sys
 iswindows = 'win32' in sys.platform.lower()
 isosx = 'darwin' in sys.platform.lower()
 
+def extract(path, dir):
+    import os
+    ext = os.path.splitext(path)[1][1:].lower()
+    extractor = None
+    if ext == 'zip':
+        from libprs500.libunzip import extract
+        extractor = extract
+    elif ext == 'rar':
+        from libprs500.libunrar import extract
+        extractor = extract
+    if not extract:
+        raise Exception('Unknown archive type')
+    extractor(path, dir)
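
The dispatcher keys on the lowercased file extension; a quick illustration of the splitext idiom used above (the file names are hypothetical):

    import os
    for p in ('Book.ZIP', 'book.rar', 'book.html'):
        ext = os.path.splitext(p)[1][1:].lower()   # '.ZIP' -> 'zip', and so on
        print(ext)                                 # 'zip' and 'rar' are the extensions the dispatcher recognises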
@@ -17,6 +17,7 @@ This module provides a thin ctypes based wrapper around libunrar.
 
 See ftp://ftp.rarlabs.com/rar/unrarsrc-3.7.5.tar.gz
 """
+import os
 from ctypes import Structure, c_char_p, c_uint, c_void_p, POINTER, \
                    byref, c_wchar_p, CFUNCTYPE, c_int, c_long, c_char, c_wchar
 from StringIO import StringIO
@@ -164,14 +165,21 @@ def get_archive_info(flags):
     print >>ios, 'First Volume:\t', 'yes' if (flags & 256) else 'no or older than 3.0'
     return ios.getvalue()
 
-def extract(path):
+def extract(path, dir):
+    """
+    Extract archive C{filename} into directory C{dir}
+    """
     open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None)
     arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data))
+    cwd = os.getcwd()
+    if not os.path.isdir( dir ):
+        os.mkdir( dir )
+    os.chdir( dir )
     try:
         if open_archive_data.OpenResult != 0:
             raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path))
         print 'Archive:', path
-        print get_archive_info(open_archive_data.Flags)
+        #print get_archive_info(open_archive_data.Flags)
         header_data = RARHeaderDataEx(CmtBuf=None)
         #_libunrar.RARSetCallback(arc_data, callback_func, mode)
         while True:
@@ -184,7 +192,5 @@ def extract(path):
             if RHCode == ERAR_BAD_DATA:
                 raise UnRARException('File header broken')
     finally:
+        os.chdir(cwd)
         _libunrar.RARCloseArchive(arc_data)
-
-extract(r'z:\home\test.rar')
-#extract('/home/kovid/ero/Fansadox Collections/C21 Ponygirl Inferno.rar')
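
With the new signature the caller chooses the destination directory; a minimal sketch with hypothetical paths. Per the hunk above, the function creates the directory if needed and the finally block restores the original working directory.

    from libprs500.libunrar import extract

    extract('/tmp/book.rar', '/tmp/book_contents')   # hypothetical paths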
src/libprs500/libunzip.py (new file, 52 lines)

@@ -0,0 +1,52 @@
+## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os, zipfile
+from cStringIO import StringIO
+
+def extract(filename, dir):
+    """
+    Extract archive C{filename} into directory C{dir}
+    """
+    zf = zipfile.ZipFile( filename )
+    namelist = zf.namelist()
+    dirlist = filter( lambda x: x.endswith( '/' ), namelist )
+    filelist = filter( lambda x: not x.endswith( '/' ), namelist )
+    # make base
+    pushd = os.getcwd()
+    if not os.path.isdir( dir ):
+        os.mkdir( dir )
+    os.chdir( dir )
+    # create directory structure
+    dirlist.sort()
+    for dirs in dirlist:
+        dirs = dirs.split( '/' )
+        prefix = ''
+        for dir in dirs:
+            dirname = os.path.join( prefix, dir )
+            if dir and not os.path.isdir( dirname ):
+                os.mkdir( dirname )
+            prefix = dirname
+    # extract files
+    for fn in filelist:
+        out = open( fn, 'wb' )
+        buffer = StringIO( zf.read( fn ))
+        buflen = 2 ** 20
+        datum = buffer.read( buflen )
+        while datum:
+            out.write( datum )
+            datum = buffer.read( buflen )
+        out.close()
+    os.chdir( pushd )
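
The zip side leans on the convention that directory entries in a zip archive's namelist() end with '/' while file entries do not, which is why the module splits the name list before creating the tree; a quick illustration with a hypothetical archive path:

    import zipfile

    zf = zipfile.ZipFile('/tmp/book.zip')                        # hypothetical path
    dirs  = [n for n in zf.namelist() if n.endswith('/')]        # directory entries
    files = [n for n in zf.namelist() if not n.endswith('/')]    # real members to write out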
@@ -20,6 +20,7 @@ At the time fo writing, this package only supports reading and writing LRF meat
 from optparse import OptionParser
 
 from libprs500.lrf.pylrs.pylrs import Book as _Book
+from libprs500 import __version__ as VERSION
 
 __docformat__ = "epytext"
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
@@ -28,7 +29,7 @@ class ConversionError(Exception):
     pass
 
 def option_parser(usage):
-    parser = OptionParser(usage=usage)
+    parser = OptionParser(usage=usage, version='libprs500 '+VERSION)
    parser.add_option("-t", "--title", action="store", type="string", \
                      dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string", \
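
Passing version= to OptionParser gives every tool that builds its parser through option_parser() a --version flag for free; a small sketch (the usage string here is only illustrative):

    from libprs500.lrf import option_parser

    parser = option_parser('usage: %prog [options] mybook.html')
    parser.parse_args(['--version'])   # prints 'libprs500 0.3.13' and exits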
@@ -21,16 +21,20 @@ Code to convert HTML ebooks into LRF ebooks.
 I am indebted to esperanc for the CSS->Xylog Style conversion routines
 and to Falstaff for pylrs.
 """
-import os, re, sys
+import os, re, sys, shutil
 from htmlentitydefs import name2codepoint
 from urllib import urlopen
 from urlparse import urlparse
+from tempfile import mkdtemp
+from operator import itemgetter
 
-from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString
+from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
+                                             NavigableString, Declaration
 from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
                                       ImageBlock, JumpButton, CharButton, Page
 from libprs500.lrf.pylrs.pylrs import Span as _Span
 from libprs500.lrf import ConversionError, option_parser, Book
+from libprs500 import extract
 
 def ImagePage():
     return Page(evensidemargin=0, oddsidemargin=0, topmargin=0, \
@@ -150,7 +154,7 @@ class Span(_Span):
             elif key == 'font-weight':
                 ans = font_weight(val)
                 if ans:
-                    t['fontweight'] = val
+                    t['fontweight'] = ans
             elif key.startswith("margin"):
                 if key == "margin":
                     u = []
@@ -186,8 +190,6 @@ class Span(_Span):
                     t["align"] = "head"
             else:
                 print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key]
-        if 'small' in t.values():
-            print d, 'font-size' in d.keys()
        return t
 
    def __init__(self, ns, css, font_delta=0):
@@ -252,7 +254,7 @@ class HTMLConverter(object):
        self.current_page = None
        self.current_para = None
        self.current_style = {}
-       self.parse_file(self.soup.html)
+       self.parse_file()
        HTMLConverter.processed_files[path] = self
        print 'done'
 
@@ -318,14 +320,15 @@ class HTMLConverter(object):
            prop.update(self.parse_style_properties(tag["style"]))
        return prop
 
-   def parse_file(self, html):
+   def parse_file(self):
        self.current_page = Page()
        self.current_block = TextBlock()
        self.current_para = Paragraph()
        if self.cover:
            self.add_image_block(self.cover)
        self.top = self.current_block
-       self.parse_tag(html, {})
+       self.process_children(self.soup, {})
        if self.current_para:
            self.current_block.append(self.current_para)
        if self.current_block:
@@ -409,12 +412,12 @@ class HTMLConverter(object):
        """
        if self.current_para.contents:
            self.current_block.append(self.current_para)
+           self.current_para = Paragraph()
        if self.current_block.contents:
            self.current_page.append(self.current_block)
+           self.current_block = TextBlock()
        if self.current_page.contents:
            self.book.append(self.current_page)
-       self.current_para = Paragraph()
-       self.current_block = TextBlock()
        self.current_page = Page()
 
 
@@ -423,40 +426,42 @@ class HTMLConverter(object):
        self.end_page()
        page = ImagePage()
        if not self.images.has_key(path):
-           self.images[path] = ImageBlock(ImageStream(path))
-       page.append(self.images[path])
+           self.images[path] = ImageStream(path)
+       page.append(ImageBlock(self.images[path]))
        self.book.append(page)
 
-   def parse_tag(self, tag, parent_css):
-       def sanctify_css(css):
-           """ Make css safe for use in a SPAM Xylog tag """
-           for key in css.keys():
-               test = key.lower()
-               if test.startswith('margin') or 'indent' in test or \
-                  'padding' in test or 'border' in test or 'page-break' in test \
-                  or test.startswith('mso') \
-                  or test in ['color', 'display', 'text-decoration', \
-                              'letter-spacing', 'text-autospace', 'text-transform']:
-                   css.pop(key)
-           return css
+   def process_children(self, ptag, pcss):
+       """ Process the children of ptag """
+       for c in ptag.contents:
+           if isinstance(c, (Comment, Declaration)):
+               continue
+           elif isinstance(c, Tag):
+               self.parse_tag(c, pcss)
+           elif isinstance(c, NavigableString):
+               self.add_text(c, pcss)
 
-   def add_text(tag, css):
+   def add_text(self, tag, css):
        try:
-           self.current_para.append(Span(tag, sanctify_css(css), \
+           self.current_para.append(Span(tag, self.sanctify_css(css), \
                                          font_delta=self.font_delta))
        except ConversionError, err:
            if self.verbose:
                print >>sys.stderr, err
 
-   def process_children(ptag, pcss):
-       """ Process the children of ptag """
-       for c in ptag.contents:
-           if isinstance(c, Comment):
-               continue
-           elif isinstance(c, Tag):
-               self.parse_tag(c, pcss)
-           elif isinstance(c, NavigableString):
-               add_text(c, pcss)
+   def sanctify_css(self, css):
+       """ Make css safe for use in a SPAM Xylog tag """
+       for key in css.keys():
+           test = key.lower()
+           if test.startswith('margin') or 'indent' in test or \
+              'padding' in test or 'border' in test or 'page-break' in test \
+              or test.startswith('mso') \
+              or test in ['color', 'display', 'text-decoration', \
+                          'letter-spacing', 'text-autospace', 'text-transform']:
+               css.pop(key)
+       return css
 
+
+   def parse_tag(self, tag, parent_css):
+
        def process_text_tag(tag, tag_css):
            if 'page-break-before' in tag_css.keys():
@@ -467,16 +472,14 @@ class HTMLConverter(object):
            if 'page-break-after' in tag_css.keys():
                end_page = True
                tag_css.pop('page-break-after')
-           process_children(tag, tag_css)
+           self.process_children(tag, tag_css)
            if end_page:
                self.end_page()
 
-
-
        try:
            tagname = tag.name.lower()
        except AttributeError:
-           add_text(tag, parent_css)
+           self.add_text(tag, parent_css)
            return
        tag_css = self.tag_css(tag, parent_css=parent_css)
        try: # Skip element if its display attribute is set to none
@@ -494,7 +497,7 @@ class HTMLConverter(object):
                self.current_block = tb
                self.current_para = Paragraph()
                self.targets[tag['name']] = tb
-               process_children(tag, tag_css)
+               self.process_children(tag, tag_css)
            elif tag.has_key('href'):
                purl = urlparse(tag['href'])
                path = purl[2]
@@ -516,12 +519,15 @@ class HTMLConverter(object):
        elif tag.has_key('type') and tag['type'] == "text/css" \
             and tag.has_key('href'):
            url = tag['href']
-           if url.startswith('http://'):
-               f = urlopen(url)
-           else:
-               f = open(url)
-           self.parse_css(f.read())
-           f.close()
+           try:
+               if url.startswith('http://'):
+                   f = urlopen(url)
+               else:
+                   f = open(url)
+               self.parse_css(f.read())
+               f.close()
+           except IOError:
+               pass
        elif tagname in ['p', 'div', 'ul', 'ol', 'tr', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
            # TODO: Implement ol
            indent = tag_css.pop('text-indent', '')
@@ -536,34 +542,33 @@ class HTMLConverter(object):
        elif tagname in ['b', 'strong', 'i', 'em', 'span']:
            process_text_tag(tag, tag_css)
        elif tagname == 'font':
-           pass
-       elif tagname == 'link':
-           pass
-       elif tagname == 'style':
-           pass
+           if tag.has_key('face'):
+               tag_css['font-family'] = tag['face']
+           process_text_tag(tag, tag_css)
        elif tagname == 'br':
            self.current_para.append(CR())
        elif tagname == 'hr':
            self.current_para.append(CR())
            # TODO: Horizontal line?
        else:
-           process_children(tag, tag_css)
+           self.process_children(tag, tag_css)
 
    def writeto(self, path, lrs=False):
        self.book.renderLrs(path) if lrs else self.book.renderLrf(path)
 
 
 def process_file(path, options):
    cwd = os.getcwd()
+   dirpath = None
    try:
-       path = os.path.abspath(path)
+       dirpath, path = get_path(path)
        cpath, tpath = options.cover, ''
        if options.cover and os.access(options.cover, os.R_OK):
            try:
                from PIL import Image
                from libprs500.prs500 import PRS500
                from libprs500.ptempfile import PersistentTemporaryFile
-               im = Image.open(cpath)
+               im = Image.open(os.path.join(cwd, cpath))
                cim = im.resize((600, 800), Image.BICUBIC)
                cf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg")
                cf.close()
@@ -596,6 +601,8 @@ def process_file(path, options):
        print 'Output written to', oname
    finally:
        os.chdir(cwd)
+       if dirpath:
+           shutil.rmtree(dirpath, True)
 
 def main():
    """ CLI for html -> lrf conversions """
@@ -618,6 +625,67 @@ def main():
        options.title = os.path.splitext(os.path.basename(src))[0]
    process_file(src, options)
 
+def console_query(dirpath, candidate, docs):
+    if len(docs) == 1:
+        return 0
+    try:
+        import readline
+    except ImportError:
+        pass
+    i = 0
+    for doc in docs:
+        prefix = '>' if i == candidate else ''
+        print prefix+str(i)+'.\t', doc[0]
+        i += 1
+    print
+    while True:
+        try:
+            choice = raw_input('Choose file to convert (0-'+str(i-1) + \
+                               '). Current choice is ['+ str(candidate) + ']:')
+            if not choice:
+                return candidate
+            choice = int(choice)
+            if choice < 0 or choice >= i:
+                continue
+            candidate = choice
+        except EOFError, KeyboardInterrupt:
+            sys.exit()
+        except:
+            continue
+        break
+    return candidate
+
+
+def get_path(path, query=console_query):
+    path = os.path.abspath(path)
+    ext = os.path.splitext(path)[1][1:].lower()
+    if ext in ['htm', 'html', 'xhtml']:
+        return None, path
+    dirpath = mkdtemp('','html2lrf')
+    extract(path, dirpath)
+    candidate, docs = None, []
+    for root, dirs, files in os.walk(dirpath):
+        for name in files:
+            ext = os.path.splitext(name)[1][1:].lower()
+            if ext not in ['html', 'xhtml', 'htm', 'xhtm']:
+                continue
+            docs.append((name, root, os.stat(os.path.join(root, name)).st_size))
+            if 'toc' in name.lower():
+                candidate = name
+    docs.sort(key=itemgetter(2))
+    if candidate:
+        for i in range(len(docs)):
+            if docs[i][0] == candidate:
+                candidate = i
+                break
+    else:
+        candidate = len(docs) - 1
+    if len(docs) == 0:
+        raise ConversionError('No suitable files found in archive')
+    if len(docs) > 0:
+        candidate = query(dirpath, candidate, docs)
+    return dirpath, os.path.join(docs[candidate][1], docs[candidate][0])
+
+
 if __name__ == '__main__':
    main()
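
The query argument of get_path() above is a pluggable hook: it is called with the extraction directory, the index of the suggested candidate, and the list of (name, root, size) tuples (sorted by size), and must return the index of the file to convert. A minimal non-interactive sketch with a hypothetical archive path:

    from libprs500.lrf.html.convert_from import get_path

    def accept_suggestion(dirpath, candidate, docs):
        # docs is the sorted list of (name, root, size) tuples built by get_path();
        # simply keep whatever it proposed (the 'toc' file, else the largest file).
        return candidate

    dirpath, html = get_path('/tmp/book.zip', query=accept_suggestion)   # hypothetical path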
@@ -1,123 +0,0 @@
-## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2 of the License, or
-## (at your option) any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License along
-## with this program; if not, write to the Free Software Foundation, Inc.,
-## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import os
-import shutil
-import sys
-import hashlib
-import re
-import pkg_resources
-import subprocess
-from tempfile import mkdtemp
-from optparse import OptionParser
-from libprs500.lrf import ConversionError
-from libprs500.lrf.meta import LRFException, LRFMetaFile
-from libprs500.ptempfile import PersistentTemporaryFile
-
-def generate_thumbnail(path):
-    """ Generate a JPEG thumbnail of size ~ 128x128 (aspect ratio preserved)"""
-    try:
-        from PIL import Image
-    except ImportError:
-        raise LRFException("Unable to initialize Python Imaging Library." \
-                           "Thumbnail generation is disabled")
-    im = Image.open(path)
-    im.thumbnail((128, 128), Image.ANTIALIAS)
-    thumb = PersistentTemporaryFile(prefix="makelrf_", suffix=".jpeg")
-    thumb.close()
-    im = im.convert()
-    im.save(thumb.name)
-    return thumb
-
-
-def makelrf(author=None, title=None, \
-            thumbnail=None, src=None, odir=".",\
-            rasterize=True, cover=None):
-    src = os.path.normpath(os.path.abspath(src))
-    bbebook = pkg_resources.resource_filename(__name__, _bbebook)
-    if not os.access(src, os.R_OK):
-        raise LRFException("Unable to read from file: " + src)
-    if thumbnail:
-        thumb = os.path.abspath(thumbnail)
-        if not os.access(thumb, os.R_OK):
-            raise LRFException("Unable to read from " + thumb)
-    else:
-        thumb = pkg_resources.resource_filename(__name__, 'cover.jpg')
-
-    if not author:
-        author = "Unknown"
-    if not title:
-        title = os.path.basename(src)
-    label = os.path.basename(src)
-    id = 'FB' + hashlib.md5(os.path.basename(label)).hexdigest()[:14]
-    name, ext = os.path.splitext(label)
-    cwd = os.path.dirname(src)
-    dirpath = None
-    try:
-        if ext == ".rar":
-            dirpath = mkdtemp('','makelrf')
-            cwd = dirpath
-            cmd = " ".join(["unrar", "e", '"'+src+'"'])
-            proc = subprocess.Popen(cmd, cwd=cwd, shell=True, stderr=subprocess.PIPE)
-            if proc.wait():
-                raise LRFException("unrar failed with error:\n\n" + \
-                                   proc.stderr.read())
-            path, msize = None, 0
-            for root, dirs, files in os.walk(dirpath):
-                for name in files:
-                    if os.path.splitext(name)[1] == ".html":
-                        size = os.stat(os.path.join(root, name)).st_size
-                        if size > msize:
-                            msize, path = size, os.path.join(root, name)
-            if not path:
-                raise LRFException("Could not find .html file in rar archive")
-            src = path
-
-        name = re.sub("\s", "_", name)
-        name = os.path.abspath(os.path.join(odir, name)) + ".lrf"
-        cfg = { 'File' : src, 'Output' : name, 'Label' : label, 'BookID' : id, \
-                'Author' : author, 'Title' : title, 'Publisher' : 'Unknown' \
-              }
-
-
-        if cover:
-            cover = os.path.normpath(os.path.abspath(cover))
-            try:
-                thumbf = generate_thumbnail(cover)
-                thumb = thumbf.name
-            except Exception, e:
-                print >> sys.stderr, "WARNING: Unable to generate thumbnail:\n", \
-                      str(e)
-                thumb = cover
-            cfg['Cover'] = cover
-        cfg['Icon'] = thumb
-        config = PersistentTemporaryFile(prefix='makelrf_', suffix='.xml')
-        config.write(create_xml(cfg))
-        config.close()
-        jar = '-jar "' + bbebook + '"'
-        cmd = " ".join(["java", jar, "-r" if rasterize else "", '"'+config.name+'"'])
-        proc = subprocess.Popen(cmd, \
-                cwd=cwd, shell=True, stderr=subprocess.PIPE)
-        if proc.wait():
-            raise LRFException("BBeBook failed with error:\n\n" + \
-                               proc.stderr.read())
-        # Needed as BBeBook-0.2 doesn't handle non GIF thumbnails correctly.
-        lrf = open(name, "r+b")
-        LRFMetaFile(lrf).fix_thumbnail_type()
-        lrf.close()
-        return name
-    finally:
-        if dirpath:
-            shutil.rmtree(dirpath, True)
upload (new file, 21 lines)

@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Script to upload libprs500 to pypi, the installer to libprs500.kovidgoyal.net and update the api documentation
+
+PREFIX=/var/www/vhosts/kovidgoyal.net/subdomains/libprs500
+DOWNLOADS=$PREFIX/httpdocs/downloads
+DOCS=$PREFIX/httpdocs/apidocs
+exe=`cd dist && ls -1 libprs500-*.exe | tail -n1 && cd ..`
+
+ssh castalia rm -f $DOWNLOADS/libprs500\*.exe
+scp dist/$exe castalia:$DOWNLOADS/
+ssh castalia chmod a+r $DOWNLOADS/\*
+ssh castalia /root/bin/update-installer-link $exe
+
+epydoc --config epydoc.conf
+scp -r docs/html castalia:$DOCS/
+epydoc -v --config epydoc-pdf.conf
+scp docs/pdf/api.pdf castalia:$DOCS/
+
+python setup.py register sdist bdist_egg upload
+rm -rf dist/* build/*