Merge upstream changes

This commit is contained in:
Marshall T. Vandegrift 2009-01-17 11:17:48 -05:00
commit 7449870919
84 changed files with 28093 additions and 18947 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.126'
__version__ = '0.4.128'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -43,7 +43,11 @@ def update_module(mod, path):
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
elif isosx:
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
'Resources', 'lib', 'python2.5', 'site-packages.zip')
'Resources', 'lib',
'python'+'.'.join(map(str, sys.version_info[:2])),
'site-packages.zip')
else:
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
if zp is not None:
update_zipfile(zp, mod, path)
else:

View File

@ -9,31 +9,26 @@ import os, fnmatch
from calibre.devices.usbms.driver import USBMS
class CYBOOKG3(USBMS):
MIME_MAP = {
'mobi' : 'application/mobi',
'prc' : 'application/prc',
'html' : 'application/html',
'pdf' : 'application/pdf',
'rtf' : 'application/rtf',
'txt' : 'text/plain',
}
# Ordered list of supported formats
FORMATS = MIME_MAP.keys()
# Be sure these have an entry in calibre.devices.mime
FORMATS = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']
VENDOR_ID = 0x0bda
PRODUCT_ID = 0x0703
BCD = [0x110, 0x132]
VENDOR_NAME = 'BOOKEEN'
PRODUCT_NAME = 'CYBOOK_GEN3'
WINDOWS_MAIN_MEM = 'CYBOOK_GEN3__-FD'
WINDOWS_CARD_MEM = 'CYBOOK_GEN3__-SD'
OSX_NAME_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
OSX_NAME_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
OSX_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
OSX_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
MAIN_MEMORY_VOLUME_LABEL = 'Cybook Gen 3 Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
EBOOK_DIR = "eBooks"
EBOOK_DIR_MAIN = "eBooks"
SUPPORTS_SUB_DIRS = True
def delete_books(self, paths, end_session=True):
for path in paths:
@ -52,3 +47,8 @@ class CYBOOKG3(USBMS):
for filen in fnmatch.filter(files, filename + "*.t2b"):
os.unlink(os.path.join(p, filen))
try:
os.removedirs(os.path.dirname(path))
except:
pass

View File

@ -41,6 +41,20 @@ class Device(object):
'''Return the FDI description of this device for HAL on linux.'''
return ''
@classmethod
def can_handle(cls, device_info):
'''
Optional method to perform further checks on a device to see if this driver
is capable of handling it. If it is not, it should return False. This method
is only called after the vendor, product ids and the bcd have matched, so
it can do some relatively time intensive checks. The default implementation
returns True.
:param device_info: On Windows, a device ID string. On Unix, a tuple of
``(vendor_id, product_id, bcd)``.
'''
return True
def open(self):
'''
Perform any device specific initialization. Called after the device is
@ -109,7 +123,8 @@ class Device(object):
"""
raise NotImplementedError()
def upload_books(self, files, names, on_card=False, end_session=True):
def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
'''
Upload a list of books to the device. If a file already
exists on the device, it should be replaced.
@ -121,6 +136,10 @@ class Device(object):
once uploaded to the device. len(names) == len(files)
@return: A list of 3-element tuples. The list is meant to be passed
to L{add_books_to_metadata}.
@param metadata: If not None, it is a list of dictionaries. Each dictionary
will have at least the key tags to allow the driver to choose book location
based on tags. len(metadata) == len(files). If your device does not support
hierarchical ebook folders, you can safely ignore this parameter.
'''
raise NotImplementedError()

View File

@ -9,24 +9,30 @@ import os, fnmatch
from calibre.devices.usbms.driver import USBMS
class KINDLE(USBMS):
MIME_MAP = {
'azw' : 'application/azw',
'mobi' : 'application/mobi',
'prc' : 'application/prc',
'txt' : 'text/plain',
}
# Ordered list of supported formats
FORMATS = MIME_MAP.keys()
FORMATS = ['azw', 'mobi', 'prc', 'txt']
VENDOR_ID = 0x1949
PRODUCT_ID = 0x0001
BCD = 0x399
BCD = [0x399]
VENDOR_NAME = 'AMAZON'
PRODUCT_NAME = 'KINDLE'
WINDOWS_MAIN_MEM = 'KINDLE'
MAIN_MEMORY_VOLUME_LABEL = 'Kindle Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
EBOOK_DIR = "documents"
EBOOK_DIR_MAIN = "documents"
def delete_books(self, paths, end_session=True):
for path in paths:
if os.path.exists(path):
os.unlink(path)
filepath, ext = os.path.splitext(path)
basepath, filename = os.path.split(filepath)
# Delete the ebook auxiliary file
if os.path.exists(filepath + '.mbp'):
os.unlink(filepath + '.mbp')

View File

@ -0,0 +1,19 @@
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
'''
Global Mime mapping of ebook types.
'''
MIME_MAP = {
'azw' : 'application/azw',
'epub' : 'application/epub+zip',
'html' : 'text/html',
'lrf' : 'application/x-sony-bbeb',
'lrx' : 'application/x-sony-bbeb',
'mobi' : 'application/mobi',
'pdf' : 'application/pdf',
'prc' : 'application/prc',
'rtf' : 'application/rtf',
'txt' : 'text/plain',
}

View File

@ -841,7 +841,8 @@ class PRS500(Device):
self.upload_book_list(booklists[1], end_session=False)
@safe
def upload_books(self, files, names, on_card=False, end_session=True):
def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
card = self.card(end_session=False)
prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/'
if on_card and not self._exists(prefix)[0]:

View File

@ -407,7 +407,8 @@ class PRS505(Device):
if not os.path.isdir(path):
os.utime(path, None)
def upload_books(self, files, names, on_card=False, end_session=True):
def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
if on_card and not self._card_prefix:
raise ValueError(_('The reader has no storage card connected.'))
path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \

View File

@ -60,15 +60,18 @@ class DeviceScanner(object):
def is_device_connected(self, device):
if iswindows:
vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
for device_id in self.devices:
if vid in device_id and pid in device_id:
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
return True
if device.can_handle(device_id):
return True
else:
for vendor, product, bcdDevice in self.devices:
if device.VENDOR_ID == vendor and device.PRODUCT_ID == product:
if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)):
return True
if device.can_handle((vendor, product, bcdDevice)):
return True
return False

View File

@ -6,7 +6,7 @@ intended to be subclassed with the relevant parts implemented for a particular
device. This class handles device detection.
'''
import os, time
import os, subprocess, time
from calibre.devices.interface import Device as _Device
from calibre.devices.errors import DeviceError
@ -23,11 +23,12 @@ class Device(_Device):
PRODUCT_ID = 0x0
BCD = None
VENDOR_NAME = ''
PRODUCT_NAME = ''
VENDOR_NAME = None
WINDOWS_MAIN_MEM = None
WINDOWS_CARD_MEM = None
OSX_NAME_MAIN_MEM = ''
OSX_NAME_CARD_MEM = ''
OSX_MAIN_MEM = None
OSX_CARD_MEM = None
MAIN_MEMORY_VOLUME_LABEL = ''
STORAGE_CARD_VOLUME_LABEL = ''
@ -148,43 +149,47 @@ class Device(_Device):
return (msz, 0, csz)
@classmethod
def windows_match_device(cls, device_id):
device_id = device_id.upper()
if 'VEN_'+cls.VENDOR_NAME in device_id and \
'PROD_'+cls.PRODUCT_NAME in device_id:
return True
vid, pid = hex(cls.VENDOR_ID)[2:], hex(cls.PRODUCT_ID)[2:]
while len(vid) < 4: vid = '0' + vid
while len(pid) < 4: pid = '0' + pid
if 'VID_'+vid in device_id and 'PID_'+pid in device_id:
return True
def windows_match_device(self, pnp_id, device_id):
pnp_id = pnp_id.upper()
if device_id and pnp_id is not None:
device_id = device_id.upper()
if 'VEN_' + self.VENDOR_NAME in pnp_id and 'PROD_' + device_id in pnp_id:
return True
return False
# This only supports Windows >= 2000
def windows_get_drive_prefix(self, drive):
prefix = None
try:
partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
prefix = logical_disk.DeviceID + os.sep
except IndexError:
pass
return prefix
def open_windows(self):
drives = []
drives = {}
wmi = __import__('wmi', globals(), locals(), [], -1)
c = wmi.WMI()
for drive in c.Win32_DiskDrive():
if self.__class__.windows_match_device(str(drive.PNPDeviceID)):
if drive.Partitions == 0:
continue
try:
partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
prefix = logical_disk.DeviceID+os.sep
drives.append((drive.Index, prefix))
except IndexError:
continue
if self.windows_match_device(str(drive.PNPDeviceID), WINDOWS_MAIN_MEM):
drives['main'] = self.windows_get_drive_prefix(drive)
elif self.windows_match_device(str(drive.PNPDeviceID), WINDOWS_CARD_MEM):
drives['card'] = self.windows_get_drive_prefix(drive)
if 'main' and 'card' in drives.keys():
break
if not drives:
raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
drives.sort(cmp=lambda a, b: cmp(a[0], b[0]))
self._main_prefix = drives[0][1]
if len(drives) > 1:
self._card_prefix = drives[1][1]
raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.') % self.__class__.__name__)
self._main_prefix = drives['main'] if 'main' in names.keys() else None
self._card_prefix = drives['card'] if 'card' in names.keys() else None
@classmethod
def get_osx_mountpoints(self, raw=None):
@ -207,9 +212,9 @@ class Device(_Device):
break
for i, line in enumerate(lines):
if line.strip().endswith('<class IOMedia>') and self.OSX_NAME_MAIN_MEM in line:
if self.OSX_MAIN_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_MAIN_MEM in line:
get_dev_node(lines[i+1:], 'main')
if line.strip().endswith('<class IOMedia>') and self.OSX_NAME_CARD_MEM in line:
if self.OSX_CARD_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_CARD_MEM in line:
get_dev_node(lines[i+1:], 'card')
if len(names.keys()) == 2:
break

View File

@ -12,11 +12,13 @@ from itertools import cycle
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
from calibre.devices.errors import FreeSpaceError
from calibre.devices.mime import MIME_MAP
class USBMS(Device):
EBOOK_DIR = ''
MIME_MAP = {}
FORMATS = []
EBOOK_DIR_MAIN = ''
EBOOK_DIR_CARD = ''
SUPPORTS_SUB_DIRS = False
def __init__(self, key='-1', log_packets=False, report_progress=None):
pass
@ -35,29 +37,39 @@ class USBMS(Device):
return bl
prefix = self._card_prefix if oncard else self._main_prefix
ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
# Get all books in all directories under the root EBOOK_DIR directory
for path, dirs, files in os.walk(os.path.join(prefix, self.EBOOK_DIR)):
# Get all books in all directories under the root ebook_dir directory
for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
# Filter out anything that isn't in the list of supported ebook types
for book_type in self.MIME_MAP.keys():
for book_type in self.FORMATS:
for filename in fnmatch.filter(files, '*.%s' % (book_type)):
title, author, mime = self.__class__.extract_book_metadata_by_filename(filename)
bl.append(Book(os.path.join(path, filename), title, author, mime))
return bl
def upload_books(self, files, names, on_card=False, end_session=True):
def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
if on_card and not self._card_prefix:
raise ValueError(_('The reader has no storage card connected.'))
if not on_card:
path = os.path.join(self._main_prefix, self.EBOOK_DIR)
path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
else:
path = os.path.join(self._card_prefix, self.EBOOK_DIR)
sizes = map(os.path.getsize, files)
path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
def get_size(obj):
if hasattr(obj, 'seek'):
obj.seek(0, os.SEEK_END)
size = obj.tell()
obj.seek(0)
return size
return os.path.getsize(obj)
sizes = map(get_size, files)
size = sum(sizes)
if on_card and size > self.free_space()[2] - 1024*1024:
raise FreeSpaceError(_("There is insufficient free space on the storage card"))
if not on_card and size > self.free_space()[0] - 2*1024*1024:
@ -65,17 +77,42 @@ class USBMS(Device):
paths = []
names = iter(names)
metadata = iter(metadata)
for infile in files:
filepath = os.path.join(path, names.next())
newpath = path
if self.SUPPORTS_SUB_DIRS:
mdata = metadata.next()
if 'tags' in mdata.keys():
for tag in mdata['tags']:
if tag.startswith('/'):
newpath += tag
newpath = os.path.normpath(newpath)
break
if not os.path.exists(newpath):
os.makedirs(newpath)
filepath = os.path.join(newpath, names.next())
paths.append(filepath)
shutil.copy2(infile, filepath)
if hasattr(infile, 'read'):
infile.seek(0)
dest = open(filepath, 'wb')
shutil.copyfileobj(infile, dest, 10*1024*1024)
dest.flush()
dest.close()
else:
shutil.copy2(infile, filepath)
return zip(paths, cycle([on_card]))
@classmethod
def add_books_to_metadata(cls, locations, metadata, booklists):
def add_books_to_metadata(cls, locations, metadata, booklists):
for location in locations:
path = location[0]
on_card = 1 if location[1] else 0
@ -88,6 +125,10 @@ class USBMS(Device):
if os.path.exists(path):
# Delete the ebook
os.unlink(path)
try:
os.removedirs(os.path.dirname(path))
except:
pass
@classmethod
def remove_books_from_metadata(cls, paths, booklists):
@ -96,7 +137,6 @@ class USBMS(Device):
for book in bl:
if path.endswith(book.path):
bl.remove(book)
break
def sync_booklists(self, booklists, end_session=True):
# There is no meta data on the device to update. The device is treated
@ -136,10 +176,11 @@ class USBMS(Device):
else:
book_title = os.path.splitext(filename)[0].replace('_', ' ')
fileext = os.path.splitext(filename)[1]
if fileext in cls.MIME_MAP.keys():
book_mime = cls.MIME_MAP[fileext]
fileext = os.path.splitext(filename)[1][1:]
if fileext in cls.FORMATS:
book_mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown'
return book_title, book_author, book_mime
# ls, rm, cp, mkdir, touch, cat

View File

@ -67,6 +67,7 @@ def txt2opf(path, tdir, opts):
def pdf2opf(path, tdir, opts):
from calibre.ebooks.lrf.pdf.convert_from import generate_html
generate_html(path, tdir)
opts.dont_split_on_page_breaks = True
return os.path.join(tdir, 'metadata.opf')
def epub2opf(path, tdir, opts):

View File

@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print):
html_files.append(os.path.abspath(content(f)))
for path in html_files:
if not os.access(path, os.R_OK):
continue
base = os.path.dirname(path)
root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
for element, attribute, link, pos in list(root.iterlinks()):

View File

@ -335,7 +335,7 @@ class PreProcessor(object):
# Fix pdftohtml markup
PDFTOHTML = [
# Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p>
@ -560,7 +560,7 @@ class Processor(Parser):
hr = etree.Element('hr')
if elem.getprevious() is None:
elem.getparent()[:0] = [hr]
else:
elif elem.getparent() is not None:
insert = None
for i, c in enumerate(elem.getparent()):
if c is elem:
@ -796,7 +796,19 @@ class Processor(Parser):
setting = ''
face = font.attrib.pop('face', None)
if face is not None:
setting += 'font-face:%s;'%face
faces = []
for face in face.split(','):
face = face.strip()
if ' ' in face and not (face[0] == face[-1] == '"'):
face = '"%s"' % face.replace('"', r'\"')
faces.append(face)
for generic in ('serif', 'sans-serif', 'monospace'):
if generic in faces:
break
else:
faces.append('serif')
family = ', '.join(faces)
setting += 'font-family: %s;' % family
color = font.attrib.pop('color', None)
if color is not None:
setting += 'color:%s'%color

View File

@ -7,24 +7,20 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, os
import sys, struct, cStringIO, os
import functools
import re
from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
from calibre.ebooks.oeb.base import XML_PARSER, urlnormalize
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks import DRMError
from calibre import plugins
lzx, lxzerror = plugins['lzx']
msdes, msdeserror = plugins['msdes']
__all__ = ["LitReader"]
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
"""
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -112,9 +108,6 @@ def consume_sized_utf8_string(bytes, zpad=False):
pos += 1
return u''.join(result), bytes[pos:]
def encode(string):
return unicode(string).encode('ascii', 'xmlcharrefreplace')
class UnBinary(object):
AMPERSAND_RE = re.compile(
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -125,13 +118,13 @@ class UnBinary(object):
def __init__(self, bin, path, manifest={}, map=HTML_MAP):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.is_html = map is HTML_MAP
self.opf = map is OPF_MAP
self.bin = bin
self.dir = os.path.dirname(path)
buf = StringIO()
self.binary_to_text(bin, buf)
self.raw = buf.getvalue().lstrip()
self.buf = cStringIO.StringIO()
self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_reserved()
self._tree = None
def escape_reserved(self):
raw = self.raw
@ -158,28 +151,18 @@ class UnBinary(object):
return '/'.join(relpath)
def __unicode__(self):
return self.raw.decode('utf-8')
def __str__(self):
return self.raw
def tree():
def fget(self):
if not self._tree:
self._tree = etree.fromstring(self.raw, parser=XML_PARSER)
return self._tree
return property(fget=fget)
tree = tree()
def binary_to_text(self, bin, buf, index=0, depth=0):
def binary_to_text(self, base=0, depth=0):
tag_name = current_map = None
dynamic_tag = errors = 0
in_censorship = is_goingdown = False
state = 'text'
index = base
flags = 0
while index < len(bin):
c, index = read_utf8_char(bin, index)
while index < len(self.bin):
c, index = read_utf8_char(self.bin, index)
oc = ord(c)
if state == 'text':
@ -192,7 +175,7 @@ class UnBinary(object):
c = '>>'
elif c == '<':
c = '<<'
buf.write(encode(c))
self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
elif state == 'get flags':
if oc == 0:
@ -205,7 +188,7 @@ class UnBinary(object):
state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING:
tag = oc
buf.write('<')
self.buf.write('<')
if not (flags & FLAG_CLOSING):
is_goingdown = True
if tag == 0x8000:
@ -222,7 +205,7 @@ class UnBinary(object):
tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown' % unichr(tag)
buf.write(encode(tag_name))
self.buf.write(unicode(tag_name).encode('utf-8'))
elif flags & FLAG_CLOSING:
if depth == 0:
raise LitError('Extra closing tag')
@ -234,14 +217,15 @@ class UnBinary(object):
if not is_goingdown:
tag_name = None
dynamic_tag = 0
buf.write(' />')
self.buf.write(' />')
else:
buf.write('>')
index = self.binary_to_text(bin, buf, index, depth+1)
self.buf.write('>')
index = self.binary_to_text(base=index, depth=depth+1)
is_goingdown = False
if not tag_name:
raise LitError('Tag ends before it begins.')
buf.write(encode(u''.join(('</', tag_name, '>'))))
self.buf.write(u''.join(
('</', tag_name, '>')).encode('utf-8'))
dynamic_tag = 0
tag_name = None
state = 'text'
@ -261,7 +245,7 @@ class UnBinary(object):
in_censorship = True
state = 'get value length'
continue
buf.write(' ' + encode(attr) + '=')
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
if attr in ['href', 'src']:
state = 'get href length'
else:
@ -269,39 +253,40 @@ class UnBinary(object):
elif state == 'get value length':
if not in_censorship:
buf.write('"')
self.buf.write('"')
count = oc - 1
if count == 0:
if not in_censorship:
buf.write('"')
self.buf.write('"')
in_censorship = False
state = 'get attr'
continue
state = 'get value'
if oc == 0xffff:
continue
if count < 0 or count > (len(bin) - index):
if count < 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
if count == 0xfffe:
if not in_censorship:
buf.write('%s"' % (oc - 1))
self.buf.write('%s"' % (oc - 1))
in_censorship = False
state = 'get attr'
elif count > 0:
if not in_censorship:
buf.write(encode(c))
self.buf.write(c.encode(
'ascii', 'xmlcharrefreplace'))
count -= 1
if count == 0:
if not in_censorship:
buf.write('"')
self.buf.write('"')
in_censorship = False
state = 'get attr'
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(bin)-index:
if count <= 0 or count > len(self.bin)-index:
raise LitError('Invalid character count %d' % count)
dynamic_tag += 1
state = 'get custom'
@ -311,26 +296,26 @@ class UnBinary(object):
tag_name += c
count -= 1
if count == 0:
buf.write(encode(tag_name))
self.buf.write(unicode(tag_name).encode('utf-8'))
state = 'get attr'
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(bin) - index):
if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
buf.write(' ')
self.buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
buf.write(encode(c))
self.buf.write(unicode(c).encode('utf-8'))
count -= 1
if count == 0:
buf.write('=')
self.buf.write('=')
state = 'get value length'
elif state == 'get href length':
count = oc - 1
if count <= 0 or count > (len(bin) - index):
if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
@ -344,11 +329,10 @@ class UnBinary(object):
if frag:
path = '#'.join((path, frag))
path = urlnormalize(path)
buf.write(encode(u'"%s"' % path))
self.buf.write((u'"%s"' % path).encode('utf-8'))
state = 'get attr'
return index
class DirectoryEntry(object):
def __init__(self, name, section, offset, size):
self.name = name
@ -363,7 +347,6 @@ class DirectoryEntry(object):
def __str__(self):
return repr(self)
class ManifestItem(object):
def __init__(self, original, internal, mime_type, offset, root, state):
self.original = original
@ -391,87 +374,65 @@ class ManifestItem(object):
% (self.internal, self.path, self.mime_type, self.offset,
self.root, self.state)
def preserve(function):
def wrapper(self, *args, **kwargs):
opos = self.stream.tell()
opos = self._stream.tell()
try:
return function(self, *args, **kwargs)
finally:
self.stream.seek(opos)
self._stream.seek(opos)
functools.update_wrapper(wrapper, function)
return wrapper
class LitFile(object):
class LitReader(object):
PIECE_SIZE = 16
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self.stream = filename_or_stream
else:
self.stream = open(filename_or_stream, 'rb')
try:
self.opf_path = os.path.splitext(
os.path.basename(self.stream.name))[0] + '.opf'
except AttributeError:
self.opf_path = 'content.opf'
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.read_secondary_header()
self.read_header_pieces()
self.read_section_names()
self.read_manifest()
self.read_drm()
def warn(self, msg):
print "WARNING: %s" % (msg,)
XML_PARSER = etree.XMLParser(
recover=True, resolve_entities=False)
def magic():
@preserve
def fget(self):
self.stream.seek(0)
return self.stream.read(8)
self._stream.seek(0)
return self._stream.read(8)
return property(fget=fget)
magic = magic()
def version():
def fget(self):
self.stream.seek(8)
return u32(self.stream.read(4))
self._stream.seek(8)
return u32(self._stream.read(4))
return property(fget=fget)
version = version()
def hdr_len():
@preserve
def fget(self):
self.stream.seek(12)
return int32(self.stream.read(4))
self._stream.seek(12)
return int32(self._stream.read(4))
return property(fget=fget)
hdr_len = hdr_len()
def num_pieces():
@preserve
def fget(self):
self.stream.seek(16)
return int32(self.stream.read(4))
self._stream.seek(16)
return int32(self._stream.read(4))
return property(fget=fget)
num_pieces = num_pieces()
def sec_hdr_len():
@preserve
def fget(self):
self.stream.seek(20)
return int32(self.stream.read(4))
self._stream.seek(20)
return int32(self._stream.read(4))
return property(fget=fget)
sec_hdr_len = sec_hdr_len()
def guid():
@preserve
def fget(self):
self.stream.seek(24)
return self.stream.read(16)
self._stream.seek(24)
return self._stream.read(16)
return property(fget=fget)
guid = guid()
@ -481,27 +442,44 @@ class LitFile(object):
size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len
self.stream.seek(0)
return self.stream.read(size)
self._stream.seek(0)
return self._stream.read(size)
return property(fget=fget)
header = header()
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self._stream = filename_or_stream
else:
self._stream = open(filename_or_stream, 'rb')
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.entries = {}
self._read_secondary_header()
self._read_header_pieces()
self._read_section_names()
self._read_manifest()
self._read_meta()
self._read_drm()
@preserve
def __len__(self):
self.stream.seek(0, 2)
return self.stream.tell()
self._stream.seek(0, 2)
return self._stream.tell()
@preserve
def read_raw(self, offset, size):
self.stream.seek(offset)
return self.stream.read(size)
def _read_raw(self, offset, size):
self._stream.seek(offset)
return self._stream.read(size)
def read_content(self, offset, size):
return self.read_raw(self.content_offset + offset, size)
def _read_content(self, offset, size):
return self._read_raw(self.content_offset + offset, size)
def read_secondary_header(self):
def _read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self.read_raw(offset, self.sec_hdr_len)
bytes = self._read_raw(offset, self.sec_hdr_len)
offset = int32(bytes[4:])
while offset < len(bytes):
blocktype = bytes[offset:offset+4]
@ -529,21 +507,21 @@ class LitFile(object):
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
def read_header_pieces(self):
def _read_header_pieces(self):
src = self.header[self.hdr_len:]
for i in xrange(self.num_pieces):
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:])
piece = self.read_raw(offset, size)
piece = self._read_raw(offset, size)
if i == 0:
continue # Dont need this piece
elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece')
self.read_directory(piece)
self._read_directory(piece)
elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown:
@ -554,13 +532,12 @@ class LitFile(object):
elif i == 4:
self.piece4_guid = piece
def read_directory(self, piece):
def _read_directory(self, piece):
if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if (32 + (num_chunks * chunk_size)) != len(piece):
raise LitError('IFCM header has incorrect length')
self.entries = {}
raise LitError('IFCM HEADER has incorrect length')
for i in xrange(num_chunks):
offset = 32 + (i * chunk_size)
chunk = piece[offset:offset + chunk_size]
@ -594,17 +571,17 @@ class LitFile(object):
entry = DirectoryEntry(name, section, offset, size)
self.entries[name] = entry
def read_section_names(self):
def _read_section_names(self):
if '::DataSpace/NameList' not in self.entries:
raise LitError('Lit file does not have a valid NameList')
raw = self.get_file('::DataSpace/NameList')
if len(raw) < 4:
raise LitError('Invalid Namelist section')
pos = 4
num_sections = u16(raw[2:pos])
self.section_names = [""] * num_sections
self.section_data = [None] * num_sections
for section in xrange(num_sections):
self.num_sections = u16(raw[2:pos])
self.section_names = [""]*self.num_sections
self.section_data = [None]*self.num_sections
for section in xrange(self.num_sections):
size = u16(raw[pos:pos+2])
pos += 2
size = size*2 + 2
@ -614,12 +591,11 @@ class LitFile(object):
raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
pos += size
def read_manifest(self):
def _read_manifest(self):
if '/manifest' not in self.entries:
raise LitError('Lit file does not have a valid manifest')
raw = self.get_file('/manifest')
self.manifest = {}
self.paths = {self.opf_path: None}
while raw:
slen, raw = ord(raw[0]), raw[1:]
if slen == 0: break
@ -658,9 +634,28 @@ class LitFile(object):
for item in mlist:
if item.path[0] == '/':
item.path = os.path.basename(item.path)
self.paths[item.path] = item
def read_drm(self):
def _pretty_print(self, xml):
f = cStringIO.StringIO(xml.encode('utf-8'))
doc = etree.parse(f, parser=self.XML_PARSER)
pretty = etree.tostring(doc, encoding='ascii', pretty_print=True)
return XML_DECL + unicode(pretty)
def _read_meta(self):
path = 'content.opf'
raw = self.get_file('/meta')
xml = OPF_DECL
try:
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
self.meta = xml
def _read_drm(self):
self.drmlevel = 0
if '/DRMStorage/Licenses/EUL' in self.entries:
self.drmlevel = 5
@ -671,7 +666,7 @@ class LitFile(object):
else:
return
if self.drmlevel < 5:
msdes.deskey(self.calculate_deskey(), msdes.DE1)
msdes.deskey(self._calculate_deskey(), msdes.DE1)
bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
if bookkey[0] != '\000':
raise LitError('Unable to decrypt title key!')
@ -679,7 +674,7 @@ class LitFile(object):
else:
raise DRMError("Cannot access DRM-protected book")
def calculate_deskey(self):
def _calculate_deskey(self):
hashfiles = ['/meta', '/DRMStorage/DRMSource']
if self.drmlevel == 3:
hashfiles.append('/DRMStorage/DRMBookplate')
@ -703,18 +698,18 @@ class LitFile(object):
def get_file(self, name):
entry = self.entries[name]
if entry.section == 0:
return self.read_content(entry.offset, entry.size)
return self._read_content(entry.offset, entry.size)
section = self.get_section(entry.section)
return section[entry.offset:entry.offset+entry.size]
def get_section(self, section):
data = self.section_data[section]
if not data:
data = self.get_section_uncached(section)
data = self._get_section(section)
self.section_data[section] = data
return data
def get_section_uncached(self, section):
def _get_section(self, section):
name = self.section_names[section]
path = '::DataSpace/Storage/' + name
transform = self.get_file(path + '/Transform/List')
@ -726,29 +721,29 @@ class LitFile(object):
raise LitError("ControlData is too short")
guid = msguid(transform)
if guid == DESENCRYPT_GUID:
content = self.decrypt(content)
content = self._decrypt(content)
control = control[csize:]
elif guid == LZXCOMPRESS_GUID:
reset_table = self.get_file(
'/'.join(('::DataSpace/Storage', name, 'Transform',
LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
content = self.decompress(content, control, reset_table)
content = self._decompress(content, control, reset_table)
control = control[csize:]
else:
raise LitError("Unrecognized transform: %s." % repr(guid))
transform = transform[16:]
return content
def decrypt(self, content):
def _decrypt(self, content):
length = len(content)
extra = length & 0x7
if extra > 0:
self.warn("content length not a multiple of block size")
self._warn("content length not a multiple of block size")
content += "\0" * (8 - extra)
msdes.deskey(self.bookkey, msdes.DE1)
return msdes.des(content)
def decompress(self, content, control, reset_table):
def _decompress(self, content, control, reset_table):
if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
raise LitError("Invalid ControlData tag value")
if len(reset_table) < (RESET_INTERVAL + 8):
@ -789,7 +784,7 @@ class LitFile(object):
result.append(
lzx.decompress(content[base:size], window_bytes))
except lzx.LZXError:
self.warn("LZX decompression error; skipping chunk")
self._warn("LZX decompression error; skipping chunk")
bytes_remaining -= window_bytes
base = size
accum += int32(reset_table[RESET_INTERVAL:])
@ -799,88 +794,55 @@ class LitFile(object):
try:
result.append(lzx.decompress(content[base:], bytes_remaining))
except lzx.LZXError:
self.warn("LZX decompression error; skipping chunk")
self._warn("LZX decompression error; skipping chunk")
bytes_remaining = 0
if bytes_remaining > 0:
raise LitError("Failed to completely decompress section")
return ''.join(result)
class LitReader(object):
def __init__(self, filename_or_stream):
self._litfile = LitFile(filename_or_stream)
def namelist(self):
return self._litfile.paths.keys()
def exists(self, name):
return urlunquote(name) in self._litfile.paths
def read_xml(self, name):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
content = self._read_meta()
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = unbin.tree
else:
raise LitError('Requested non-XML content as XML')
return content
def read(self, name, pretty_print=False):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
meta = self._read_meta()
content = OPF_DECL + etree.tostring(
meta, encoding='ascii', pretty_print=pretty_print)
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = HTML_DECL
def get_entry_content(self, entry, pretty_print=False):
if 'spine' in entry.state:
name = '/'.join(('/data', entry.internal, 'content'))
path = entry.path
raw = self.get_file(name)
decl, map = (OPF_DECL, OPF_MAP) \
if name == '/meta' else (HTML_DECL, HTML_MAP)
content = decl + unicode(UnBinary(raw, path, self.manifest, map))
if pretty_print:
content += etree.tostring(unbin.tree,
encoding='ascii', pretty_print=True)
else:
content += str(unbin)
content = self._pretty_print(content)
content = content.encode('utf-8')
else:
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
name = '/'.join(('/data', entry.internal))
content = self.get_file(name)
return content
def meta():
def fget(self):
return self.read(self._litfile.opf_path)
return property(fget=fget)
meta = meta()
def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
output_dir = os.path.abspath(output_dir)
try:
opf_path = os.path.splitext(
os.path.basename(self._stream.name))[0] + '.opf'
except AttributeError:
opf_path = 'content.opf'
opf_path = os.path.join(output_dir, opf_path)
self._ensure_dir(opf_path)
with open(opf_path, 'wb') as f:
xml = self.meta
if pretty_print:
xml = self._pretty_print(xml)
f.write(xml.encode('utf-8'))
for entry in self.manifest.values():
path = os.path.join(output_dir, entry.path)
self._ensure_dir(path)
with open(path, 'wb') as f:
f.write(self.get_entry_content(entry, pretty_print))
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
for name in self.namelist():
path = os.path.join(output_dir, name)
self._ensure_dir(path)
with open(path, 'wb') as f:
f.write(self.read(name, pretty_print=pretty_print))
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')
try:
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
return unbin.tree
def _warn(self, msg):
print "WARNING: %s" % (msg,)
def option_parser():
from calibre.utils.config import OptionParser
@ -890,8 +852,7 @@ def option_parser():
help=_('Output directory. Defaults to current directory.'))
parser.add_option(
'-p', '--pretty-print', default=False, action='store_true',
help=_('Legibly format extracted markup.' \
' May modify meaningful whitespace.'))
help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
parser.add_option(
'--verbose', default=False, action='store_true',
help=_('Useful for debugging.'))

View File

@ -27,11 +27,16 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
urlnormalize, xpath
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.lit.lzx import Compressor
import calibre
from calibre import LoggingInterface
from calibre import plugins
msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1
@ -138,17 +143,16 @@ def warn(x):
class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
self.path = path
self.logger = logger
self.dir = os.path.dirname(path)
def __init__(self, root, path, oeb, map=HTML_MAP):
self.item = item
self.logger = oeb.logger
self.manifest = oeb.manifest
self.tags, self.tattrs = map
self.buf = StringIO()
self.anchors = []
self.page_breaks = []
self.is_html = is_html = map is HTML_MAP
self.stylizer = Stylizer(root, path, oeb) if is_html else None
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
self.tree_to_binary(root)
self.content = self.buf.getvalue()
self.ahc = self.build_ahc() if is_html else None
@ -205,6 +209,8 @@ class ReBinary(object):
if attr in ('href', 'src'):
value = urlnormalize(value)
path, frag = urldefrag(value)
if self.item:
path = self.item.abshref(path)
prefix = unichr(3)
if path in self.manifest.hrefs:
prefix = unichr(2)
@ -217,7 +223,7 @@ class ReBinary(object):
elif attr.startswith('ms--'):
attr = '%' + attr[4:]
elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
value = OEB_CSS_MIME
value = CSS_MIME
if attr in tattrs:
self.write(tattrs[attr])
else:
@ -270,7 +276,7 @@ class ReBinary(object):
def build_ahc(self):
if len(self.anchors) > 6:
self.logger.log_warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.path)
"Some links may not work properly." % self.item.href)
data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors:
@ -294,10 +300,9 @@ def preserve(function):
return wrapper
class LitWriter(object):
def __init__(self, oeb, logger=FauxLogger()):
self._oeb = oeb
self._logger = logger
self._litize_oeb()
def __init__(self):
# Wow, no options
pass
def _litize_oeb(self):
oeb = self._oeb
@ -306,32 +311,27 @@ class LitWriter(object):
if oeb.metadata.cover:
id = str(oeb.metadata.cover[0])
cover = oeb.manifest[id]
elif MS_COVER_TYPE in oeb.guide:
href = oeb.guide[MS_COVER_TYPE].href
cover = oeb.manifest.hrefs[href]
elif 'cover' in oeb.guide:
href = oeb.guide['cover'].href
cover = oeb.manifest.hrefs[href]
else:
html = oeb.spine[0].data
imgs = xpath(html, '//img[position()=1]')
href = imgs[0].get('src') if imgs else None
cover = oeb.manifest.hrefs[href] if href else None
if cover:
if not oeb.metadata.cover:
oeb.metadata.add('cover', cover.id)
for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide:
oeb.guide.add(type, title, cover.href)
else:
self._logger.log_warn('No suitable cover image found.')
self._logger.warn('No suitable cover image found.')
def dump(self, stream):
def dump(self, oeb, path):
if hasattr(path, 'write'):
return self._dump_stream(oeb, path)
with open(path, 'w+b') as stream:
return self._dump_stream(oeb, stream)
def _dump_stream(self, oeb, stream):
self._oeb = oeb
self._logger = oeb.logger
self._stream = stream
self._sections = [StringIO() for i in xrange(4)]
self._directory = []
self._meta = None
self._dump()
self._litize_oeb()
self._write_content()
def _write(self, *data):
for datum in data:
@ -345,7 +345,7 @@ class LitWriter(object):
def _tell(self):
return self._stream.tell()
def _dump(self):
def _write_content(self):
# Build content sections
self._build_sections()
@ -474,8 +474,7 @@ class LitWriter(object):
secnum = 0
if not isinstance(data, basestring):
self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
logger=self._logger)
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks
@ -554,8 +553,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
logger=self._logger)
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
meta = rebin.content
self._meta = meta
self._add_file('/meta', meta)
@ -719,19 +717,31 @@ def option_parser():
help=_('Useful for debugging.'))
return parser
def oeb2lit(opts, opfpath):
logger = LoggingInterface(logging.getLogger('oeb2lit'))
def oeb2lit(opts, inpath):
logger = Logger(logging.getLogger('oeb2lit'))
logger.setup_cli_handler(opts.verbose)
litpath = opts.output
if litpath is None:
litpath = os.path.basename(opfpath)
litpath = os.path.splitext(litpath)[0] + '.lit'
litpath = os.path.abspath(litpath)
lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger)
with open(litpath, 'wb') as f:
lit.dump(f)
run_plugins_on_postprocess(litpath, 'lit')
logger.log_info(_('Output written to ')+litpath)
outpath = opts.output
if outpath is None:
outpath = os.path.basename(inpath)
outpath = os.path.splitext(outpath)[0] + '.lit'
outpath = os.path.abspath(outpath)
context = Context('Firefox', 'MSReader')
oeb = OEBBook(inpath, logger=logger)
tocadder = HTMLTOCAdder()
tocadder.transform(oeb, context)
mangler = CaseMangler()
mangler.transform(oeb, context)
fbase = context.dest.fbase
flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True)
flattener.transform(oeb, context)
rasterizer = SVGRasterizer()
rasterizer.transform(oeb, context)
trimmer = ManifestTrimmer()
trimmer.transform(oeb, context)
lit = LitWriter()
lit.dump(oeb, outpath)
run_plugins_on_postprocess(outpath, 'lit')
logger.info(_('Output written to ') + outpath)
def main(argv=sys.argv):
@ -740,8 +750,8 @@ def main(argv=sys.argv):
if len(args) != 1:
parser.print_help()
return 1
opfpath = args[0]
oeb2lit(opts, opfpath)
inpath = args[0]
oeb2lit(opts, inpath)
return 0
if __name__ == '__main__':

View File

@ -425,7 +425,7 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
thumbnail = None
if not pages:
raise ValueError('Could not find any pages in the comic: %s'%source)
if not opts.no_process:
if not getattr(opts, 'no_process', False):
pages, failures, tdir2 = process_pages(pages, opts, notification)
if not pages:
raise ValueError('Could not find any valid pages in the comic: %s'%source)
@ -443,7 +443,7 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
if output_format == 'pdf':
create_pdf(pages, opts.profile, opts, thumbnail=thumbnail)
shutil.rmtree(tdir)
if not opts.no_process:
if not getattr(opts, 'no_process', False):
shutil.rmtree(tdir2)
@ -457,7 +457,7 @@ def main(args=sys.argv, notification=None, output_format='lrf'):
if not callable(notification):
pb = ProgressBar(terminal_controller, _('Rendering comic pages...'),
no_progress_bar=opts.no_progress_bar)
no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
notification = pb.update
source = os.path.abspath(args[1])

View File

@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
# Remove self closing script tags as they also mess up BeautifulSoup
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
# BeautifulSoup treats self closing <div> tags as open <div> tags
(re.compile(r'(?i)<\s*div([^>]*)/\s*>'),
lambda match: '<div%s></div>'%match.group(1))
]
# Fix Baen markup
BAEN = [
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
# Fix pdftohtml markup
PDFTOHTML = [
# Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
if (css.has_key('display') and css['display'].lower() == 'none') or \
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
return ''
text = u''
text, alt_text = u'', u''
for c in tag.contents:
if limit != None and len(text) > limit:
break
if isinstance(c, HTMLConverter.IGNORED_TAGS):
return u''
continue
if isinstance(c, NavigableString):
text += unicode(c)
elif isinstance(c, Tag):
if c.name.lower() == 'img' and c.has_key('alt'):
text += c['alt']
return text
alt_text += c['alt']
continue
text += self.get_text(c)
return text
return text if text.strip() else alt_text
def process_links(self):
def add_toc_entry(text, target):

View File

@ -700,7 +700,7 @@ class Text(LRFStream):
def add_text(self, text):
s = unicode(text, "utf-16-le")
if s:
s = s.translate(self.text_map)
s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
def end_container(self, tag, stream):
@ -799,18 +799,39 @@ class Text(LRFStream):
length = len(self.stream)
style = self.style.as_dict()
current_style = style.copy()
text_tags = set(list(TextAttr.tag_map.keys()) + \
list(Text.text_tags.keys()) + \
list(ruby_tags.keys()))
text_tags -= set([0xf500+i for i in range(10)])
text_tags.add(0xf5cc)
while stream.tell() < length:
# Is there some text beofre a tag?
pos = self.stream.find('\xf5', stream.tell()) - 1
if pos > 0:
self.add_text(self.stream[stream.tell():pos])
stream.seek(pos)
elif pos == -2: # No tags in this stream
# Is there some text before a tag?
def find_first_tag(start):
pos = self.stream.find('\xf5', start)
if pos == -1:
return -1
try:
stream.seek(pos-1)
_t = Tag(stream)
if _t.id in text_tags:
return pos-1
return find_first_tag(pos+1)
except:
return find_first_tag(pos+1)
start_pos = stream.tell()
tag_pos = find_first_tag(start_pos)
if tag_pos >= start_pos:
if tag_pos > start_pos:
self.add_text(self.stream[start_pos:tag_pos])
stream.seek(tag_pos)
else: # No tags in this stream
self.add_text(self.stream)
stream.seek(0, 2)
print repr(self.stream)
break
tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
refpage = struct.unpack("<I", stream.read(4))[0]
refobj = struct.unpack("<I", stream.read(4))[0]
cnt = struct.unpack("<H", stream.read(2))[0]
label = unicode(stream.read(cnt), "utf_16")
raw = stream.read(cnt)
label = raw.decode('utf_16_le')
self._contents.append(TocLabel(refpage, refobj, label))
c -= 1

View File

@ -249,7 +249,7 @@ class MetaInformation(object):
ans = u''
ans += u'Title : ' + unicode(self.title) + u'\n'
if self.authors:
ans += u'Author : ' + (', '.join(self.authors) if self.authors is not None else u'None')
ans += u'Author : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
if self.publisher:
ans += u'Publisher: '+ unicode(self.publisher) + u'\n'

View File

@ -0,0 +1,63 @@
'''
Convert any ebook format to Mobipocket.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, os, glob, logging
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options
def config(defaults=None):
    '''
    Return the conversion configuration registered under the name 'mobi'.

    This delegates to the EPUB package's `config` factory (imported in this
    module as `common_config`), passing `defaults` through unchanged.
    '''
    settings = common_config(defaults=defaults, name='mobi')
    return settings
def option_parser(usage=USAGE):
    '''
    Build the command line parser for the any-to-Mobipocket converter.

    `usage` is a format string that is interpolated with the output format
    name ('Mobipocket') and the value returned by `formats()`. The
    Mobipocket-specific options are added to the parser before it is
    returned.
    '''
    banner = usage % ('Mobipocket', formats())
    cli = config().option_parser(usage=banner)
    add_mobi_options(cli)
    return cli
def any2mobi(opts, path):
    '''
    Convert the ebook at `path` to a Mobipocket file.

    The input is first run through `any2epub` with `create_epub=False` and
    `extract_to` pointing at a temporary directory; the first ``*.opf``
    file found there is then passed to `oeb2mobi`.

    :param opts: Conversion options object. `opts.output` is set to an
                 absolute path (derived from the input file name when it
                 is None) and `opts.profile` is set to 'None'.
    :param path: Path to the input file. It must have a file extension.
    :raises ValueError: If `path` has no file extension.
    '''
    if not os.path.splitext(path)[1]:
        raise ValueError('Unknown file type: '+path)
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'
    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output
    with TemporaryDirectory('_any2mobi') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        # any2epub reads its destination from opts.output, so point it at a
        # throw-away location inside the temporary directory for this call.
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.profile = 'None'
        try:
            any2epub(opts, path, create_epub=False, oeb_cover=True,
                     extract_to=oebdir)
        finally:
            # Always hand the real output path back, even when the
            # intermediate conversion fails; otherwise the caller's options
            # would keep referring to a file inside the removed temp dir.
            opts.output = orig_output
        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
        logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
        oeb2mobi(opts, opf)
def main(args=sys.argv):
    '''
    Command line entry point for the any-to-Mobipocket converter.

    `args` is the full argument vector: it is handed to the option parser
    as is, and the second remaining positional argument is taken as the
    input file. Returns 0 after a conversion, or 1 (after printing the
    help text and an error message) when no input file was given.
    '''
    cli = option_parser()
    opts, positional = cli.parse_args(args)
    if len(positional) >= 2:
        any2mobi(opts, positional[1])
        return 0
    cli.print_help()
    print('No input file specified.')
    return 1
if __name__ == '__main__':
sys.exit(main())

View File

@ -114,10 +114,10 @@ class MobiMLizer(object):
def mobimlize_measure(self, ptsize):
if isinstance(ptsize, basestring):
return ptsize
fbase = self.profile.fbase
if ptsize < fbase:
embase = self.profile.fbase
if round(ptsize) < embase:
return "%dpt" % int(round(ptsize))
return "%dem" % int(round(ptsize / fbase))
return "%dem" % int(round(ptsize / embase))
def preize_text(self, text):
text = unicode(text).replace(u' ', u'\xa0')
@ -171,8 +171,7 @@ class MobiMLizer(object):
para = etree.SubElement(para, XHTML('blockquote'))
emleft -= 1
else:
ptag = 'p' #tag if tag in HEADER_TAGS else 'p'
para = wrapper = etree.SubElement(parent, XHTML(ptag))
para = wrapper = etree.SubElement(parent, XHTML('p'))
bstate.inline = bstate.para = para
vspace = bstate.vpadding + bstate.vmargin
bstate.vpadding = bstate.vmargin = 0
@ -213,11 +212,11 @@ class MobiMLizer(object):
inline = etree.SubElement(inline, XHTML('sup'))
elif valign == 'sub':
inline = etree.SubElement(inline, XHTML('sub'))
if istate.family == 'monospace':
inline = etree.SubElement(inline, XHTML('tt'))
if fsize != 3:
elif fsize != 3:
inline = etree.SubElement(inline, XHTML('font'),
size=str(fsize))
if istate.family == 'monospace':
inline = etree.SubElement(inline, XHTML('tt'))
if istate.italic:
inline = etree.SubElement(inline, XHTML('i'))
if istate.bold:
@ -241,7 +240,8 @@ class MobiMLizer(object):
or namespace(elem.tag) != XHTML_NS:
return
style = stylizer.style(elem)
if style['display'] == 'none' \
# <mbp:frame-set/> does not exist lalalala
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return
tag = barename(elem.tag)
@ -303,7 +303,7 @@ class MobiMLizer(object):
else:
istate.family = 'serif'
valign = style['vertical-align']
if valign in ('super', 'sup') or asfloat(valign) > 0:
if valign in ('super', 'text-top') or asfloat(valign) > 0:
istate.valign = 'super'
elif valign == 'sub' or asfloat(valign) < 0:
istate.valign = 'sub'

View File

@ -69,15 +69,15 @@ def compress_doc(data):
out.write(pack('>B', onch ^ 0x80))
i += 1
continue
if och == 0 or (och >= 9 and och < 0x80):
if och == 0 or (och > 8 and och < 0x80):
out.write(ch)
else:
j = i
binseq = [ch]
while j < ldata:
while j < ldata and len(binseq) < 8:
ch = data[j]
och = ord(ch)
if och < 1 or (och > 8 and och < 0x80):
if och == 0 or (och > 8 and och < 0x80):
break
binseq.append(ch)
j += 1

View File

@ -33,8 +33,7 @@ class EXTHHeader(object):
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
raw = raw[12:]
pos = 0
self.mi = MetaInformation('Unknown', ['Unknown'])
self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
self.has_fake_cover = True
for i in range(self.num_items):
@ -49,14 +48,24 @@ class EXTHHeader(object):
self.cover_offset, = struct.unpack('>L', content)
elif id == 202:
self.thumbnail_offset, = struct.unpack('>L', content)
#else:
# print 'unknown record', id, repr(content)
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
if title:
self.mi.title = title.group(1).decode(codec, 'ignore')
title = title.group(1).decode(codec, 'replace')
if len(title) > 2:
self.mi.title = title
else:
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
if title:
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
def process_metadata(self, id, content, codec):
if id == 100:
self.mi.authors = [content.decode(codec, 'ignore').strip()]
if self.mi.authors == [_('Unknown')]:
self.mi.authors = []
self.mi.authors.append(content.decode(codec, 'ignore').strip())
elif id == 101:
self.mi.publisher = content.decode(codec, 'ignore').strip()
elif id == 103:
@ -67,7 +76,8 @@ class EXTHHeader(object):
if not self.mi.tags:
self.mi.tags = []
self.mi.tags.append(content.decode(codec, 'ignore'))
#else:
# print 'unhandled metadata record', id, repr(content), codec
class BookHeader(object):
@ -466,6 +476,10 @@ def get_metadata(stream):
cover = os.path.join(tdir, mi.cover)
if os.access(cover, os.R_OK):
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
else:
path = os.path.join(tdir, 'images', '00001.jpg')
if os.access(path, os.R_OK):
mi.cover_data = ('JPEG', open(path, 'rb').read())
return mi
def option_parser():

View File

@ -17,26 +17,30 @@ import re
from itertools import izip, count
from collections import defaultdict
from urlparse import urldefrag
import logging
from lxml import etree
from PIL import Image
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
from calibre.customize.ui import run_plugins_on_postprocess
from calibre.utils.config import OptionParser
from optparse import OptionGroup
# TODO:
# - Allow override CSS (?)
# - Generate index records
# - Generate in-content ToC
# - Command line options, etc.
# - Optionally rasterize tables
EXTH_CODES = {
'creator': 100,
@ -59,7 +63,8 @@ UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480
MAX_IMAGE_SIZE = 63 * 1024
PALM_MAX_IMAGE_SIZE = 63 * 1024
OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
@ -88,7 +93,6 @@ class Serializer(object):
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
def __init__(self, oeb, images):
oeb.logger.info('Serializing markup content...')
self.oeb = oeb
self.images = images
self.id_offsets = {}
@ -117,10 +121,16 @@ class Serializer(object):
path, frag = urldefrag(ref.href)
if hrefs[path].media_type not in OEB_DOCS:
continue
buffer.write('<reference title="%s" type="%s" '
% (ref.title, ref.type))
buffer.write('<reference type="')
self.serialize_text(ref.type, quot=True)
buffer.write('" ')
if ref.title is not None:
buffer.write('title="')
self.serialize_text(ref.title, quot=True)
buffer.write('" ')
self.serialize_href(ref.href)
buffer.write('/>')
# Space required or won't work, I kid you not
buffer.write(' />')
buffer.write('</guide>')
def serialize_href(self, href, base=None):
@ -144,6 +154,12 @@ class Serializer(object):
def serialize_body(self):
buffer = self.buffer
buffer.write('<body>')
# CybookG3 'Start Reading' link
if 'text' in self.oeb.guide:
href = self.oeb.guide['text'].href
buffer.write('<a ')
self.serialize_href(href)
buffer.write(' />')
spine = [item for item in self.oeb.spine if item.linear]
spine.extend([item for item in self.oeb.spine if not item.linear])
for item in spine:
@ -185,10 +201,12 @@ class Serializer(object):
if attr == 'href':
if self.serialize_href(val, item):
continue
elif attr == 'src' and val in hrefs:
index = self.images[val]
buffer.write('recindex="%05d"' % index)
continue
elif attr == 'src':
href = item.abshref(val)
if href in hrefs:
index = self.images[href]
buffer.write('recindex="%05d"' % index)
continue
buffer.write(attr)
buffer.write('="')
self.serialize_text(val, quot=True)
@ -223,9 +241,11 @@ class Serializer(object):
class MobiWriter(object):
def __init__(self, compression=None, logger=FauxLogger()):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, compression=None, imagemax=None):
self._compression = compression or UNCOMPRESSED
self._logger = logger
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
def dump(self, oeb, path):
if hasattr(path, 'write'):
@ -293,6 +313,7 @@ class MobiWriter(object):
return data, overlap
def _generate_text(self):
self._oeb.logger.info('Serializing markup content...')
serializer = Serializer(self._oeb, self._images)
breaks = serializer.breaks
text = serializer.text
@ -300,6 +321,8 @@ class MobiWriter(object):
text = StringIO(text)
nrecords = 0
offset = 0
if self._compression != UNCOMPRESSED:
self._oeb.logger.info('Compressing markup content...')
data, overlap = self._read_text_record(text)
while len(data) > 0:
if self._compression == PALMDOC:
@ -335,7 +358,9 @@ class MobiWriter(object):
format = image.format
changed = False
if image.format not in ('JPEG', 'GIF'):
format = 'GIF'
width, height = image.size
area = width * height
format = 'GIF' if area <= 40000 else 'JPEG'
changed = True
if dimen is not None:
image.thumbnail(dimen, Image.ANTIALIAS)
@ -368,13 +393,14 @@ class MobiWriter(object):
return data
def _generate_images(self):
self._oeb.logger.warn('Serializing images...')
images = [(index, href) for href, index in self._images.items()]
images.sort()
metadata = self._oeb.metadata
coverid = metadata.cover[0] if metadata.cover else None
for _, href in images:
item = self._oeb.manifest.hrefs[href]
data = self._rescale_image(item.data, MAX_IMAGE_SIZE)
data = self._rescale_image(item.data, self._imagemax)
self._records.append(data)
def _generate_record0(self):
@ -418,7 +444,8 @@ class MobiWriter(object):
if term not in EXTH_CODES: continue
code = EXTH_CODES[term]
for item in oeb.metadata[term]:
data = unicode(item).encode('utf-8')
data = self.COLLAPSE_RE.sub(' ', unicode(item))
data = data.encode('utf-8')
exth.write(pack('>II', code, len(data) + 8))
exth.write(data)
nrecs += 1
@ -467,29 +494,90 @@ class MobiWriter(object):
self._write(record)
def main(argv=sys.argv):
from calibre.ebooks.oeb.base import DirWriter
inpath, outpath = argv[1:]
context = Context('Firefox', 'MobiDesktop')
oeb = OEBBook(inpath)
#writer = MobiWriter(compression=PALMDOC)
writer = MobiWriter(compression=UNCOMPRESSED)
#writer = DirWriter()
def add_mobi_options(parser):
    '''
    Add the Mobipocket and renderer-profile option groups to `parser`.

    The 'Mobipocket' group provides ``--compress`` and ``--rescale-images``;
    the 'Profiles' group provides ``--source-profile`` and
    ``--dest-profile``, and its description lists the sorted names found in
    `Context.PROFILES`. Nothing is returned; `parser` is modified in place.
    '''
    profile_names = ', '.join(sorted(Context.PROFILES.keys()))

    mobi_group = OptionGroup(
        parser, _('Mobipocket'), _('Mobipocket-specific options.'))
    mobi_group.add_option(
        '-c', '--compress', default=False, action='store_true',
        help=_('Compress file text using PalmDOC compression.'))
    mobi_group.add_option(
        '-r', '--rescale-images', default=False, action='store_true',
        help=_('Modify images to meet Palm device size limitations.'))
    parser.add_option_group(mobi_group)

    profile_help = _('Device renderer profiles. '
        'Affects conversion of default font sizes and rasterization '
        'resolution. Valid profiles are: %s.') % profile_names
    profile_group = OptionGroup(parser, _('Profiles'), profile_help)
    profile_group.add_option(
        '--source-profile', default='Browser', metavar='PROFILE',
        help=_("Source renderer profile. Default is 'Browser'."))
    profile_group.add_option(
        '--dest-profile', default='CybookG3', metavar='PROFILE',
        help=_("Destination renderer profile. Default is 'CybookG3'."))
    parser.add_option_group(profile_group)
def option_parser():
    '''
    Build the command line parser for converting an OPF file to Mobipocket.

    The parser accepts ``-o/--output`` and ``-v/--verbose`` in addition to
    the options registered by `add_mobi_options`.
    '''
    cli = OptionParser(usage=_('%prog [options] OPFFILE'))
    output_help = _('Output file. Default is derived from input filename.')
    cli.add_option('-o', '--output', default=None, help=output_help)
    verbose_help = _('Useful for debugging.')
    cli.add_option(
        '-v', '--verbose', default=False, action='store_true',
        help=verbose_help)
    add_mobi_options(cli)
    return cli
def oeb2mobi(opts, inpath):
logger = Logger(logging.getLogger('oeb2mobi'))
logger.setup_cli_handler(opts.verbose)
outpath = opts.output
if outpath is None:
outpath = os.path.basename(inpath)
outpath = os.path.splitext(outpath)[0] + '.mobi'
source = opts.source_profile
if source not in Context.PROFILES:
logger.error(_('Unknown source profile %r') % source)
return 1
dest = opts.dest_profile
if dest not in Context.PROFILES:
logger.error(_('Unknown destination profile %r') % dest)
return 1
compression = PALMDOC if opts.compress else UNCOMPRESSED
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
context = Context(source, dest)
oeb = OEBBook(inpath, logger=logger)
tocadder = HTMLTOCAdder()
tocadder.transform(oeb, context)
mangler = CaseMangler()
mangler.transform(oeb, context)
fbase = context.dest.fbase
fkey = context.dest.fnums.values()
tocadder = HTMLTOCAdder()
flattener = CSSFlattener(
fbase=fbase, fkey=fkey, unfloat=True, untable=True)
rasterizer = SVGRasterizer()
trimmer = ManifestTrimmer()
mobimlizer = MobiMLizer()
tocadder.transform(oeb, context)
flattener.transform(oeb, context)
rasterizer = SVGRasterizer()
rasterizer.transform(oeb, context)
mobimlizer.transform(oeb, context)
trimmer = ManifestTrimmer()
trimmer.transform(oeb, context)
mobimlizer = MobiMLizer()
mobimlizer.transform(oeb, context)
writer = MobiWriter(compression=compression, imagemax=imagemax)
writer.dump(oeb, outpath)
return 0
run_plugins_on_postprocess(outpath, 'mobi')
logger.info(_('Output written to ') + outpath)
def main(argv=sys.argv):
    '''
    Command line entry point for the OPF-to-Mobipocket writer.

    The first element of `argv` is skipped before parsing. Exactly one
    positional argument (the input path) is required; otherwise the help
    text is printed and 1 is returned. On a valid invocation the return
    value of `oeb2mobi` is passed back to the caller unchanged.
    '''
    cli = option_parser()
    opts, positional = cli.parse_args(argv[1:])
    if len(positional) == 1:
        return oeb2mobi(opts, positional[0])
    cli.print_help()
    return 1
if __name__ == '__main__':
sys.exit(main())

View File

@ -67,11 +67,13 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])
MS_COVER_TYPE = 'other.ms-coverimage-standard'
ENTITYDEFS = dict(htmlentitydefs.entitydefs)
recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace')
ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items())
del ENTITYDEFS['lt']
del ENTITYDEFS['gt']
del ENTITYDEFS['quot']
del ENTITYDEFS['amp']
del recode
def element(parent, *args, **kwargs):
@ -341,16 +343,19 @@ class Manifest(object):
self._data = None
return property(fget, fset, fdel)
data = data()
def __str__(self):
data = self.data
if isinstance(data, etree._Element):
return xml2str(data)
return str(data)
def __eq__(self, other):
return id(self) == id(other)
def __ne__(self, other):
return not self.__eq__(other)
def __cmp__(self, other):
result = cmp(self.spine_position, other.spine_position)
if result != 0:
@ -534,52 +539,81 @@ class Spine(object):
class Guide(object):
class Reference(object):
_TYPES_TITLES = [('cover', 'Cover'), ('title-page', 'Title Page'),
('toc', 'Table of Contents'), ('index', 'Index'),
('glossary', 'Glossary'), ('acknowledgements', 'Acknowledgements'),
('bibliography', 'Bibliography'), ('colophon', 'Colophon'),
('copyright-page', 'Copyright'), ('dedication', 'Dedication'),
('epigraph', 'Epigraph'), ('foreword', 'Foreword'),
('loi', 'List of Illustrations'), ('lot', 'List of Tables'),
('notes', 'Notes'), ('preface', 'Preface'),
('text', 'Main Text')]
TYPES = set(t for t, _ in _TYPES_TITLES)
TITLES = dict(_TYPES_TITLES)
ORDER = dict((t, i) for (t, _), i in izip(_TYPES_TITLES, count(0)))
def __init__(self, type, title, href):
if type.lower() in self.TYPES:
type = type.lower()
elif type not in self.TYPES and \
not type.startswith('other.'):
type = 'other.' + type
if not title:
title = self.TITLES.get(type, None)
self.type = type
self.title = title
self.href = urlnormalize(href)
def __repr__(self):
return 'Reference(type=%r, title=%r, href=%r)' \
% (self.type, self.title, self.href)
def _order():
def fget(self):
return self.ORDER.get(self.type, self.type)
return property(fget=fget)
_order = _order()
def __cmp__(self, other):
if not isinstance(other, Guide.Reference):
return NotImplemented
return cmp(self._order, other._order)
def __init__(self, oeb):
self.oeb = oeb
self.refs = {}
def add(self, type, title, href):
ref = self.Reference(type, title, href)
self.refs[type] = ref
return ref
def by_type(self, type):
    """Return the reference registered under guide type ``type``.

    References are stored in ``self.refs`` (populated by ``add`` and keyed
    by type); the previous lookup used a non-existent ``ref_types``
    attribute and could only raise AttributeError.

    Raises KeyError if no reference of that type has been added.
    """
    return self.refs[type]
def iterkeys(self):
for type in self.refs:
yield type
__iter__ = iterkeys
def values(self):
for ref in self.refs.values():
yield ref
values = list(self.refs.values())
values.sort()
return values
def items(self):
for type, ref in self.refs.items():
yield type, ref
def __getitem__(self, key):
return self.refs[key]
def __delitem__(self, key):
del self.refs[key]
def __contains__(self, key):
return key in self.refs
def __len__(self):
return len(self.refs)
def to_opf1(self, parent=None):
elem = element(parent, 'guide')
for ref in self.refs.values():
@ -914,11 +948,11 @@ class OEBBook(object):
cover = self.manifest.hrefs[href]
elif xpath(html, '//h:img[position()=1]'):
img = xpath(html, '//h:img[position()=1]')[0]
href = img.get('src')
href = spine0.abshref(img.get('src'))
cover = self.manifest.hrefs[href]
elif xpath(html, '//h:object[position()=1]'):
object = xpath(html, '//h:object[position()=1]')[0]
href = object.get('data')
href = spine0.abshref(object.get('data'))
cover = self.manifest.hrefs[href]
elif xpath(html, '//svg:svg[position()=1]'):
svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0])

View File

@ -36,26 +36,36 @@ PROFILES = {
fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),
'MSReader':
Profile(width=480, height=652, dpi=100.0, fbase=13,
Profile(width=480, height=652, dpi=96, fbase=13,
fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),
# Not really, but let's pretend
'MobiDesktop':
Profile(width=280, height=300, dpi=96, fbase=18,
fsizes=[14, 14, 16, 18, 20, 22, 22, 24]),
'Mobipocket':
Profile(width=600, height=800, dpi=96, fbase=18,
fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
# No clue on usable screen size and DPI
'CybookG3':
Profile(width=584, height=754, dpi=168.451, fbase=12,
fsizes=[9, 10, 11, 12, 14, 17, 20, 24]),
# No clue on usable screen size; DPI should be good
'HanlinV3':
Profile(width=584, height=754, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Firefox':
'CybookG3':
Profile(width=600, height=800, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Kindle':
Profile(width=525, height=640, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Browser':
Profile(width=800, height=600, dpi=100.0, fbase=12,
fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
}
class Context(object):
PROFILES = PROFILES
def __init__(self, source, dest):
if source in PROFILES:
source = PROFILES[source]

View File

@ -23,7 +23,7 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import barename, urlnormalize
from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize
from calibre.ebooks.oeb.profile import PROFILES
from calibre.resources import html_css
@ -87,10 +87,6 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
'x-large', 'xx-large'])
XPNSMAP = {'h': XHTML_NS,}
def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
@ -269,6 +265,7 @@ class Style(object):
self._fontSize = None
self._width = None
self._height = None
self._lineHeight = None
stylizer._styles[element] = self
def _update_cssdict(self, cssdict):
@ -288,13 +285,13 @@ class Style(object):
if elem is None:
return None
return self._stylizer.style(elem)
def __getitem__(self, name):
domname = cssproperties._toDOMname(name)
if hasattr(self, domname):
return getattr(self, domname)
return self._unit_convert(self._get(name))
def _get(self, name):
result = None
if name in self._style:
@ -324,7 +321,7 @@ class Style(object):
unit = m.group(2)
if unit == '%':
base = base or self.width
result = (value/100.0) * base
result = (value / 100.0) * base
elif unit == 'px':
result = value * 72.0 / self._profile.dpi
elif unit == 'in':
@ -388,7 +385,7 @@ class Style(object):
@property
def width(self):
if self._width is None:
result = None
width = None
base = None
parent = self._get_parent()
if parent is not None:
@ -399,9 +396,9 @@ class Style(object):
width = self._element.attrib['width']
elif 'width' in self._style:
width = self._style['width']
else:
if not width or width == 'auto':
result = base
if not result:
else:
result = self._unit_convert(width, base=base)
self._width = result
return self._width
@ -409,7 +406,7 @@ class Style(object):
@property
def height(self):
if self._height is None:
result = None
height = None
base = None
parent = self._get_parent()
if parent is not None:
@ -420,12 +417,53 @@ class Style(object):
height = self._element.attrib['height']
elif 'height' in self._style:
height = self._style['height']
else:
if not height or height == 'auto':
result = base
if not result:
else:
result = self._unit_convert(height, base=base)
self._height = result
return self._height
@property
def lineHeight(self):
    """Computed line height for this element, cached after first use.

    Resolution order:
      * an explicit ``line-height`` in this element's style: a bare number
        is a multiplier of the element's font size, anything else is handed
        to ``_unit_convert`` with the font size as the percentage base;
      * otherwise the parent's computed line height;
      * otherwise (no parent) 1.2 times the font size.
    """
    if self._lineHeight is None:
        result = None
        # The parent lookup helper is ``_get_parent`` (as used by the
        # ``width`` and ``height`` properties); ``_getparent`` was a typo.
        parent = self._get_parent()
        if 'line-height' in self._style:
            lineh = self._style['line-height']
            try:
                factor = float(lineh)
            except ValueError:
                # Not a bare number: a length, percentage or keyword.
                result = self._unit_convert(lineh, base=self.fontSize)
            else:
                result = factor * self.fontSize
        elif parent is not None:
            # TODO: proper inheritance
            result = parent.lineHeight
        else:
            result = 1.2 * self.fontSize
        self._lineHeight = result
    return self._lineHeight
@property
def marginTop(self):
return self._unit_convert(
self._get('margin-top'), base=self.height)
@property
def marginBottom(self):
return self._unit_convert(
self._get('margin-bottom'), base=self.height)
@property
def paddingTop(self):
return self._unit_convert(
self._get('padding-top'), base=self.height)
@property
def paddingBottom(self):
return self._unit_convert(
self._get('padding-bottom'), base=self.height)
def __str__(self):
items = self._style.items()

View File

@ -33,12 +33,13 @@ class KeyMapper(object):
def relate(size, base):
size = float(size)
base = float(base)
if size == base: return 0
if abs(size - base) < 0.1: return 0
sign = -1 if size < base else 1
endp = 0 if size < base else 36
diff = (abs(base - size) * 3) + ((36 - size) / 100)
logb = abs(base - endp)
return sign * math.log(diff, logb)
result = sign * math.log(diff, logb)
return result
def __getitem__(self, ssize):
if ssize in self.cache:
@ -122,6 +123,8 @@ class CSSFlattener(object):
fsize = self.context.source.fbase
self.baseline_node(body, stylizer, sizes, fsize)
sbase = max(sizes.items(), key=operator.itemgetter(1))[0]
self.oeb.logger.info(
"Source base font size is %0.05fpt" % sbase)
return sbase
def clean_edges(self, cssdict, style, fsize):
@ -154,14 +157,14 @@ class CSSFlattener(object):
if node.tag == XHTML('font'):
node.tag = XHTML('span')
if 'size' in node.attrib:
size = node.attrib['size']
if size.startswith('+'):
cssdict['font-size'] = 'larger'
elif size.startswith('-'):
cssdict['font-size'] = 'smaller'
else:
size = node.attrib['size'].strip()
if size:
fnums = self.context.source.fnums
cssdict['font-size'] = fnums[int(size)]
if size[0] in ('+', '-'):
# Oh, the warcrimes
cssdict['font-size'] = fnums[3+int(size)]
else:
cssdict['font-size'] = fnums[int(size)]
del node.attrib['size']
if 'color' in node.attrib:
cssdict['color'] = node.attrib['color']
@ -182,10 +185,11 @@ class CSSFlattener(object):
percent = (margin - style['text-indent']) / style['width']
cssdict['margin-left'] = "%d%%" % (percent * 100)
left -= style['text-indent']
if 'display' in cssdict and cssdict['display'] == 'in-line':
cssdict['display'] = 'inline'
if self.unfloat and 'float' in cssdict \
and tag not in ('img', 'object') \
and cssdict.get('display', 'none') != 'none':
del cssdict['display']
del cssdict['display']
if self.untable and 'display' in cssdict \
and cssdict['display'].startswith('table'):
display = cssdict['display']
@ -218,7 +222,9 @@ class CSSFlattener(object):
for child in node:
self.flatten_node(child, stylizer, names, styles, psize, left)
def flatten_head(self, head, stylizer, href):
def flatten_head(self, item, stylizer, href):
html = item.data
head = html.find(XHTML('head'))
for node in head:
if node.tag == XHTML('link') \
and node.get('rel', 'stylesheet') == 'stylesheet' \
@ -227,6 +233,7 @@ class CSSFlattener(object):
elif node.tag == XHTML('style') \
and node.get('type', CSS_MIME) in OEB_STYLES:
head.remove(node)
href = item.relhref(href)
etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
if stylizer.page_rule:
@ -259,7 +266,5 @@ class CSSFlattener(object):
css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
href = self.replace_css(css)
for item in self.oeb.spine:
html = item.data
stylizer = self.stylizers[item]
head = html.find(XHTML('head'))
self.flatten_head(head, stylizer, href)
self.flatten_head(item, stylizer, href)

View File

@ -0,0 +1,87 @@
'''
HTML-TOC-adding transform.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from lxml import etree
from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
from calibre.ebooks.oeb.base import element
STYLE_CSS = {
'nested': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
margin-left: 1.2em;
text-indent: -1.2em;
}
.calibre_toc_block .calibre_toc_block {
margin-left: 2.4em;
}
.calibre_toc_block .calibre_toc_block .calibre_toc_block {
margin-left: 3.6em;
}
""",
'centered': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
text-align: center;
}
body > .calibre_toc_block {
margin-top: 1.2em;
}
"""
}
class HTMLTOCAdder(object):
    """Transform that adds an in-line HTML table of contents to a book.

    The generated page is only added when the book's guide has no ``toc``
    reference yet.  ``style`` selects one of the stylesheets in
    ``STYLE_CSS``; an unknown name is logged and replaced by ``'nested'``.
    """

    def __init__(self, style='nested'):
        self.style = style

    def transform(self, oeb, context):
        """Build the TOC page and register it in manifest, spine and guide."""
        if 'toc' in oeb.guide:
            # The book already declares a table of contents.
            return
        oeb.logger.info('Generating in-line TOC...')

        chosen = self.style
        if chosen not in STYLE_CSS:
            oeb.logger.error('Unknown TOC style %r' % chosen)
            chosen = 'nested'

        # Stylesheet for the generated page.
        css_id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
        oeb.manifest.add(css_id, css_href, CSS_MIME, data=STYLE_CSS[chosen])

        # Skeleton document: <html><head>...</head><body>...</body></html>
        language = str(oeb.metadata.language[0])
        root = element(None, XHTML('html'), nsmap={None: XHTML_NS},
                       attrib={XML('lang'): language})
        head = element(root, XHTML('head'))
        element(head, XHTML('title')).text = 'Table of Contents'
        element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
                href=css_href)
        body = element(root, XHTML('body'), attrib={'class': 'calibre_toc'})
        heading = element(body, XHTML('h1'),
                          attrib={'class': 'calibre_toc_header'})
        heading.text = 'Table of Contents'

        # One nested <div> per TOC node.
        self.add_toc_level(body, oeb.toc)

        page_id, page_href = oeb.manifest.generate('contents',
                                                   'contents.xhtml')
        page = oeb.manifest.add(page_id, page_href, XHTML_MIME, data=root)
        oeb.spine.add(page, linear=False)
        oeb.guide.add('toc', 'Table of Contents', page_href)

    def add_toc_level(self, elem, toc):
        """Append one linked block per child of ``toc`` under ``elem``.

        Recurses into each node so nested TOC entries become nested blocks.
        """
        for entry in toc:
            block = element(elem, XHTML('div'),
                            attrib={'class': 'calibre_toc_block'})
            link_attrs = {'href': entry.href, 'class': 'calibre_toc_line'}
            link = element(block, XHTML('a'), attrib=link_attrs)
            link.text = entry.title
            self.add_toc_level(block, entry)

View File

@ -0,0 +1,112 @@
'''
CSS case-mangling transform.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
import re
import operator
import math
from itertools import chain
from collections import defaultdict
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
from calibre.ebooks.oeb.base import CSS_MIME
from calibre.ebooks.oeb.base import namespace
from calibre.ebooks.oeb.stylizer import Stylizer
CASE_MANGLER_CSS = """
.calibre_lowercase {
font-variant: normal;
font-size: 0.65em;
}
"""
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
class CaseMangler(object):
    """Transform that bakes CSS case styling into the document text.

    ``text-transform`` (capitalize/uppercase/lowercase) is applied directly
    to element text and tails, and ``font-variant: small-caps`` is emulated
    by upper-casing the lower-case runs and wrapping them in
    ``<span class="calibre_lowercase">`` elements, which the generated
    stylesheet renders at a reduced font size.
    """

    # A word's first character: any non-space character at the start of the
    # text or directly after whitespace.
    _WORD_START_RE = re.compile(r'(^|\s)(\S)', re.UNICODE)

    def transform(self, oeb, context):
        """Apply the case transforms to every spine item of ``oeb``."""
        oeb.logger.info('Applying case-transforming CSS...')
        self.oeb = oeb
        self.profile = context.source
        self.mangle_spine()

    def mangle_spine(self):
        """Add the helper stylesheet and mangle each spine document."""
        css_id, href = self.oeb.manifest.generate('manglecase',
                                                  'manglecase.css')
        self.oeb.manifest.add(css_id, href, CSS_MIME, data=CASE_MANGLER_CSS)
        for item in self.oeb.spine:
            html = item.data
            relhref = item.relhref(href)
            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
                             rel='stylesheet', href=relhref, type=CSS_MIME)
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
            self.mangle_elem(html.find(XHTML('body')), stylizer)

    def text_transform(self, transform, text):
        """Return ``text`` with the CSS ``text-transform`` value applied.

        ``capitalize`` upper-cases only the first character of each
        whitespace-delimited word and leaves the rest untouched, as CSS
        does.  ``str.title()`` is deliberately not used: it lower-cases the
        remainder of every word and restarts a word after an apostrophe
        ("don't" -> "Don'T").  Unknown transforms return ``text`` unchanged.
        """
        if transform == 'capitalize':
            return self._WORD_START_RE.sub(
                lambda m: m.group(1) + m.group(2).upper(), text)
        elif transform == 'uppercase':
            return text.upper()
        elif transform == 'lowercase':
            return text.lower()
        return text

    def split_text(self, text):
        """Split ``text`` into runs that alternate in ``isupper()`` status.

        Returns a list of substrings whose concatenation is ``text``; empty
        input yields ``['']``.
        """
        results = ['']
        if not text:
            return results
        isupper = text[0].isupper()
        for char in text:
            if char.isupper() == isupper:
                results[-1] += char
            else:
                isupper = not isupper
                results.append(char)
        return results

    def smallcaps_elem(self, elem, attr):
        """Emulate small-caps for ``elem.text`` or ``elem.tail``.

        ``attr`` is ``'text'`` or ``'tail'``.  Upper-case runs stay as plain
        text; every other run is upper-cased and wrapped in a
        ``calibre_lowercase`` span inserted at the position of the original
        text.
        """
        texts = self.split_text(getattr(elem, attr))
        setattr(elem, attr, None)
        # ``last`` is the node after which the next piece goes; None means
        # we are still at the very start of ``elem``'s own content.
        last = elem if attr == 'tail' else None
        attrib = {'class': 'calibre_lowercase'}
        for text in texts:
            if text.isupper():
                if last is None:
                    elem.text = text
                else:
                    last.tail = text
            else:
                child = etree.Element(XHTML('span'), attrib=attrib)
                child.text = text.upper()
                if last is None:
                    elem.insert(0, child)
                else:
                    # addnext() moves the tail for some reason
                    tail = last.tail
                    last.addnext(child)
                    last.tail = tail
                    child.tail = None
                last = child

    def mangle_elem(self, elem, stylizer):
        """Recursively apply case transforms to ``elem`` and its children.

        Nodes whose tag is not a string (comments, processing instructions)
        or is outside the XHTML namespace are skipped together with their
        subtrees.  A child's tail is styled with the parent's computed
        style, since tail text belongs to the parent's content.
        """
        if not isinstance(elem.tag, basestring) or \
           namespace(elem.tag) != XHTML_NS:
            return
        # Snapshot the children: smallcaps_elem() inserts new spans.
        children = list(elem)
        style = stylizer.style(elem)
        transform = style['text-transform']
        variant = style['font-variant']
        if elem.text:
            if transform in TEXT_TRANSFORMS:
                elem.text = self.text_transform(transform, elem.text)
            if variant == 'small-caps':
                self.smallcaps_elem(elem, 'text')
        for child in children:
            self.mangle_elem(child, stylizer)
            if child.tail:
                if transform in TEXT_TRANSFORMS:
                    child.tail = self.text_transform(transform, child.tail)
                if variant == 'small-caps':
                    self.smallcaps_elem(child, 'tail')

View File

@ -21,11 +21,12 @@ from PyQt4.QtGui import QPainter
from PyQt4.QtSvg import QSvgRenderer
from PyQt4.QtGui import QApplication
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
from calibre.ebooks.oeb.stylizer import Stylizer
IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
class SVGRasterizer(object):
def __init__(self):
@ -41,7 +42,7 @@ class SVGRasterizer(object):
self.rasterize_spine()
self.rasterize_cover()
def rasterize_svg(self, elem, width=0, height=0):
def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
data = QByteArray(xml2str(elem))
svg = QSvgRenderer(data)
size = svg.defaultSize()
@ -52,6 +53,9 @@ class SVGRasterizer(object):
size.setHeight(box[3] - box[1])
if width or height:
size.scale(width, height, Qt.KeepAspectRatio)
logger = self.oeb.logger
logger.info('Rasterizing %r to %dx%d'
% (elem, size.width(), size.height()))
image = QImage(size, QImage.Format_ARGB32_Premultiplied)
image.fill(QColor("white").rgb())
painter = QPainter(image)
@ -60,7 +64,7 @@ class SVGRasterizer(object):
array = QByteArray()
buffer = QBuffer(array)
buffer.open(QIODevice.WriteOnly)
image.save(buffer, 'PNG')
image.save(buffer, format)
return str(array)
def dataize_manifest(self):
@ -113,11 +117,7 @@ class SVGRasterizer(object):
def rasterize_inline(self, elem, style, item):
width = style['width']
if width == 'auto':
width = self.profile.width
height = style['height']
if height == 'auto':
height = self.profile.height
width = (width / 72) * self.profile.dpi
height = (height / 72) * self.profile.dpi
elem = self.dataize_svg(item, elem)
@ -134,11 +134,7 @@ class SVGRasterizer(object):
def rasterize_external(self, elem, style, item, svgitem):
width = style['width']
if width == 'auto':
width = self.profile.width
height = style['height']
if height == 'auto':
height = self.profile.height
width = (width / 72) * self.profile.dpi
height = (height / 72) * self.profile.dpi
data = QByteArray(str(svgitem))
@ -168,11 +164,16 @@ class SVGRasterizer(object):
manifest.add(id, href, PNG_MIME, data=data)
self.images[key] = href
elem.tag = XHTML('img')
for attr in elem.attrib:
if attr not in KEEP_ATTRS:
del elem.attrib[attr]
elem.attrib['src'] = item.relhref(href)
elem.text = None
if elem.text:
elem.attrib['alt'] = elem.text
elem.text = None
for child in elem:
elem.remove(child)
def rasterize_cover(self):
covers = self.oeb.metadata.cover
if not covers:
@ -180,9 +181,9 @@ class SVGRasterizer(object):
cover = self.oeb.manifest.ids[str(covers[0])]
if not cover.media_type == SVG_MIME:
return
logger = self.oeb.logger
logger.info('Rasterizing %r to %dx%d' % (cover.href, 600, 800))
data = self.rasterize_svg(cover.data, 600, 800)
width = (self.profile.width / 72) * self.profile.dpi
height = (self.profile.height / 72) * self.profile.dpi
data = self.rasterize_svg(cover.data, width, height)
href = os.path.splitext(cover.href)[0] + '.png'
id, href = self.oeb.manifest.generate(cover.id, href)
self.oeb.manifest.add(id, href, PNG_MIME, data=data)

View File

@ -9,6 +9,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from itertools import chain
from urlparse import urldefrag
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
@ -29,6 +30,11 @@ class ManifestTrimmer(object):
used.add(oeb.manifest.hrefs[item.value])
elif item.value in oeb.manifest.ids:
used.add(oeb.manifest.ids[item.value])
for ref in oeb.guide.values():
path, _ = urldefrag(ref.href)
if path in oeb.manifest.hrefs:
used.add(oeb.manifest.hrefs[path])
# TOC items are required to be in the spine
for item in oeb.spine:
used.add(item)
unchecked = used
@ -56,7 +62,6 @@ class ManifestTrimmer(object):
cssutils.replaceUrls(sheet, replacer)
used.update(new)
unchecked = new
# All guide and TOC items are required to be in the spine
for item in oeb.manifest.values():
if item not in used:
oeb.logger.info('Trimming %r from manifest' % item.href)

View File

@ -29,7 +29,7 @@ def config(defaults=None):
c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop,
help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop )
c.add_opt('bounding', ['-b', '--bounding'],
help=_('A file generated by ghostscript which allows each page to be individually cropped'))
help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
return c
@ -38,14 +38,28 @@ def option_parser():
return c.option_parser(usage=_('''\
%prog [options] file.pdf
Crop a pdf.
Crops a pdf.
'''))
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
source = os.path.abspath(args[1])
input_pdf = PdfFileReader(file(source, "rb"))
try:
source = os.path.abspath(args[1])
input_pdf = PdfFileReader(file(source, "rb"))
except:
print "Unable to read input"
return 2
title = _('Unknown')
author = _('Unknown')
try:
info = input_pdf.getDocumentInfo()
if info.title:
title = info.title
if info.author:
author = info.author
except:
pass
if opts.bounding != None:
try:
bounding = open( opts.bounding , 'r' )
@ -53,7 +67,7 @@ def main(args=sys.argv):
except:
print 'Error opening %s' % opts.bounding
return 1
output_pdf = PdfFileWriter()
output_pdf = PdfFileWriter(title=title,author=author)
for page_number in range (0, input_pdf.getNumPages() ):
page = input_pdf.getPage(page_number)
if opts.bounding != None:

View File

@ -136,16 +136,18 @@ class DeviceManager(Thread):
return self.create_job(self._sync_booklists, done, args=[booklists],
description=_('Send metadata to device'))
def _upload_books(self, files, names, on_card=False):
def _upload_books(self, files, names, on_card=False, metadata=None):
'''Upload books to device: '''
return self.device.upload_books(files, names, on_card, end_session=False)
return self.device.upload_books(files, names, on_card,
metadata=metadata, end_session=False)
def upload_books(self, done, files, names, on_card=False, titles=None):
def upload_books(self, done, files, names, on_card=False, titles=None,
metadata=None):
desc = _('Upload %d books to device')%len(names)
if titles:
desc += u':' + u', '.join(titles)
return self.create_job(self._upload_books, done, args=[files, names],
kwargs={'on_card':on_card}, description=desc)
kwargs={'on_card':on_card,'metadata':metadata}, description=desc)
def add_books_to_metadata(self, locations, metadata, booklists):
self.device.add_books_to_metadata(locations, metadata, booklists)

View File

@ -28,9 +28,6 @@
<property name="readOnly" >
<bool>true</bool>
</property>
<property name="maximumBlockCount" >
<number>400</number>
</property>
</widget>
</item>
</layout>

View File

@ -75,7 +75,13 @@ def save_recipes(recipes):
def load_recipes():
config.refresh()
return [Recipe().unpickle(r) for r in config.get('scheduled_recipes', [])]
recipes = []
for r in config.get('scheduled_recipes', []):
r = Recipe().unpickle(r)
if r.builtin and not str(r.id).startswith('recipe_'):
continue
recipes.append(r)
return recipes
class RecipeModel(QAbstractListModel, SearchQueryParser):
@ -438,7 +444,7 @@ class Scheduler(QObject):
self.lock.unlock()
def main(args=sys.argv):
app = QApplication([])
QApplication([])
from calibre.library.database2 import LibraryDatabase2
d = SchedulerDialog(LibraryDatabase2('/home/kovid/documents/library'))
d.exec_()

Binary file not shown.

After

Width:  |  Height:  |  Size: 586 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 992 B

View File

@ -385,13 +385,35 @@ class BooksModel(QAbstractTableModel):
metadata.append(mi)
return metadata
def get_preferred_formats_from_ids(self, ids, all_formats, mode='r+b'):
    """Open, for each book id, its most preferred available format.

    ``all_formats`` is ordered by preference; matching against the formats
    stored in the database is case-insensitive and the first hit wins.

    Returns a list parallel to ``ids``: the object returned by
    ``self.db.format(..., as_file=True, mode=mode)`` for the chosen format,
    or ``None`` when the book has none of the requested formats.
    """
    files = []
    for book_id in ids:
        stored = self.db.formats(book_id, index_is_id=True) or ''
        on_disk = set(stored.lower().split(','))
        chosen = None
        for candidate in all_formats:
            lowered = candidate.lower()
            if lowered in on_disk:
                chosen = lowered
                break
        if chosen is None:
            files.append(None)
            continue
        files.append(self.db.format(book_id, chosen, index_is_id=True,
                                    as_file=True, mode=mode))
    return files
def get_preferred_formats(self, rows, formats, paths=False):
ans = []
for row in (row.row() for row in rows):
format = None
fmts = self.db.formats(row)
if not fmts:
return []
fmts = ''
db_formats = set(fmts.lower().split(','))
available_formats = set([f.lower() for f in formats])
u = available_formats.intersection(db_formats)

View File

@ -406,7 +406,8 @@ class Document(QGraphicsScene):
for font in lrf.font_map:
fdata = QByteArray(lrf.font_map[font].data)
id = QFontDatabase.addApplicationFontFromData(fdata)
font_map[font] = [str(i) for i in QFontDatabase.applicationFontFamilies(id)][0]
if id != -1:
font_map[font] = [str(i) for i in QFontDatabase.applicationFontFamilies(id)][0]
if load_substitutions:
from calibre.ebooks.lrf.fonts.liberation import LiberationMono_BoldItalic

View File

@ -745,8 +745,8 @@ class Main(MainWindow, Ui_MainWindow):
'''
titles = [i['title'] for i in metadata]
job = self.device_manager.upload_books(Dispatcher(self.books_uploaded),
files, names, on_card=on_card,
titles=titles
files, names, on_card=on_card,
metadata=metadata, titles=titles
)
self.upload_memory[job] = (metadata, on_card, memory, files)
@ -887,8 +887,12 @@ class Main(MainWindow, Ui_MainWindow):
if self.device_connected:
ids = list(dynamic.get('news_to_be_synced', set([])))
ids = [id for id in ids if self.library_view.model().db.has_id(id)]
files = [self.library_view.model().db.format(id, prefs['output_format'], index_is_id=True, as_file=True) for id in ids]
files = self.library_view.model().get_preferred_formats_from_ids(
ids, self.device_manager.device_class.FORMATS)
files = [f for f in files if f is not None]
if not files:
dynamic.set('news_to_be_synced', set([]))
return
metadata = self.library_view.model().get_metadata(ids, rows_are_ids=True)
names = []
for mi in metadata:
@ -919,7 +923,7 @@ class Main(MainWindow, Ui_MainWindow):
if cdata:
mi['cover'] = self.cover_to_thumbnail(cdata)
metadata = iter(metadata)
_files = self.library_view.model().get_preferred_formats(rows,
_files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS, paths=True)
files = [getattr(f, 'name', None) for f in _files]
bad, good, gf, names = [], [], [], []
@ -1479,8 +1483,9 @@ in which you want to store your books files. Any existing books will be automati
return True
def shutdown(self):
self.write_settings()
def shutdown(self, write_settings=True):
if write_settings:
self.write_settings()
self.job_manager.terminate_all_jobs()
self.device_manager.keep_going = False
self.cover_cache.stop()
@ -1500,6 +1505,7 @@ in which you want to store your books files. Any existing books will be automati
def closeEvent(self, e):
self.write_settings()
if self.system_tray_icon.isVisible():
if not dynamic['systray_msg'] and not isosx:
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1509,7 +1515,7 @@ in which you want to store your books files. Any existing books will be automati
else:
if self.confirm_quit():
try:
self.shutdown()
self.shutdown(write_settings=False)
except:
pass
e.accept()

View File

@ -1551,9 +1551,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
def has_book(self, mi):
return bool(self.conn.get('SELECT id FROM books where title=?', (mi.title,), all=False))
def has_id(self, id):
return self.conn.get('SELECT id FROM books where id=?', (id,), all=False) is not None

View File

@ -217,7 +217,11 @@ class ResultCache(SearchQueryParser):
return self.index(id)
def has_id(self, id):
return self._data[id] is not None
try:
return self._data[id] is not None
except IndexError:
pass
return False
def refresh_ids(self, conn, ids):
for id in ids:
@ -557,7 +561,15 @@ class LibraryDatabase2(LibraryDatabase):
img.loadFromData(f.read())
return img
return f if as_file else f.read()
def has_book(self, mi):
    """Return True if a book with the same title as ``mi`` is in the library.

    A missing or empty title never matches.  Byte-string titles are decoded
    with ``preferred_encoding`` (undecodable bytes replaced) before the
    database lookup.
    """
    title = mi.title
    if not title:
        return False
    if not isinstance(title, unicode):
        title = title.decode(preferred_encoding, 'replace')
    match = self.conn.get('SELECT id FROM books where title=?', (title,), all=False)
    return bool(match)
def has_cover(self, index, index_is_id=False):
id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')

View File

@ -7,8 +7,8 @@ var column_titles = {
'rating' : 'Rating',
'date' : 'Date',
'tags' : 'Tags',
'series' : 'Series',
}
'series' : 'Series'
};
String.prototype.format = function() {
var pattern = /\{\d+\}/g;
@ -47,7 +47,7 @@ function render_book(book) {
// Render title cell
var title = '<i>{0}</i>'.format(book.attr("title")) + '<br /><span class="subtitle">';
var id = book.attr("id");
var comments = $.trim(book.text()).replace(/\n\n/, '<br/>');
var comments = $.trim(book.text()).replace(/\n\n/, '<br/>');
var formats = new Array();
var size = (parseFloat(book.attr('size'))/(1024*1024)).toFixed(1);
var tags = book.attr('tags').replace(/,/g, ', ');
@ -70,22 +70,22 @@ function render_book(book) {
authors += jQuery.trim(_authors[i]).replace(/ /g, '&nbsp;')+'<br />';
}
if (authors) { authors = authors.slice(0, authors.length-6); }
// Render rating cell
var _rating = parseFloat(book.attr('rating'))/2.;
var rating = '';
for (i = 0; i < _rating; i++) { rating += '&#9733;'}
// Render date cell
var _date = Date.parseExact(book.attr('timestamp'), 'yyyy/MM/dd HH:mm:ss');
var date = _date.toString('d MMM yyyy').replace(/ /g, '&nbsp;');
// Render series cell
var series = book.attr("series")
if (series) {
series += '&nbsp;[{0}]'.format(book.attr('series_index'));
}
var cells = {
'title' : title,
'authors' : authors,
@ -93,12 +93,12 @@ function render_book(book) {
'date' : date,
'series' : series
};
var row = '';
for (i = 0; i < cmap.length; i++) {
row += '<td class="{0}">{1}</td>'.format(cmap[i], cells[cmap[i]]);
}
return '<tr id="{0}">{1}</tr>'.format(id, row);
return '<tr id="{0}">{1}</tr>'.format(id, row);
}
function fetch_library_books(start, num, timeout, sort, order, search) {
@ -112,15 +112,15 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
last_search = search;
last_sort = sort;
last_sort_order = order;
if (current_library_request != null) {
current_library_request.abort();
current_library_request = null;
}
$('#cover_pane').css('visibility', 'hidden');
$('#loading').css('visibility', 'visible');
current_library_request = $.ajax({
type: "GET",
url: "library",
@ -128,18 +128,18 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
cache: false,
timeout: timeout, //milliseconds
dataType: "xml",
error : function(XMLHttpRequest, textStatus, errorThrown) {
alert('Error: '+textStatus+'\n\n'+errorThrown);
alert('Error: '+textStatus+'\n\n'+errorThrown);
},
success : function(xml, textStatus) {
var library = $(xml).find('library');
total = parseInt(library.attr('total'));
var num = parseInt(library.attr('num'));
var start = parseInt(library.attr('start'));
update_count_bar(start, num, total);
var display = '';
var display = '';
library.find('book').each( function() {
var book = $(this);
var row = render_book(book);
@ -170,18 +170,18 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
$('#cover_pane').css('visibility', 'visible');
}
});
layout();
$('#book_list tbody tr:even()').css('background-color', '#eeeeee');
},
complete : function(XMLHttpRequest, textStatus) {
current_library_request = null;
document.getElementById('main').scrollTop = 0;
$('#loading').css('visibility', 'hidden');
}
});
}
@ -196,7 +196,7 @@ function update_count_bar(start, num, total) {
left.css('opacity', (start <= 0) ? 0.3 : 1);
var right = cb.find('#right');
right.css('opacity', (start + num >= total) ? 0.3 : 1);
}
function setup_count_bar() {
@ -205,7 +205,7 @@ function setup_count_bar() {
fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
}
});
$('#count_bar * img:eq(1)').click(function(){
if (last_start > 0) {
var new_start = last_start - last_num;
@ -215,14 +215,14 @@ function setup_count_bar() {
fetch_library_books(new_start, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
}
});
$('#count_bar * img:eq(2)').click(function(){
if (last_start + last_num < total) {
var new_start = last_start + last_num;
fetch_library_books(new_start, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
}
});
$('#count_bar * img:eq(3)').click(function(){
if (total - last_num > 0) {
fetch_library_books(total - last_num, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
@ -234,7 +234,7 @@ function setup_count_bar() {
function search() {
var search = $.trim($('#search_box * #s').val());
fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT,
fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT,
last_sort, last_sort_order, search);
}
@ -245,11 +245,11 @@ function setup_sorting() {
$('table#book_list thead tr td').mouseover(function() {
this.style.backgroundColor = "#fff2a8";
});
$('table#book_list thead tr td').mouseout(function() {
this.style.backgroundColor = "inherit";
});
for (i = 0; i < cmap.length; i++) {
$('table#book_list span#{0}_sort'.format(cmap[i])).parent().click(function() {
var sort_indicator = $($(this).find('span'));
@ -258,7 +258,7 @@ function setup_sorting() {
var col = id.slice(0, id.indexOf("_"));
var order = 'ascending';
var html = '↑';
if (sort_indicator.html() == '↑') {
order = 'descending'; html = '↓';
}
@ -291,13 +291,13 @@ function layout() {
// Page bootstrap: runs once the DOM is ready, builds the book table headers,
// wires up the widgets and then triggers the date column's click handler.
$(function() {
// document is ready
create_table_headers();
// Setup widgets
setup_sorting();
setup_count_bar();
// Start with an empty search box
$('#search_box * #s').val('');
// Re-run layout() whenever the window is resized
$(window).resize(layout);
// Fire the click handler bound to the date column header
// NOTE(review): presumably this kicks off the initial date-sorted fetch -- confirm in setup_sorting()
$($('#book_list * span#date_sort').parent()).click();
});

View File

@ -48,12 +48,14 @@ entry_points = {
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'any2epub = calibre.ebooks.epub.from_any:main',
'any2lit = calibre.ebooks.lit.from_any:main',
'any2mobi = calibre.ebooks.mobi.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main',

View File

@ -102,7 +102,7 @@ Device Integration
What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -286,7 +286,7 @@ def write(socket, msg, timeout=5):
def read(socket, timeout=5):
'''
Read a message from `socket`. The message must have been sent with the :function:`write`
function. Raises a `RuntimeError` if the message is corrpted. Can return an
function. Raises a `RuntimeError` if the message is corrupted. Can return an
empty string.
'''
if isworker:
@ -299,7 +299,12 @@ def read(socket, timeout=5):
if not msg:
break
if length is None:
length, msg = int(msg[:12]), msg[12:]
try:
length, msg = int(msg[:12]), msg[12:]
except ValueError:
if DEBUG:
print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg
return ''
buf.write(msg)
if buf.tell() >= length:
break

View File

@ -217,8 +217,7 @@ class Server(object):
pos = pos.replace(month = 1)
else:
pos = pos.replace(month = pos.month + 1)
_months = list(months(self.earliest, self.latest))[:-1][:12]
_months = list(months(self.earliest, self.latest))[:-1][-12:]
_months = [range_for_month(*m) for m in _months]
_months = [self.get_slice(*m) for m in _months]
x = [m.min for m in _months]

View File

@ -35,7 +35,7 @@ class Distribution(object):
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-beautifulsoup'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
]
@ -205,23 +205,7 @@ select Install.</li>
<ol>
<li>Before trying to use the command line tools, you must run the app at least once. This will ask you for your password and then set up the symbolic links for the command line tools.</li>
<li>The app cannot be run from within the dmg. You must drag it to a folder on your filesystem (The Desktop, Applications, wherever).</li>
<li>In order for the conversion of RTF to LRF to support WMF images (common in older RTF files) you need to install ImageMagick.</li>
<li>In order for localization of the user interface in your language you must create the file <code>~/.MacOSX/environment.plist</code> as shown below:
<pre class="wiki">
&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"&gt;
&lt;plist version="1.0"&gt;
&lt;dict&gt;
&lt;key&gt;LANG&lt;/key&gt;
&lt;string&gt;de_DE&lt;/string&gt;
&lt;/dict&gt;
&lt;/plist&gt;
</pre>
The example above is for the German language. Substitute the language code you need.
After creating the file you need to log out and log in again for the changes to become
active. Of course, this will only work if calibre has been translated for your language.
If not, head over to <a href="http://calibre.kovidgoyal.net/wiki/Development#Translations">Translations</a> to see how you can translate it.
</li>
<li>To localize the user interface, open the configuration dialog (by clicking the hammer icon next to the search bar) and select your language.</li>
</ol>
'''))
return 'binary.html', data, None

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -338,7 +338,7 @@ class ZipInfo (object):
if isinstance(self.filename, unicode):
try:
return self.filename.encode('ascii'), self.flag_bits
except UnicodeEncodeError:
except:
return self.filename.encode('utf-8'), self.flag_bits | 0x800
else:
return self.filename, self.flag_bits

View File

@ -765,6 +765,8 @@ class BasicNewsRecipe(object, LoggingInterface):
self.log_debug(traceback.format_exc())
if cu is not None:
ext = cu.rpartition('.')[-1]
if '?' in ext:
ext = ''
ext = ext.lower() if ext else 'jpg'
self.report_progress(1, _('Downloading cover from %s')%cu)
cpath = os.path.join(self.output_dir, 'cover.'+ext)

View File

@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
'joelonsoftware',
'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
)]
import re, imp, inspect, time, os

View File

@ -42,3 +42,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
feeds[-1][1].append(art)
return feeds
def postprocess_html(self, soup, first_fetch):
    '''
    Return the ``<html>`` element of `soup`, detached from its parent.

    If `soup` contains no ``<html>`` element, `soup` itself is returned
    unchanged. `first_fetch` is accepted but not used here.
    '''
    root = soup.find('html')
    if root is not None:
        # Detach the element from the surrounding document before handing
        # it back.
        root.extract()
        return root
    return soup

View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CommonDreams(BasicNewsRecipe):
    '''Recipe listing the Common Dreams RSS feeds.'''

    title = u'Common Dreams'
    __author__ = u'XanthanGum'
    description = u'Progressive news and views'

    # Fetch limits, interpreted by BasicNewsRecipe.
    oldest_article = 7
    max_articles_per_feed = 100

    # (feed title, feed URL) pairs.
    feeds = [
        (
            u'Common Dreams Headlines',
            u'http://www.commondreams.org/feed/headlines_rss',
        ),
        (
            u'Common Dreams Views',
            u'http://www.commondreams.org/feed/views_rss',
        ),
        (
            u'Common Dreams Newswire',
            u'http://www.commondreams.org/feed/newswire_rss',
        ),
    ]

View File

@ -49,8 +49,10 @@ class Economist(BasicNewsRecipe):
if not index_started:
continue
text = string.capwords(text)
feeds[text] = []
ans.append(text)
if text not in feeds.keys():
feeds[text] = []
if text not in ans:
ans.append(text)
key = text
continue
if key is None:

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
nin.co.yu
'''
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe):
    '''
    Recipe for NIN online (nin.co.yu).

    The site is marked as needing a subscription: `get_browser` submits the
    configured credentials, and `get_cover_url` looks the cover image up on
    the index page.
    '''

    title = 'NIN online'
    __author__ = 'Darko Miletic'
    description = 'Nedeljne informativne novine'

    no_stylesheets = True
    oldest_article = 15
    simultaneous_downloads = 1
    delay = 1
    encoding = 'utf8'
    needs_subscription = True

    # Site URLs; INDEX and LOGIN are both derived from PREFIX.
    PREFIX = 'http://www.nin.co.yu'
    INDEX = PREFIX + '/?change_lang=ls'
    LOGIN = PREFIX + '/?logout=true'

    html2lrf_options = [
        '--comment', description,
        '--category', 'news, politics, Serbia',
        '--publisher', 'NIN',
    ]

    # Replace U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda m: u'\u00D0')]

    keep_only_tags = [dict(name='td', attrs={'width': '520'})]
    remove_tags_after = dict(name='html')
    feeds = [(u'NIN', u'http://www.nin.co.yu/misc/rss.php?feed=RSS2.0')]

    def get_browser(self):
        '''
        Return a browser that has opened INDEX and, when both a username
        and a password are set, has also opened LOGIN with the url-encoded
        credentials as request data.
        '''
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is None or self.password is None:
            return br
        form = urllib.urlencode({ 'login_name':self.username
                                 ,'login_password':self.password
                                 ,'imageField.x':'32'
                                 ,'imageField.y':'15'
                                 })
        br.open(self.LOGIN, form)
        return br

    def get_cover_url(self):
        '''
        Return PREFIX joined with the ``src`` of the 100x137 borderless
        image found on the index page, or None when no such image exists.
        '''
        index = self.index_to_soup(self.INDEX)
        img = index.find(
            'img', attrs={'width': '100', 'height': '137', 'border': '0'})
        if not img:
            return None
        return self.PREFIX + img['src']

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
www.heise.de/tp
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Telepolis(BasicNewsRecipe):
    '''Recipe for the Telepolis news feed (www.heise.de/tp).'''

    title = 'Telepolis'
    __author__ = 'Darko Miletic'
    description = 'News from Germany in German'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    html2lrf_options = [
        '--comment', description,
        '--category', 'blog,news',
    ]

    # Tables to keep: either of these two table classes.
    keep_only_tags = [
        dict(name='table', attrs={'class': 'inhalt-table'}),
        dict(name='table', attrs={'class': 'blogtable'}),
    ]

    # Elements stripped out: image tables and the external-link icon.
    remove_tags = [
        dict(name='table', attrs={'class': 'img'}),
        dict(name='img', attrs={'src': '/tp/r4/icons/inline/extlink.gif'}),
    ]

    feeds = [(u'Telepolis Newsfeed', u'http://www.heise.de/tp/news.rdf')]

View File

@ -33,6 +33,7 @@ class TimesOnline(BasicNewsRecipe):
('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'),
('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'),
('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'),
('Literary Supplement', 'http://www.timesonline.co.uk/tol/feeds/rss/thetls.xml'),
]
def print_version(self, url):

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
tomshardware.com
'''
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe
class Tomshardware(BasicNewsRecipe):
@ -50,7 +49,7 @@ class Tomshardware(BasicNewsRecipe):
rmain, rsep, article_id = main.rpartition(',')
tmain, tsep, trest = rmain.rpartition('/reviews/')
if tsep:
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
return 'http://www.tomshardware.com/news_print.php?p1=' + article_id
def preprocess_html(self, soup):

View File

@ -0,0 +1,54 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch tomshardware.
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class TomsHardwareDe(BasicNewsRecipe):
    '''Recipe for the German edition of Tom's Hardware.'''

    title = "Tom's Hardware German"
    __author__ = 'Oliver Niesner'
    description = 'Computer news in german'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets = True
    encoding = 'utf-8'

    # Disabled regexp preprocessing, kept for reference:
    #preprocess_regexps = \
    #   [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
    #     [
    #         (r'<84>', lambda match: ''),
    #         (r'<93>', lambda match: ''),
    #     ]
    #   ]

    # Page elements to strip, matched by id first and then by tag/class.
    # NOTE(review): the empty-id matcher appears twice, and 'href' is not an
    # HTML tag name (possibly 'a' was intended) -- both kept as written.
    remove_tags = [
        {'id': 'outside-advert'},
        {'id': 'advertRightWhite'},
        {'id': 'header-advert'},
        {'id': 'header-banner'},
        {'id': 'header-menu'},
        {'id': 'header-top'},
        {'id': 'header-tools'},
        {'id': 'nbComment'},
        {'id': 'internalSidebar'},
        {'id': 'header-news-infos'},
        {'id': 'breadcrumbs'},
        {'id': ''},
        {'name': 'div', 'attrs': {'class': 'pyjama'}},
        {'name': 'href', 'attrs': {'class': 'comment'}},
        {'name': 'div', 'attrs': {'class': 'greyBoxR clearfix'}},
        {'name': 'div', 'attrs': {'class': 'greyBoxL clearfix'}},
        {'name': 'div', 'attrs': {'class': 'greyBox clearfix'}},
        {'id': ''},
    ]

    #remove_tags_before = [dict(id='header-news-title')]
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'news-elm'}}]
    #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]

    feeds = [
        ('tomshardware',
         'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml'),
    ]

View File

@ -55,7 +55,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
# This class supports writing PDF files out, given pages produced by another
# class (typically {@link #PdfFileReader PdfFileReader}).
class PdfFileWriter(object):
def __init__(self):
def __init__(self,title=u"Unknown",author=u"Unknown"):
self._header = "%PDF-1.3"
self._objects = [] # array of indirect objects
@ -71,7 +71,9 @@ class PdfFileWriter(object):
# info object
info = DictionaryObject()
info.update({
NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/")
NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/"),
NameObject("/Author"): createStringObject(author),
NameObject("/Title"): createStringObject(title),
})
self._info = self._addObject(info)