Merge upstream changes

This commit is contained in:
Marshall T. Vandegrift 2009-01-17 11:17:48 -05:00
commit 7449870919
84 changed files with 28093 additions and 18947 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.4.126' __version__ = '0.4.128'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.

View File

@ -43,7 +43,11 @@ def update_module(mod, path):
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip') zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
elif isosx: elif isosx:
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
'Resources', 'lib', 'python2.5', 'site-packages.zip') 'Resources', 'lib',
'python'+'.'.join(map(str, sys.version_info[:2])),
'site-packages.zip')
else:
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
if zp is not None: if zp is not None:
update_zipfile(zp, mod, path) update_zipfile(zp, mod, path)
else: else:

View File

@ -9,31 +9,26 @@ import os, fnmatch
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
class CYBOOKG3(USBMS): class CYBOOKG3(USBMS):
MIME_MAP = {
'mobi' : 'application/mobi',
'prc' : 'application/prc',
'html' : 'application/html',
'pdf' : 'application/pdf',
'rtf' : 'application/rtf',
'txt' : 'text/plain',
}
# Ordered list of supported formats # Ordered list of supported formats
FORMATS = MIME_MAP.keys() # Be sure these have an entry in calibre.devices.mime
FORMATS = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']
VENDOR_ID = 0x0bda VENDOR_ID = 0x0bda
PRODUCT_ID = 0x0703 PRODUCT_ID = 0x0703
BCD = [0x110, 0x132] BCD = [0x110, 0x132]
VENDOR_NAME = 'BOOKEEN' VENDOR_NAME = 'BOOKEEN'
PRODUCT_NAME = 'CYBOOK_GEN3' WINDOWS_MAIN_MEM = 'CYBOOK_GEN3__-FD'
WINDOWS_CARD_MEM = 'CYBOOK_GEN3__-SD'
OSX_NAME_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media' OSX_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
OSX_NAME_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media' OSX_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
MAIN_MEMORY_VOLUME_LABEL = 'Cybook Gen 3 Main Memory' MAIN_MEMORY_VOLUME_LABEL = 'Cybook Gen 3 Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card' STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
EBOOK_DIR = "eBooks" EBOOK_DIR_MAIN = "eBooks"
SUPPORTS_SUB_DIRS = True
def delete_books(self, paths, end_session=True): def delete_books(self, paths, end_session=True):
for path in paths: for path in paths:
@ -52,3 +47,8 @@ class CYBOOKG3(USBMS):
for filen in fnmatch.filter(files, filename + "*.t2b"): for filen in fnmatch.filter(files, filename + "*.t2b"):
os.unlink(os.path.join(p, filen)) os.unlink(os.path.join(p, filen))
try:
os.removedirs(os.path.dirname(path))
except:
pass

View File

@ -41,6 +41,20 @@ class Device(object):
'''Return the FDI description of this device for HAL on linux.''' '''Return the FDI description of this device for HAL on linux.'''
return '' return ''
@classmethod
def can_handle(cls, device_info):
'''
Optional method to perform further checks on a device to see if this driver
is capable of handling it. If it is not it should return False. This method
is only called after the vendor, product ids and the bcd have matched, so
it can do some relatively time intensive checks. The default implementation
returns True.
:param device_info: On windows a device ID string. On Unix a tuple of
``(vendor_id, product_id, bcd)``.
'''
return True
def open(self): def open(self):
''' '''
Perform any device specific initialization. Called after the device is Perform any device specific initialization. Called after the device is
@ -109,7 +123,8 @@ class Device(object):
""" """
raise NotImplementedError() raise NotImplementedError()
def upload_books(self, files, names, on_card=False, end_session=True): def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
''' '''
Upload a list of books to the device. If a file already Upload a list of books to the device. If a file already
exists on the device, it should be replaced. exists on the device, it should be replaced.
@ -121,6 +136,10 @@ class Device(object):
once uploaded to the device. len(names) == len(files) once uploaded to the device. len(names) == len(files)
@return: A list of 3-element tuples. The list is meant to be passed @return: A list of 3-element tuples. The list is meant to be passed
to L{add_books_to_metadata}. to L{add_books_to_metadata}.
@param metadata: If not None, it is a list of dictionaries. Each dictionary
will have at least the key tags to allow the driver to choose book location
based on tags. len(metadata) == len(files). If your device does not support
hierarchical ebook folders, you can safely ignore this parameter.
''' '''
raise NotImplementedError() raise NotImplementedError()

View File

@ -9,24 +9,30 @@ import os, fnmatch
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
class KINDLE(USBMS): class KINDLE(USBMS):
MIME_MAP = {
'azw' : 'application/azw',
'mobi' : 'application/mobi',
'prc' : 'application/prc',
'txt' : 'text/plain',
}
# Ordered list of supported formats # Ordered list of supported formats
FORMATS = MIME_MAP.keys() FORMATS = ['azw', 'mobi', 'prc', 'txt']
VENDOR_ID = 0x1949 VENDOR_ID = 0x1949
PRODUCT_ID = 0x0001 PRODUCT_ID = 0x0001
BCD = 0x399 BCD = [0x399]
VENDOR_NAME = 'AMAZON' VENDOR_NAME = 'AMAZON'
PRODUCT_NAME = 'KINDLE' WINDOWS_MAIN_MEM = 'KINDLE'
MAIN_MEMORY_VOLUME_LABEL = 'Kindle Main Memory' MAIN_MEMORY_VOLUME_LABEL = 'Kindle Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card' STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
EBOOK_DIR = "documents" EBOOK_DIR_MAIN = "documents"
def delete_books(self, paths, end_session=True):
for path in paths:
if os.path.exists(path):
os.unlink(path)
filepath, ext = os.path.splitext(path)
basepath, filename = os.path.split(filepath)
# Delete the ebook auxiliary file
if os.path.exists(filepath + '.mbp'):
os.unlink(filepath + '.mbp')

View File

@ -0,0 +1,19 @@
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
'''
Global Mime mapping of ebook types.
'''
MIME_MAP = {
'azw' : 'application/azw',
'epub' : 'application/epub+zip',
'html' : 'text/html',
'lrf' : 'application/x-sony-bbeb',
'lrx' : 'application/x-sony-bbeb',
'mobi' : 'application/mobi',
'pdf' : 'application/pdf',
'prc' : 'application/prc',
'rtf' : 'application/rtf',
'txt' : 'text/plain',
}

View File

@ -841,7 +841,8 @@ class PRS500(Device):
self.upload_book_list(booklists[1], end_session=False) self.upload_book_list(booklists[1], end_session=False)
@safe @safe
def upload_books(self, files, names, on_card=False, end_session=True): def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
card = self.card(end_session=False) card = self.card(end_session=False)
prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/' prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/'
if on_card and not self._exists(prefix)[0]: if on_card and not self._exists(prefix)[0]:

View File

@ -407,7 +407,8 @@ class PRS505(Device):
if not os.path.isdir(path): if not os.path.isdir(path):
os.utime(path, None) os.utime(path, None)
def upload_books(self, files, names, on_card=False, end_session=True): def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
if on_card and not self._card_prefix: if on_card and not self._card_prefix:
raise ValueError(_('The reader has no storage card connected.')) raise ValueError(_('The reader has no storage card connected.'))
path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \ path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \

View File

@ -60,14 +60,17 @@ class DeviceScanner(object):
def is_device_connected(self, device): def is_device_connected(self, device):
if iswindows: if iswindows:
vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
for device_id in self.devices: for device_id in self.devices:
if vid in device_id and pid in device_id: if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)): if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
if device.can_handle(device_id):
return True return True
else: else:
for vendor, product, bcdDevice in self.devices: for vendor, product, bcdDevice in self.devices:
if device.VENDOR_ID == vendor and device.PRODUCT_ID == product: if device.VENDOR_ID == vendor and device.PRODUCT_ID == product:
if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)): if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)):
if device.can_handle((vendor, product, bcdDevice)):
return True return True
return False return False

View File

@ -6,7 +6,7 @@ intended to be subclassed with the relevant parts implemented for a particular
device. This class handles devive detection. device. This class handles devive detection.
''' '''
import os, time import os, subprocess, time
from calibre.devices.interface import Device as _Device from calibre.devices.interface import Device as _Device
from calibre.devices.errors import DeviceError from calibre.devices.errors import DeviceError
@ -23,11 +23,12 @@ class Device(_Device):
PRODUCT_ID = 0x0 PRODUCT_ID = 0x0
BCD = None BCD = None
VENDOR_NAME = '' VENDOR_NAME = None
PRODUCT_NAME = '' WINDOWS_MAIN_MEM = None
WINDOWS_CARD_MEM = None
OSX_NAME_MAIN_MEM = '' OSX_MAIN_MEM = None
OSX_NAME_CARD_MEM = '' OSX_CARD_MEM = None
MAIN_MEMORY_VOLUME_LABEL = '' MAIN_MEMORY_VOLUME_LABEL = ''
STORAGE_CARD_VOLUME_LABEL = '' STORAGE_CARD_VOLUME_LABEL = ''
@ -148,43 +149,47 @@ class Device(_Device):
return (msz, 0, csz) return (msz, 0, csz)
@classmethod def windows_match_device(self, pnp_id, device_id):
def windows_match_device(cls, device_id): pnp_id = pnp_id.upper()
if device_id and pnp_id is not None:
device_id = device_id.upper() device_id = device_id.upper()
if 'VEN_'+cls.VENDOR_NAME in device_id and \
'PROD_'+cls.PRODUCT_NAME in device_id: if 'VEN_' + self.VENDOR_NAME in pnp_id and 'PROD_' + device_id in pnp_id:
return True
vid, pid = hex(cls.VENDOR_ID)[2:], hex(cls.PRODUCT_ID)[2:]
while len(vid) < 4: vid = '0' + vid
while len(pid) < 4: pid = '0' + pid
if 'VID_'+vid in device_id and 'PID_'+pid in device_id:
return True return True
return False return False
# This only supports Windows >= 2000 def windows_get_drive_prefix(self, drive):
def open_windows(self): prefix = None
drives = []
wmi = __import__('wmi', globals(), locals(), [], -1)
c = wmi.WMI()
for drive in c.Win32_DiskDrive():
if self.__class__.windows_match_device(str(drive.PNPDeviceID)):
if drive.Partitions == 0:
continue
try: try:
partition = drive.associators("Win32_DiskDriveToDiskPartition")[0] partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0] logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
prefix = logical_disk.DeviceID+os.sep prefix = logical_disk.DeviceID + os.sep
drives.append((drive.Index, prefix))
except IndexError: except IndexError:
continue pass
return prefix
def open_windows(self):
drives = {}
wmi = __import__('wmi', globals(), locals(), [], -1)
c = wmi.WMI()
for drive in c.Win32_DiskDrive():
if self.windows_match_device(str(drive.PNPDeviceID), WINDOWS_MAIN_MEM):
drives['main'] = self.windows_get_drive_prefix(drive)
elif self.windows_match_device(str(drive.PNPDeviceID), WINDOWS_CARD_MEM):
drives['card'] = self.windows_get_drive_prefix(drive)
if 'main' and 'card' in drives.keys():
break
if not drives: if not drives:
raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__) raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.') % self.__class__.__name__)
drives.sort(cmp=lambda a, b: cmp(a[0], b[0])) self._main_prefix = drives['main'] if 'main' in names.keys() else None
self._main_prefix = drives[0][1] self._card_prefix = drives['card'] if 'card' in names.keys() else None
if len(drives) > 1:
self._card_prefix = drives[1][1]
@classmethod @classmethod
def get_osx_mountpoints(self, raw=None): def get_osx_mountpoints(self, raw=None):
@ -207,9 +212,9 @@ class Device(_Device):
break break
for i, line in enumerate(lines): for i, line in enumerate(lines):
if line.strip().endswith('<class IOMedia>') and self.OSX_NAME_MAIN_MEM in line: if self.OSX_MAIN_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_MAIN_MEM in line:
get_dev_node(lines[i+1:], 'main') get_dev_node(lines[i+1:], 'main')
if line.strip().endswith('<class IOMedia>') and self.OSX_NAME_CARD_MEM in line: if self.OSX_CARD_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_CARD_MEM in line:
get_dev_node(lines[i+1:], 'card') get_dev_node(lines[i+1:], 'card')
if len(names.keys()) == 2: if len(names.keys()) == 2:
break break

View File

@ -12,11 +12,13 @@ from itertools import cycle
from calibre.devices.usbms.device import Device from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book from calibre.devices.usbms.books import BookList, Book
from calibre.devices.errors import FreeSpaceError from calibre.devices.errors import FreeSpaceError
from calibre.devices.mime import MIME_MAP
class USBMS(Device): class USBMS(Device):
EBOOK_DIR = ''
MIME_MAP = {}
FORMATS = [] FORMATS = []
EBOOK_DIR_MAIN = ''
EBOOK_DIR_CARD = ''
SUPPORTS_SUB_DIRS = False
def __init__(self, key='-1', log_packets=False, report_progress=None): def __init__(self, key='-1', log_packets=False, report_progress=None):
pass pass
@ -35,27 +37,37 @@ class USBMS(Device):
return bl return bl
prefix = self._card_prefix if oncard else self._main_prefix prefix = self._card_prefix if oncard else self._main_prefix
ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
# Get all books in all directories under the root EBOOK_DIR directory # Get all books in all directories under the root ebook_dir directory
for path, dirs, files in os.walk(os.path.join(prefix, self.EBOOK_DIR)): for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
# Filter out anything that isn't in the list of supported ebook types # Filter out anything that isn't in the list of supported ebook types
for book_type in self.MIME_MAP.keys(): for book_type in self.FORMATS:
for filename in fnmatch.filter(files, '*.%s' % (book_type)): for filename in fnmatch.filter(files, '*.%s' % (book_type)):
title, author, mime = self.__class__.extract_book_metadata_by_filename(filename) title, author, mime = self.__class__.extract_book_metadata_by_filename(filename)
bl.append(Book(os.path.join(path, filename), title, author, mime)) bl.append(Book(os.path.join(path, filename), title, author, mime))
return bl return bl
def upload_books(self, files, names, on_card=False, end_session=True): def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):
if on_card and not self._card_prefix: if on_card and not self._card_prefix:
raise ValueError(_('The reader has no storage card connected.')) raise ValueError(_('The reader has no storage card connected.'))
if not on_card: if not on_card:
path = os.path.join(self._main_prefix, self.EBOOK_DIR) path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
else: else:
path = os.path.join(self._card_prefix, self.EBOOK_DIR) path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
sizes = map(os.path.getsize, files) def get_size(obj):
if hasattr(obj, 'seek'):
obj.seek(0, os.SEEK_END)
size = obj.tell()
obj.seek(0)
return size
return os.path.getsize(obj)
sizes = map(get_size, files)
size = sum(sizes) size = sum(sizes)
if on_card and size > self.free_space()[2] - 1024*1024: if on_card and size > self.free_space()[2] - 1024*1024:
@ -65,11 +77,36 @@ class USBMS(Device):
paths = [] paths = []
names = iter(names) names = iter(names)
metadata = iter(metadata)
for infile in files: for infile in files:
filepath = os.path.join(path, names.next()) newpath = path
if self.SUPPORTS_SUB_DIRS:
mdata = metadata.next()
if 'tags' in mdata.keys():
for tag in mdata['tags']:
if tag.startswith('/'):
newpath += tag
newpath = os.path.normpath(newpath)
break
if not os.path.exists(newpath):
os.makedirs(newpath)
filepath = os.path.join(newpath, names.next())
paths.append(filepath) paths.append(filepath)
if hasattr(infile, 'read'):
infile.seek(0)
dest = open(filepath, 'wb')
shutil.copyfileobj(infile, dest, 10*1024*1024)
dest.flush()
dest.close()
else:
shutil.copy2(infile, filepath) shutil.copy2(infile, filepath)
return zip(paths, cycle([on_card])) return zip(paths, cycle([on_card]))
@ -88,6 +125,10 @@ class USBMS(Device):
if os.path.exists(path): if os.path.exists(path):
# Delete the ebook # Delete the ebook
os.unlink(path) os.unlink(path)
try:
os.removedirs(os.path.dirname(path))
except:
pass
@classmethod @classmethod
def remove_books_from_metadata(cls, paths, booklists): def remove_books_from_metadata(cls, paths, booklists):
@ -96,7 +137,6 @@ class USBMS(Device):
for book in bl: for book in bl:
if path.endswith(book.path): if path.endswith(book.path):
bl.remove(book) bl.remove(book)
break
def sync_booklists(self, booklists, end_session=True): def sync_booklists(self, booklists, end_session=True):
# There is no meta data on the device to update. The device is treated # There is no meta data on the device to update. The device is treated
@ -136,9 +176,10 @@ class USBMS(Device):
else: else:
book_title = os.path.splitext(filename)[0].replace('_', ' ') book_title = os.path.splitext(filename)[0].replace('_', ' ')
fileext = os.path.splitext(filename)[1] fileext = os.path.splitext(filename)[1][1:]
if fileext in cls.MIME_MAP.keys():
book_mime = cls.MIME_MAP[fileext] if fileext in cls.FORMATS:
book_mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown'
return book_title, book_author, book_mime return book_title, book_author, book_mime

View File

@ -67,6 +67,7 @@ def txt2opf(path, tdir, opts):
def pdf2opf(path, tdir, opts): def pdf2opf(path, tdir, opts):
from calibre.ebooks.lrf.pdf.convert_from import generate_html from calibre.ebooks.lrf.pdf.convert_from import generate_html
generate_html(path, tdir) generate_html(path, tdir)
opts.dont_split_on_page_breaks = True
return os.path.join(tdir, 'metadata.opf') return os.path.join(tdir, 'metadata.opf')
def epub2opf(path, tdir, opts): def epub2opf(path, tdir, opts):

View File

@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print):
html_files.append(os.path.abspath(content(f))) html_files.append(os.path.abspath(content(f)))
for path in html_files: for path in html_files:
if not os.access(path, os.R_OK):
continue
base = os.path.dirname(path) base = os.path.dirname(path)
root = html.fromstring(open(content(path), 'rb').read(), parser=parser) root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
for element, attribute, link, pos in list(root.iterlinks()): for element, attribute, link, pos in list(root.iterlinks()):

View File

@ -335,7 +335,7 @@ class PreProcessor(object):
# Fix pdftohtml markup # Fix pdftohtml markup
PDFTOHTML = [ PDFTOHTML = [
# Remove <hr> tags # Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'), (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers # Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''), (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p> # Remove <br> and replace <br><br> with <p>
@ -560,7 +560,7 @@ class Processor(Parser):
hr = etree.Element('hr') hr = etree.Element('hr')
if elem.getprevious() is None: if elem.getprevious() is None:
elem.getparent()[:0] = [hr] elem.getparent()[:0] = [hr]
else: elif elem.getparent() is not None:
insert = None insert = None
for i, c in enumerate(elem.getparent()): for i, c in enumerate(elem.getparent()):
if c is elem: if c is elem:
@ -796,7 +796,19 @@ class Processor(Parser):
setting = '' setting = ''
face = font.attrib.pop('face', None) face = font.attrib.pop('face', None)
if face is not None: if face is not None:
setting += 'font-face:%s;'%face faces = []
for face in face.split(','):
face = face.strip()
if ' ' in face and not (face[0] == face[-1] == '"'):
face = '"%s"' % face.replace('"', r'\"')
faces.append(face)
for generic in ('serif', 'sans-serif', 'monospace'):
if generic in faces:
break
else:
faces.append('serif')
family = ', '.join(faces)
setting += 'font-family: %s;' % family
color = font.attrib.pop('color', None) color = font.attrib.pop('color', None)
if color is not None: if color is not None:
setting += 'color:%s'%color setting += 'color:%s'%color

View File

@ -7,24 +7,20 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>' 'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, os import sys, struct, cStringIO, os
import functools import functools
import re import re
from urlparse import urldefrag from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
from calibre.ebooks.lit import LitError from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
from calibre.ebooks.oeb.base import XML_PARSER, urlnormalize from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre import plugins from calibre import plugins
lzx, lxzerror = plugins['lzx'] lzx, lxzerror = plugins['lzx']
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
__all__ = ["LitReader"]
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?> XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
""" """
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?> OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -112,9 +108,6 @@ def consume_sized_utf8_string(bytes, zpad=False):
pos += 1 pos += 1
return u''.join(result), bytes[pos:] return u''.join(result), bytes[pos:]
def encode(string):
return unicode(string).encode('ascii', 'xmlcharrefreplace')
class UnBinary(object): class UnBinary(object):
AMPERSAND_RE = re.compile( AMPERSAND_RE = re.compile(
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)') r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -125,13 +118,13 @@ class UnBinary(object):
def __init__(self, bin, path, manifest={}, map=HTML_MAP): def __init__(self, bin, path, manifest={}, map=HTML_MAP):
self.manifest = manifest self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.is_html = map is HTML_MAP self.opf = map is OPF_MAP
self.bin = bin
self.dir = os.path.dirname(path) self.dir = os.path.dirname(path)
buf = StringIO() self.buf = cStringIO.StringIO()
self.binary_to_text(bin, buf) self.binary_to_text()
self.raw = buf.getvalue().lstrip() self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_reserved() self.escape_reserved()
self._tree = None
def escape_reserved(self): def escape_reserved(self):
raw = self.raw raw = self.raw
@ -158,28 +151,18 @@ class UnBinary(object):
return '/'.join(relpath) return '/'.join(relpath)
def __unicode__(self): def __unicode__(self):
return self.raw.decode('utf-8')
def __str__(self):
return self.raw return self.raw
def tree(): def binary_to_text(self, base=0, depth=0):
def fget(self):
if not self._tree:
self._tree = etree.fromstring(self.raw, parser=XML_PARSER)
return self._tree
return property(fget=fget)
tree = tree()
def binary_to_text(self, bin, buf, index=0, depth=0):
tag_name = current_map = None tag_name = current_map = None
dynamic_tag = errors = 0 dynamic_tag = errors = 0
in_censorship = is_goingdown = False in_censorship = is_goingdown = False
state = 'text' state = 'text'
index = base
flags = 0 flags = 0
while index < len(bin): while index < len(self.bin):
c, index = read_utf8_char(bin, index) c, index = read_utf8_char(self.bin, index)
oc = ord(c) oc = ord(c)
if state == 'text': if state == 'text':
@ -192,7 +175,7 @@ class UnBinary(object):
c = '>>' c = '>>'
elif c == '<': elif c == '<':
c = '<<' c = '<<'
buf.write(encode(c)) self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
elif state == 'get flags': elif state == 'get flags':
if oc == 0: if oc == 0:
@ -205,7 +188,7 @@ class UnBinary(object):
state = 'text' if oc == 0 else 'get attr' state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING: if flags & FLAG_OPENING:
tag = oc tag = oc
buf.write('<') self.buf.write('<')
if not (flags & FLAG_CLOSING): if not (flags & FLAG_CLOSING):
is_goingdown = True is_goingdown = True
if tag == 0x8000: if tag == 0x8000:
@ -222,7 +205,7 @@ class UnBinary(object):
tag_name = '?'+unichr(tag)+'?' tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag] current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown' % unichr(tag) print 'WARNING: tag %s unknown' % unichr(tag)
buf.write(encode(tag_name)) self.buf.write(unicode(tag_name).encode('utf-8'))
elif flags & FLAG_CLOSING: elif flags & FLAG_CLOSING:
if depth == 0: if depth == 0:
raise LitError('Extra closing tag') raise LitError('Extra closing tag')
@ -234,14 +217,15 @@ class UnBinary(object):
if not is_goingdown: if not is_goingdown:
tag_name = None tag_name = None
dynamic_tag = 0 dynamic_tag = 0
buf.write(' />') self.buf.write(' />')
else: else:
buf.write('>') self.buf.write('>')
index = self.binary_to_text(bin, buf, index, depth+1) index = self.binary_to_text(base=index, depth=depth+1)
is_goingdown = False is_goingdown = False
if not tag_name: if not tag_name:
raise LitError('Tag ends before it begins.') raise LitError('Tag ends before it begins.')
buf.write(encode(u''.join(('</', tag_name, '>')))) self.buf.write(u''.join(
('</', tag_name, '>')).encode('utf-8'))
dynamic_tag = 0 dynamic_tag = 0
tag_name = None tag_name = None
state = 'text' state = 'text'
@ -261,7 +245,7 @@ class UnBinary(object):
in_censorship = True in_censorship = True
state = 'get value length' state = 'get value length'
continue continue
buf.write(' ' + encode(attr) + '=') self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
if attr in ['href', 'src']: if attr in ['href', 'src']:
state = 'get href length' state = 'get href length'
else: else:
@ -269,39 +253,40 @@ class UnBinary(object):
elif state == 'get value length': elif state == 'get value length':
if not in_censorship: if not in_censorship:
buf.write('"') self.buf.write('"')
count = oc - 1 count = oc - 1
if count == 0: if count == 0:
if not in_censorship: if not in_censorship:
buf.write('"') self.buf.write('"')
in_censorship = False in_censorship = False
state = 'get attr' state = 'get attr'
continue continue
state = 'get value' state = 'get value'
if oc == 0xffff: if oc == 0xffff:
continue continue
if count < 0 or count > (len(bin) - index): if count < 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
elif state == 'get value': elif state == 'get value':
if count == 0xfffe: if count == 0xfffe:
if not in_censorship: if not in_censorship:
buf.write('%s"' % (oc - 1)) self.buf.write('%s"' % (oc - 1))
in_censorship = False in_censorship = False
state = 'get attr' state = 'get attr'
elif count > 0: elif count > 0:
if not in_censorship: if not in_censorship:
buf.write(encode(c)) self.buf.write(c.encode(
'ascii', 'xmlcharrefreplace'))
count -= 1 count -= 1
if count == 0: if count == 0:
if not in_censorship: if not in_censorship:
buf.write('"') self.buf.write('"')
in_censorship = False in_censorship = False
state = 'get attr' state = 'get attr'
elif state == 'get custom length': elif state == 'get custom length':
count = oc - 1 count = oc - 1
if count <= 0 or count > len(bin)-index: if count <= 0 or count > len(self.bin)-index:
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
dynamic_tag += 1 dynamic_tag += 1
state = 'get custom' state = 'get custom'
@ -311,26 +296,26 @@ class UnBinary(object):
tag_name += c tag_name += c
count -= 1 count -= 1
if count == 0: if count == 0:
buf.write(encode(tag_name)) self.buf.write(unicode(tag_name).encode('utf-8'))
state = 'get attr' state = 'get attr'
elif state == 'get attr length': elif state == 'get attr length':
count = oc - 1 count = oc - 1
if count <= 0 or count > (len(bin) - index): if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
buf.write(' ') self.buf.write(' ')
state = 'get custom attr' state = 'get custom attr'
elif state == 'get custom attr': elif state == 'get custom attr':
buf.write(encode(c)) self.buf.write(unicode(c).encode('utf-8'))
count -= 1 count -= 1
if count == 0: if count == 0:
buf.write('=') self.buf.write('=')
state = 'get value length' state = 'get value length'
elif state == 'get href length': elif state == 'get href length':
count = oc - 1 count = oc - 1
if count <= 0 or count > (len(bin) - index): if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
href = '' href = ''
state = 'get href' state = 'get href'
@ -344,11 +329,10 @@ class UnBinary(object):
if frag: if frag:
path = '#'.join((path, frag)) path = '#'.join((path, frag))
path = urlnormalize(path) path = urlnormalize(path)
buf.write(encode(u'"%s"' % path)) self.buf.write((u'"%s"' % path).encode('utf-8'))
state = 'get attr' state = 'get attr'
return index return index
class DirectoryEntry(object): class DirectoryEntry(object):
def __init__(self, name, section, offset, size): def __init__(self, name, section, offset, size):
self.name = name self.name = name
@ -363,7 +347,6 @@ class DirectoryEntry(object):
def __str__(self): def __str__(self):
return repr(self) return repr(self)
class ManifestItem(object): class ManifestItem(object):
def __init__(self, original, internal, mime_type, offset, root, state): def __init__(self, original, internal, mime_type, offset, root, state):
self.original = original self.original = original
@ -391,87 +374,65 @@ class ManifestItem(object):
% (self.internal, self.path, self.mime_type, self.offset, % (self.internal, self.path, self.mime_type, self.offset,
self.root, self.state) self.root, self.state)
def preserve(function): def preserve(function):
def wrapper(self, *args, **kwargs): def wrapper(self, *args, **kwargs):
opos = self.stream.tell() opos = self._stream.tell()
try: try:
return function(self, *args, **kwargs) return function(self, *args, **kwargs)
finally: finally:
self.stream.seek(opos) self._stream.seek(opos)
functools.update_wrapper(wrapper, function) functools.update_wrapper(wrapper, function)
return wrapper return wrapper
class LitFile(object): class LitReader(object):
PIECE_SIZE = 16 PIECE_SIZE = 16
XML_PARSER = etree.XMLParser(
def __init__(self, filename_or_stream): recover=True, resolve_entities=False)
if hasattr(filename_or_stream, 'read'):
self.stream = filename_or_stream
else:
self.stream = open(filename_or_stream, 'rb')
try:
self.opf_path = os.path.splitext(
os.path.basename(self.stream.name))[0] + '.opf'
except AttributeError:
self.opf_path = 'content.opf'
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.read_secondary_header()
self.read_header_pieces()
self.read_section_names()
self.read_manifest()
self.read_drm()
def warn(self, msg):
print "WARNING: %s" % (msg,)
def magic(): def magic():
@preserve @preserve
def fget(self): def fget(self):
self.stream.seek(0) self._stream.seek(0)
return self.stream.read(8) return self._stream.read(8)
return property(fget=fget) return property(fget=fget)
magic = magic() magic = magic()
def version(): def version():
def fget(self): def fget(self):
self.stream.seek(8) self._stream.seek(8)
return u32(self.stream.read(4)) return u32(self._stream.read(4))
return property(fget=fget) return property(fget=fget)
version = version() version = version()
def hdr_len(): def hdr_len():
@preserve @preserve
def fget(self): def fget(self):
self.stream.seek(12) self._stream.seek(12)
return int32(self.stream.read(4)) return int32(self._stream.read(4))
return property(fget=fget) return property(fget=fget)
hdr_len = hdr_len() hdr_len = hdr_len()
def num_pieces(): def num_pieces():
@preserve @preserve
def fget(self): def fget(self):
self.stream.seek(16) self._stream.seek(16)
return int32(self.stream.read(4)) return int32(self._stream.read(4))
return property(fget=fget) return property(fget=fget)
num_pieces = num_pieces() num_pieces = num_pieces()
def sec_hdr_len(): def sec_hdr_len():
@preserve @preserve
def fget(self): def fget(self):
self.stream.seek(20) self._stream.seek(20)
return int32(self.stream.read(4)) return int32(self._stream.read(4))
return property(fget=fget) return property(fget=fget)
sec_hdr_len = sec_hdr_len() sec_hdr_len = sec_hdr_len()
def guid(): def guid():
@preserve @preserve
def fget(self): def fget(self):
self.stream.seek(24) self._stream.seek(24)
return self.stream.read(16) return self._stream.read(16)
return property(fget=fget) return property(fget=fget)
guid = guid() guid = guid()
@ -481,27 +442,44 @@ class LitFile(object):
size = self.hdr_len \ size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \ + (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len + self.sec_hdr_len
self.stream.seek(0) self._stream.seek(0)
return self.stream.read(size) return self._stream.read(size)
return property(fget=fget) return property(fget=fget)
header = header() header = header()
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self._stream = filename_or_stream
else:
self._stream = open(filename_or_stream, 'rb')
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.entries = {}
self._read_secondary_header()
self._read_header_pieces()
self._read_section_names()
self._read_manifest()
self._read_meta()
self._read_drm()
@preserve @preserve
def __len__(self): def __len__(self):
self.stream.seek(0, 2) self._stream.seek(0, 2)
return self.stream.tell() return self._stream.tell()
@preserve @preserve
def read_raw(self, offset, size): def _read_raw(self, offset, size):
self.stream.seek(offset) self._stream.seek(offset)
return self.stream.read(size) return self._stream.read(size)
def read_content(self, offset, size): def _read_content(self, offset, size):
return self.read_raw(self.content_offset + offset, size) return self._read_raw(self.content_offset + offset, size)
def read_secondary_header(self): def _read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE) offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self.read_raw(offset, self.sec_hdr_len) bytes = self._read_raw(offset, self.sec_hdr_len)
offset = int32(bytes[4:]) offset = int32(bytes[4:])
while offset < len(bytes): while offset < len(bytes):
blocktype = bytes[offset:offset+4] blocktype = bytes[offset:offset+4]
@ -529,21 +507,21 @@ class LitFile(object):
if not hasattr(self, 'content_offset'): if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset') raise LitError('Could not figure out the content offset')
def read_header_pieces(self): def _read_header_pieces(self):
src = self.header[self.hdr_len:] src = self.header[self.hdr_len:]
for i in xrange(self.num_pieces): for i in xrange(self.num_pieces):
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE] piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece)) raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:]) offset, size = u32(piece), int32(piece[8:])
piece = self.read_raw(offset, size) piece = self._read_raw(offset, size)
if i == 0: if i == 0:
continue # Dont need this piece continue # Dont need this piece
elif i == 1: elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \ if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown: u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece') raise LitError('Secondary header does not match piece')
self.read_directory(piece) self._read_directory(piece)
elif i == 2: elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \ if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown: u32(piece[12:]) != self.count_unknown:
@ -554,13 +532,12 @@ class LitFile(object):
elif i == 4: elif i == 4:
self.piece4_guid = piece self.piece4_guid = piece
def read_directory(self, piece): def _read_directory(self, piece):
if not piece.startswith('IFCM'): if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.') raise LitError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if (32 + (num_chunks * chunk_size)) != len(piece): if (32 + (num_chunks * chunk_size)) != len(piece):
raise LitError('IFCM header has incorrect length') raise LitError('IFCM HEADER has incorrect length')
self.entries = {}
for i in xrange(num_chunks): for i in xrange(num_chunks):
offset = 32 + (i * chunk_size) offset = 32 + (i * chunk_size)
chunk = piece[offset:offset + chunk_size] chunk = piece[offset:offset + chunk_size]
@ -594,17 +571,17 @@ class LitFile(object):
entry = DirectoryEntry(name, section, offset, size) entry = DirectoryEntry(name, section, offset, size)
self.entries[name] = entry self.entries[name] = entry
def read_section_names(self): def _read_section_names(self):
if '::DataSpace/NameList' not in self.entries: if '::DataSpace/NameList' not in self.entries:
raise LitError('Lit file does not have a valid NameList') raise LitError('Lit file does not have a valid NameList')
raw = self.get_file('::DataSpace/NameList') raw = self.get_file('::DataSpace/NameList')
if len(raw) < 4: if len(raw) < 4:
raise LitError('Invalid Namelist section') raise LitError('Invalid Namelist section')
pos = 4 pos = 4
num_sections = u16(raw[2:pos]) self.num_sections = u16(raw[2:pos])
self.section_names = [""] * num_sections self.section_names = [""]*self.num_sections
self.section_data = [None] * num_sections self.section_data = [None]*self.num_sections
for section in xrange(num_sections): for section in xrange(self.num_sections):
size = u16(raw[pos:pos+2]) size = u16(raw[pos:pos+2])
pos += 2 pos += 2
size = size*2 + 2 size = size*2 + 2
@ -614,12 +591,11 @@ class LitFile(object):
raw[pos:pos+size].decode('utf-16-le').rstrip('\000') raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
pos += size pos += size
def read_manifest(self): def _read_manifest(self):
if '/manifest' not in self.entries: if '/manifest' not in self.entries:
raise LitError('Lit file does not have a valid manifest') raise LitError('Lit file does not have a valid manifest')
raw = self.get_file('/manifest') raw = self.get_file('/manifest')
self.manifest = {} self.manifest = {}
self.paths = {self.opf_path: None}
while raw: while raw:
slen, raw = ord(raw[0]), raw[1:] slen, raw = ord(raw[0]), raw[1:]
if slen == 0: break if slen == 0: break
@ -658,9 +634,28 @@ class LitFile(object):
for item in mlist: for item in mlist:
if item.path[0] == '/': if item.path[0] == '/':
item.path = os.path.basename(item.path) item.path = os.path.basename(item.path)
self.paths[item.path] = item
def read_drm(self): def _pretty_print(self, xml):
f = cStringIO.StringIO(xml.encode('utf-8'))
doc = etree.parse(f, parser=self.XML_PARSER)
pretty = etree.tostring(doc, encoding='ascii', pretty_print=True)
return XML_DECL + unicode(pretty)
def _read_meta(self):
path = 'content.opf'
raw = self.get_file('/meta')
xml = OPF_DECL
try:
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
self.meta = xml
def _read_drm(self):
self.drmlevel = 0 self.drmlevel = 0
if '/DRMStorage/Licenses/EUL' in self.entries: if '/DRMStorage/Licenses/EUL' in self.entries:
self.drmlevel = 5 self.drmlevel = 5
@ -671,7 +666,7 @@ class LitFile(object):
else: else:
return return
if self.drmlevel < 5: if self.drmlevel < 5:
msdes.deskey(self.calculate_deskey(), msdes.DE1) msdes.deskey(self._calculate_deskey(), msdes.DE1)
bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed')) bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
if bookkey[0] != '\000': if bookkey[0] != '\000':
raise LitError('Unable to decrypt title key!') raise LitError('Unable to decrypt title key!')
@ -679,7 +674,7 @@ class LitFile(object):
else: else:
raise DRMError("Cannot access DRM-protected book") raise DRMError("Cannot access DRM-protected book")
def calculate_deskey(self): def _calculate_deskey(self):
hashfiles = ['/meta', '/DRMStorage/DRMSource'] hashfiles = ['/meta', '/DRMStorage/DRMSource']
if self.drmlevel == 3: if self.drmlevel == 3:
hashfiles.append('/DRMStorage/DRMBookplate') hashfiles.append('/DRMStorage/DRMBookplate')
@ -703,18 +698,18 @@ class LitFile(object):
def get_file(self, name): def get_file(self, name):
entry = self.entries[name] entry = self.entries[name]
if entry.section == 0: if entry.section == 0:
return self.read_content(entry.offset, entry.size) return self._read_content(entry.offset, entry.size)
section = self.get_section(entry.section) section = self.get_section(entry.section)
return section[entry.offset:entry.offset+entry.size] return section[entry.offset:entry.offset+entry.size]
def get_section(self, section): def get_section(self, section):
data = self.section_data[section] data = self.section_data[section]
if not data: if not data:
data = self.get_section_uncached(section) data = self._get_section(section)
self.section_data[section] = data self.section_data[section] = data
return data return data
def get_section_uncached(self, section): def _get_section(self, section):
name = self.section_names[section] name = self.section_names[section]
path = '::DataSpace/Storage/' + name path = '::DataSpace/Storage/' + name
transform = self.get_file(path + '/Transform/List') transform = self.get_file(path + '/Transform/List')
@ -726,29 +721,29 @@ class LitFile(object):
raise LitError("ControlData is too short") raise LitError("ControlData is too short")
guid = msguid(transform) guid = msguid(transform)
if guid == DESENCRYPT_GUID: if guid == DESENCRYPT_GUID:
content = self.decrypt(content) content = self._decrypt(content)
control = control[csize:] control = control[csize:]
elif guid == LZXCOMPRESS_GUID: elif guid == LZXCOMPRESS_GUID:
reset_table = self.get_file( reset_table = self.get_file(
'/'.join(('::DataSpace/Storage', name, 'Transform', '/'.join(('::DataSpace/Storage', name, 'Transform',
LZXCOMPRESS_GUID, 'InstanceData/ResetTable'))) LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
content = self.decompress(content, control, reset_table) content = self._decompress(content, control, reset_table)
control = control[csize:] control = control[csize:]
else: else:
raise LitError("Unrecognized transform: %s." % repr(guid)) raise LitError("Unrecognized transform: %s." % repr(guid))
transform = transform[16:] transform = transform[16:]
return content return content
def decrypt(self, content): def _decrypt(self, content):
length = len(content) length = len(content)
extra = length & 0x7 extra = length & 0x7
if extra > 0: if extra > 0:
self.warn("content length not a multiple of block size") self._warn("content length not a multiple of block size")
content += "\0" * (8 - extra) content += "\0" * (8 - extra)
msdes.deskey(self.bookkey, msdes.DE1) msdes.deskey(self.bookkey, msdes.DE1)
return msdes.des(content) return msdes.des(content)
def decompress(self, content, control, reset_table): def _decompress(self, content, control, reset_table):
if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC": if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
raise LitError("Invalid ControlData tag value") raise LitError("Invalid ControlData tag value")
if len(reset_table) < (RESET_INTERVAL + 8): if len(reset_table) < (RESET_INTERVAL + 8):
@ -789,7 +784,7 @@ class LitFile(object):
result.append( result.append(
lzx.decompress(content[base:size], window_bytes)) lzx.decompress(content[base:size], window_bytes))
except lzx.LZXError: except lzx.LZXError:
self.warn("LZX decompression error; skipping chunk") self._warn("LZX decompression error; skipping chunk")
bytes_remaining -= window_bytes bytes_remaining -= window_bytes
base = size base = size
accum += int32(reset_table[RESET_INTERVAL:]) accum += int32(reset_table[RESET_INTERVAL:])
@ -799,88 +794,55 @@ class LitFile(object):
try: try:
result.append(lzx.decompress(content[base:], bytes_remaining)) result.append(lzx.decompress(content[base:], bytes_remaining))
except lzx.LZXError: except lzx.LZXError:
self.warn("LZX decompression error; skipping chunk") self._warn("LZX decompression error; skipping chunk")
bytes_remaining = 0 bytes_remaining = 0
if bytes_remaining > 0: if bytes_remaining > 0:
raise LitError("Failed to completely decompress section") raise LitError("Failed to completely decompress section")
return ''.join(result) return ''.join(result)
def get_entry_content(self, entry, pretty_print=False):
class LitReader(object): if 'spine' in entry.state:
def __init__(self, filename_or_stream): name = '/'.join(('/data', entry.internal, 'content'))
self._litfile = LitFile(filename_or_stream) path = entry.path
raw = self.get_file(name)
def namelist(self): decl, map = (OPF_DECL, OPF_MAP) \
return self._litfile.paths.keys() if name == '/meta' else (HTML_DECL, HTML_MAP)
content = decl + unicode(UnBinary(raw, path, self.manifest, map))
def exists(self, name):
return urlunquote(name) in self._litfile.paths
def read_xml(self, name):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
content = self._read_meta()
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = unbin.tree
else:
raise LitError('Requested non-XML content as XML')
return content
def read(self, name, pretty_print=False):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
meta = self._read_meta()
content = OPF_DECL + etree.tostring(
meta, encoding='ascii', pretty_print=pretty_print)
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = HTML_DECL
if pretty_print: if pretty_print:
content += etree.tostring(unbin.tree, content = self._pretty_print(content)
encoding='ascii', pretty_print=True) content = content.encode('utf-8')
else: else:
content += str(unbin) name = '/'.join(('/data', entry.internal))
else: content = self.get_file(name)
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
return content return content
def meta(): def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
def fget(self): output_dir = os.path.abspath(output_dir)
return self.read(self._litfile.opf_path) try:
return property(fget=fget) opf_path = os.path.splitext(
meta = meta() os.path.basename(self._stream.name))[0] + '.opf'
except AttributeError:
opf_path = 'content.opf'
opf_path = os.path.join(output_dir, opf_path)
self._ensure_dir(opf_path)
with open(opf_path, 'wb') as f:
xml = self.meta
if pretty_print:
xml = self._pretty_print(xml)
f.write(xml.encode('utf-8'))
for entry in self.manifest.values():
path = os.path.join(output_dir, entry.path)
self._ensure_dir(path)
with open(path, 'wb') as f:
f.write(self.get_entry_content(entry, pretty_print))
def _ensure_dir(self, path): def _ensure_dir(self, path):
dir = os.path.dirname(path) dir = os.path.dirname(path)
if not os.path.isdir(dir): if not os.path.isdir(dir):
os.makedirs(dir) os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu(), pretty_print=False): def _warn(self, msg):
for name in self.namelist(): print "WARNING: %s" % (msg,)
path = os.path.join(output_dir, name)
self._ensure_dir(path)
with open(path, 'wb') as f:
f.write(self.read(name, pretty_print=pretty_print))
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')
try:
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
return unbin.tree
def option_parser(): def option_parser():
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
@ -890,8 +852,7 @@ def option_parser():
help=_('Output directory. Defaults to current directory.')) help=_('Output directory. Defaults to current directory.'))
parser.add_option( parser.add_option(
'-p', '--pretty-print', default=False, action='store_true', '-p', '--pretty-print', default=False, action='store_true',
help=_('Legibly format extracted markup.' \ help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
' May modify meaningful whitespace.'))
parser.add_option( parser.add_option(
'--verbose', default=False, action='store_true', '--verbose', default=False, action='store_true',
help=_('Useful for debugging.')) help=_('Useful for debugging.'))

View File

@ -27,11 +27,16 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, OPF_MIME, XML_NS, XML CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \ from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
urlnormalize, xpath urlnormalize, xpath
from calibre.ebooks.oeb.base import FauxLogger, OEBBook from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.lit.lzx import Compressor from calibre.ebooks.lit.lzx import Compressor
import calibre import calibre
from calibre import LoggingInterface
from calibre import plugins from calibre import plugins
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
@ -138,17 +143,16 @@ def warn(x):
class ReBinary(object): class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'} NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()): def __init__(self, root, path, oeb, map=HTML_MAP):
self.path = path self.item = item
self.logger = logger self.logger = oeb.logger
self.dir = os.path.dirname(path)
self.manifest = oeb.manifest self.manifest = oeb.manifest
self.tags, self.tattrs = map self.tags, self.tattrs = map
self.buf = StringIO() self.buf = StringIO()
self.anchors = [] self.anchors = []
self.page_breaks = [] self.page_breaks = []
self.is_html = is_html = map is HTML_MAP self.is_html = is_html = map is HTML_MAP
self.stylizer = Stylizer(root, path, oeb) if is_html else None self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
self.tree_to_binary(root) self.tree_to_binary(root)
self.content = self.buf.getvalue() self.content = self.buf.getvalue()
self.ahc = self.build_ahc() if is_html else None self.ahc = self.build_ahc() if is_html else None
@ -205,6 +209,8 @@ class ReBinary(object):
if attr in ('href', 'src'): if attr in ('href', 'src'):
value = urlnormalize(value) value = urlnormalize(value)
path, frag = urldefrag(value) path, frag = urldefrag(value)
if self.item:
path = self.item.abshref(path)
prefix = unichr(3) prefix = unichr(3)
if path in self.manifest.hrefs: if path in self.manifest.hrefs:
prefix = unichr(2) prefix = unichr(2)
@ -217,7 +223,7 @@ class ReBinary(object):
elif attr.startswith('ms--'): elif attr.startswith('ms--'):
attr = '%' + attr[4:] attr = '%' + attr[4:]
elif tag == 'link' and attr == 'type' and value in OEB_STYLES: elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
value = OEB_CSS_MIME value = CSS_MIME
if attr in tattrs: if attr in tattrs:
self.write(tattrs[attr]) self.write(tattrs[attr])
else: else:
@ -270,7 +276,7 @@ class ReBinary(object):
def build_ahc(self): def build_ahc(self):
if len(self.anchors) > 6: if len(self.anchors) > 6:
self.logger.log_warn("More than six anchors in file %r. " \ self.logger.log_warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.path) "Some links may not work properly." % self.item.href)
data = StringIO() data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8')) data.write(unichr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors: for anchor, offset in self.anchors:
@ -294,10 +300,9 @@ def preserve(function):
return wrapper return wrapper
class LitWriter(object): class LitWriter(object):
def __init__(self, oeb, logger=FauxLogger()): def __init__(self):
self._oeb = oeb # Wow, no options
self._logger = logger pass
self._litize_oeb()
def _litize_oeb(self): def _litize_oeb(self):
oeb = self._oeb oeb = self._oeb
@ -306,32 +311,27 @@ class LitWriter(object):
if oeb.metadata.cover: if oeb.metadata.cover:
id = str(oeb.metadata.cover[0]) id = str(oeb.metadata.cover[0])
cover = oeb.manifest[id] cover = oeb.manifest[id]
elif MS_COVER_TYPE in oeb.guide:
href = oeb.guide[MS_COVER_TYPE].href
cover = oeb.manifest.hrefs[href]
elif 'cover' in oeb.guide:
href = oeb.guide['cover'].href
cover = oeb.manifest.hrefs[href]
else:
html = oeb.spine[0].data
imgs = xpath(html, '//img[position()=1]')
href = imgs[0].get('src') if imgs else None
cover = oeb.manifest.hrefs[href] if href else None
if cover:
if not oeb.metadata.cover:
oeb.metadata.add('cover', cover.id)
for type, title in ALL_MS_COVER_TYPES: for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide: if type not in oeb.guide:
oeb.guide.add(type, title, cover.href) oeb.guide.add(type, title, cover.href)
else: else:
self._logger.log_warn('No suitable cover image found.') self._logger.warn('No suitable cover image found.')
def dump(self, stream): def dump(self, oeb, path):
if hasattr(path, 'write'):
return self._dump_stream(oeb, path)
with open(path, 'w+b') as stream:
return self._dump_stream(oeb, stream)
def _dump_stream(self, oeb, stream):
self._oeb = oeb
self._logger = oeb.logger
self._stream = stream self._stream = stream
self._sections = [StringIO() for i in xrange(4)] self._sections = [StringIO() for i in xrange(4)]
self._directory = [] self._directory = []
self._meta = None self._meta = None
self._dump() self._litize_oeb()
self._write_content()
def _write(self, *data): def _write(self, *data):
for datum in data: for datum in data:
@ -345,7 +345,7 @@ class LitWriter(object):
def _tell(self): def _tell(self):
return self._stream.tell() return self._stream.tell()
def _dump(self): def _write_content(self):
# Build content sections # Build content sections
self._build_sections() self._build_sections()
@ -474,8 +474,7 @@ class LitWriter(object):
secnum = 0 secnum = 0
if not isinstance(data, basestring): if not isinstance(data, basestring):
self._add_folder(name) self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP, rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
logger=self._logger)
self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0) self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks item.page_breaks = rebin.page_breaks
@ -554,8 +553,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1' meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
logger=self._logger)
meta = rebin.content meta = rebin.content
self._meta = meta self._meta = meta
self._add_file('/meta', meta) self._add_file('/meta', meta)
@ -719,19 +717,31 @@ def option_parser():
help=_('Useful for debugging.')) help=_('Useful for debugging.'))
return parser return parser
def oeb2lit(opts, opfpath): def oeb2lit(opts, inpath):
logger = LoggingInterface(logging.getLogger('oeb2lit')) logger = Logger(logging.getLogger('oeb2lit'))
logger.setup_cli_handler(opts.verbose) logger.setup_cli_handler(opts.verbose)
litpath = opts.output outpath = opts.output
if litpath is None: if outpath is None:
litpath = os.path.basename(opfpath) outpath = os.path.basename(inpath)
litpath = os.path.splitext(litpath)[0] + '.lit' outpath = os.path.splitext(outpath)[0] + '.lit'
litpath = os.path.abspath(litpath) outpath = os.path.abspath(outpath)
lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger) context = Context('Firefox', 'MSReader')
with open(litpath, 'wb') as f: oeb = OEBBook(inpath, logger=logger)
lit.dump(f) tocadder = HTMLTOCAdder()
run_plugins_on_postprocess(litpath, 'lit') tocadder.transform(oeb, context)
logger.log_info(_('Output written to ')+litpath) mangler = CaseMangler()
mangler.transform(oeb, context)
fbase = context.dest.fbase
flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True)
flattener.transform(oeb, context)
rasterizer = SVGRasterizer()
rasterizer.transform(oeb, context)
trimmer = ManifestTrimmer()
trimmer.transform(oeb, context)
lit = LitWriter()
lit.dump(oeb, outpath)
run_plugins_on_postprocess(outpath, 'lit')
logger.info(_('Output written to ') + outpath)
def main(argv=sys.argv): def main(argv=sys.argv):
@ -740,8 +750,8 @@ def main(argv=sys.argv):
if len(args) != 1: if len(args) != 1:
parser.print_help() parser.print_help()
return 1 return 1
opfpath = args[0] inpath = args[0]
oeb2lit(opts, opfpath) oeb2lit(opts, inpath)
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -425,7 +425,7 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
thumbnail = None thumbnail = None
if not pages: if not pages:
raise ValueError('Could not find any pages in the comic: %s'%source) raise ValueError('Could not find any pages in the comic: %s'%source)
if not opts.no_process: if not getattr(opts, 'no_process', False):
pages, failures, tdir2 = process_pages(pages, opts, notification) pages, failures, tdir2 = process_pages(pages, opts, notification)
if not pages: if not pages:
raise ValueError('Could not find any valid pages in the comic: %s'%source) raise ValueError('Could not find any valid pages in the comic: %s'%source)
@ -443,7 +443,7 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
if output_format == 'pdf': if output_format == 'pdf':
create_pdf(pages, opts.profile, opts, thumbnail=thumbnail) create_pdf(pages, opts.profile, opts, thumbnail=thumbnail)
shutil.rmtree(tdir) shutil.rmtree(tdir)
if not opts.no_process: if not getattr(opts, 'no_process', False):
shutil.rmtree(tdir2) shutil.rmtree(tdir2)
@ -457,7 +457,7 @@ def main(args=sys.argv, notification=None, output_format='lrf'):
if not callable(notification): if not callable(notification):
pb = ProgressBar(terminal_controller, _('Rendering comic pages...'), pb = ProgressBar(terminal_controller, _('Rendering comic pages...'),
no_progress_bar=opts.no_progress_bar) no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
notification = pb.update notification = pb.update
source = os.path.abspath(args[1]) source = os.path.abspath(args[1])

View File

@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
# Remove self closing script tags as they also mess up BeautifulSoup # Remove self closing script tags as they also mess up BeautifulSoup
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''), (re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
# BeautifulSoup treats self closing <div> tags as open <div> tags
(re.compile(r'(?i)<\s*div([^>]*)/\s*>'),
lambda match: '<div%s></div>'%match.group(1))
] ]
# Fix Baen markup # Fix Baen markup
BAEN = [ BAEN = [
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
# Fix pdftohtml markup # Fix pdftohtml markup
PDFTOHTML = [ PDFTOHTML = [
# Remove <hr> tags # Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'), (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers # Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''), (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p> # Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
if (css.has_key('display') and css['display'].lower() == 'none') or \ if (css.has_key('display') and css['display'].lower() == 'none') or \
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'): (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
return '' return ''
text = u'' text, alt_text = u'', u''
for c in tag.contents: for c in tag.contents:
if limit != None and len(text) > limit: if limit != None and len(text) > limit:
break break
if isinstance(c, HTMLConverter.IGNORED_TAGS): if isinstance(c, HTMLConverter.IGNORED_TAGS):
return u'' continue
if isinstance(c, NavigableString): if isinstance(c, NavigableString):
text += unicode(c) text += unicode(c)
elif isinstance(c, Tag): elif isinstance(c, Tag):
if c.name.lower() == 'img' and c.has_key('alt'): if c.name.lower() == 'img' and c.has_key('alt'):
text += c['alt'] alt_text += c['alt']
return text continue
text += self.get_text(c) text += self.get_text(c)
return text return text if text.strip() else alt_text
def process_links(self): def process_links(self):
def add_toc_entry(text, target): def add_toc_entry(text, target):

View File

@ -799,18 +799,39 @@ class Text(LRFStream):
length = len(self.stream) length = len(self.stream)
style = self.style.as_dict() style = self.style.as_dict()
current_style = style.copy() current_style = style.copy()
text_tags = set(list(TextAttr.tag_map.keys()) + \
list(Text.text_tags.keys()) + \
list(ruby_tags.keys()))
text_tags -= set([0xf500+i for i in range(10)])
text_tags.add(0xf5cc)
while stream.tell() < length: while stream.tell() < length:
# Is there some text beofre a tag? # Is there some text before a tag?
pos = self.stream.find('\xf5', stream.tell()) - 1 def find_first_tag(start):
if pos > 0: pos = self.stream.find('\xf5', start)
self.add_text(self.stream[stream.tell():pos]) if pos == -1:
stream.seek(pos) return -1
elif pos == -2: # No tags in this stream try:
stream.seek(pos-1)
_t = Tag(stream)
if _t.id in text_tags:
return pos-1
return find_first_tag(pos+1)
except:
return find_first_tag(pos+1)
start_pos = stream.tell()
tag_pos = find_first_tag(start_pos)
if tag_pos >= start_pos:
if tag_pos > start_pos:
self.add_text(self.stream[start_pos:tag_pos])
stream.seek(tag_pos)
else: # No tags in this stream
self.add_text(self.stream) self.add_text(self.stream)
stream.seek(0, 2) stream.seek(0, 2)
print repr(self.stream)
break break
tag = Tag(stream) tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
refpage = struct.unpack("<I", stream.read(4))[0] refpage = struct.unpack("<I", stream.read(4))[0]
refobj = struct.unpack("<I", stream.read(4))[0] refobj = struct.unpack("<I", stream.read(4))[0]
cnt = struct.unpack("<H", stream.read(2))[0] cnt = struct.unpack("<H", stream.read(2))[0]
label = unicode(stream.read(cnt), "utf_16") raw = stream.read(cnt)
label = raw.decode('utf_16_le')
self._contents.append(TocLabel(refpage, refobj, label)) self._contents.append(TocLabel(refpage, refobj, label))
c -= 1 c -= 1

View File

@ -249,7 +249,7 @@ class MetaInformation(object):
ans = u'' ans = u''
ans += u'Title : ' + unicode(self.title) + u'\n' ans += u'Title : ' + unicode(self.title) + u'\n'
if self.authors: if self.authors:
ans += u'Author : ' + (', '.join(self.authors) if self.authors is not None else u'None') ans += u'Author : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n' ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
if self.publisher: if self.publisher:
ans += u'Publisher: '+ unicode(self.publisher) + u'\n' ans += u'Publisher: '+ unicode(self.publisher) + u'\n'

View File

@ -0,0 +1,63 @@
'''
Convert any ebook format to Mobipocket.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, os, glob, logging
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options
def config(defaults=None):
return common_config(defaults=defaults, name='mobi')
def option_parser(usage=USAGE):
usage = usage % ('Mobipocket', formats())
parser = config().option_parser(usage=usage)
add_mobi_options(parser)
return parser
def any2mobi(opts, path):
ext = os.path.splitext(path)[1]
if not ext:
raise ValueError('Unknown file type: '+path)
ext = ext.lower()[1:]
if opts.output is None:
opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'
opts.output = os.path.abspath(opts.output)
orig_output = opts.output
with TemporaryDirectory('_any2mobi') as tdir:
oebdir = os.path.join(tdir, 'oeb')
os.mkdir(oebdir)
opts.output = os.path.join(tdir, 'dummy.epub')
opts.profile = 'None'
any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
opts.output = orig_output
logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
oeb2mobi(opts, opf)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print 'No input file specified.'
return 1
any2mobi(opts, args[1])
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -114,10 +114,10 @@ class MobiMLizer(object):
def mobimlize_measure(self, ptsize): def mobimlize_measure(self, ptsize):
if isinstance(ptsize, basestring): if isinstance(ptsize, basestring):
return ptsize return ptsize
fbase = self.profile.fbase embase = self.profile.fbase
if ptsize < fbase: if round(ptsize) < embase:
return "%dpt" % int(round(ptsize)) return "%dpt" % int(round(ptsize))
return "%dem" % int(round(ptsize / fbase)) return "%dem" % int(round(ptsize / embase))
def preize_text(self, text): def preize_text(self, text):
text = unicode(text).replace(u' ', u'\xa0') text = unicode(text).replace(u' ', u'\xa0')
@ -171,8 +171,7 @@ class MobiMLizer(object):
para = etree.SubElement(para, XHTML('blockquote')) para = etree.SubElement(para, XHTML('blockquote'))
emleft -= 1 emleft -= 1
else: else:
ptag = 'p' #tag if tag in HEADER_TAGS else 'p' para = wrapper = etree.SubElement(parent, XHTML('p'))
para = wrapper = etree.SubElement(parent, XHTML(ptag))
bstate.inline = bstate.para = para bstate.inline = bstate.para = para
vspace = bstate.vpadding + bstate.vmargin vspace = bstate.vpadding + bstate.vmargin
bstate.vpadding = bstate.vmargin = 0 bstate.vpadding = bstate.vmargin = 0
@ -213,11 +212,11 @@ class MobiMLizer(object):
inline = etree.SubElement(inline, XHTML('sup')) inline = etree.SubElement(inline, XHTML('sup'))
elif valign == 'sub': elif valign == 'sub':
inline = etree.SubElement(inline, XHTML('sub')) inline = etree.SubElement(inline, XHTML('sub'))
if istate.family == 'monospace': elif fsize != 3:
inline = etree.SubElement(inline, XHTML('tt'))
if fsize != 3:
inline = etree.SubElement(inline, XHTML('font'), inline = etree.SubElement(inline, XHTML('font'),
size=str(fsize)) size=str(fsize))
if istate.family == 'monospace':
inline = etree.SubElement(inline, XHTML('tt'))
if istate.italic: if istate.italic:
inline = etree.SubElement(inline, XHTML('i')) inline = etree.SubElement(inline, XHTML('i'))
if istate.bold: if istate.bold:
@ -241,7 +240,8 @@ class MobiMLizer(object):
or namespace(elem.tag) != XHTML_NS: or namespace(elem.tag) != XHTML_NS:
return return
style = stylizer.style(elem) style = stylizer.style(elem)
if style['display'] == 'none' \ # <mbp:frame-set/> does not exist lalalala
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden': or style['visibility'] == 'hidden':
return return
tag = barename(elem.tag) tag = barename(elem.tag)
@ -303,7 +303,7 @@ class MobiMLizer(object):
else: else:
istate.family = 'serif' istate.family = 'serif'
valign = style['vertical-align'] valign = style['vertical-align']
if valign in ('super', 'sup') or asfloat(valign) > 0: if valign in ('super', 'text-top') or asfloat(valign) > 0:
istate.valign = 'super' istate.valign = 'super'
elif valign == 'sub' or asfloat(valign) < 0: elif valign == 'sub' or asfloat(valign) < 0:
istate.valign = 'sub' istate.valign = 'sub'

View File

@ -69,15 +69,15 @@ def compress_doc(data):
out.write(pack('>B', onch ^ 0x80)) out.write(pack('>B', onch ^ 0x80))
i += 1 i += 1
continue continue
if och == 0 or (och >= 9 and och < 0x80): if och == 0 or (och > 8 and och < 0x80):
out.write(ch) out.write(ch)
else: else:
j = i j = i
binseq = [ch] binseq = [ch]
while j < ldata: while j < ldata and len(binseq) < 8:
ch = data[j] ch = data[j]
och = ord(ch) och = ord(ch)
if och < 1 or (och > 8 and och < 0x80): if och == 0 or (och > 8 and och < 0x80):
break break
binseq.append(ch) binseq.append(ch)
j += 1 j += 1

View File

@ -33,8 +33,7 @@ class EXTHHeader(object):
self.length, self.num_items = struct.unpack('>LL', raw[4:12]) self.length, self.num_items = struct.unpack('>LL', raw[4:12])
raw = raw[12:] raw = raw[12:]
pos = 0 pos = 0
self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
self.mi = MetaInformation('Unknown', ['Unknown'])
self.has_fake_cover = True self.has_fake_cover = True
for i in range(self.num_items): for i in range(self.num_items):
@ -49,14 +48,24 @@ class EXTHHeader(object):
self.cover_offset, = struct.unpack('>L', content) self.cover_offset, = struct.unpack('>L', content)
elif id == 202: elif id == 202:
self.thumbnail_offset, = struct.unpack('>L', content) self.thumbnail_offset, = struct.unpack('>L', content)
#else:
# print 'unknown record', id, repr(content)
title = re.search(r'\0+([^\0]+)\0+', raw[pos:]) title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
if title: if title:
self.mi.title = title.group(1).decode(codec, 'ignore') title = title.group(1).decode(codec, 'replace')
if len(title) > 2:
self.mi.title = title
else:
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
if title:
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
def process_metadata(self, id, content, codec): def process_metadata(self, id, content, codec):
if id == 100: if id == 100:
self.mi.authors = [content.decode(codec, 'ignore').strip()] if self.mi.authors == [_('Unknown')]:
self.mi.authors = []
self.mi.authors.append(content.decode(codec, 'ignore').strip())
elif id == 101: elif id == 101:
self.mi.publisher = content.decode(codec, 'ignore').strip() self.mi.publisher = content.decode(codec, 'ignore').strip()
elif id == 103: elif id == 103:
@ -67,7 +76,8 @@ class EXTHHeader(object):
if not self.mi.tags: if not self.mi.tags:
self.mi.tags = [] self.mi.tags = []
self.mi.tags.append(content.decode(codec, 'ignore')) self.mi.tags.append(content.decode(codec, 'ignore'))
#else:
# print 'unhandled metadata record', id, repr(content), codec
class BookHeader(object): class BookHeader(object):
@ -466,6 +476,10 @@ def get_metadata(stream):
cover = os.path.join(tdir, mi.cover) cover = os.path.join(tdir, mi.cover)
if os.access(cover, os.R_OK): if os.access(cover, os.R_OK):
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read()) mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
else:
path = os.path.join(tdir, 'images', '00001.jpg')
if os.access(path, os.R_OK):
mi.cover_data = ('JPEG', open(path, 'rb').read())
return mi return mi
def option_parser(): def option_parser():

View File

@ -17,26 +17,30 @@ import re
from itertools import izip, count from itertools import izip, count
from collections import defaultdict from collections import defaultdict
from urlparse import urldefrag from urlparse import urldefrag
import logging
from lxml import etree from lxml import etree
from PIL import Image from PIL import Image
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
OEB_RASTER_IMAGES OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
from calibre.ebooks.oeb.base import FauxLogger, OEBBook from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.mobi.palmdoc import compress_doc from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
from calibre.customize.ui import run_plugins_on_postprocess
from calibre.utils.config import OptionParser
from optparse import OptionGroup
# TODO: # TODO:
# - Allow override CSS (?) # - Allow override CSS (?)
# - Generate index records # - Generate index records
# - Generate in-content ToC # - Optionally rasterize tables
# - Command line options, etc.
EXTH_CODES = { EXTH_CODES = {
'creator': 100, 'creator': 100,
@ -59,7 +63,8 @@ UNCOMPRESSED = 1
PALMDOC = 2 PALMDOC = 2
HUFFDIC = 17480 HUFFDIC = 17480
MAX_IMAGE_SIZE = 63 * 1024 PALM_MAX_IMAGE_SIZE = 63 * 1024
OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
MAX_THUMB_SIZE = 16 * 1024 MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240) MAX_THUMB_DIMEN = (180, 240)
@ -88,7 +93,6 @@ class Serializer(object):
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
def __init__(self, oeb, images): def __init__(self, oeb, images):
oeb.logger.info('Serializing markup content...')
self.oeb = oeb self.oeb = oeb
self.images = images self.images = images
self.id_offsets = {} self.id_offsets = {}
@ -117,10 +121,16 @@ class Serializer(object):
path, frag = urldefrag(ref.href) path, frag = urldefrag(ref.href)
if hrefs[path].media_type not in OEB_DOCS: if hrefs[path].media_type not in OEB_DOCS:
continue continue
buffer.write('<reference title="%s" type="%s" ' buffer.write('<reference type="')
% (ref.title, ref.type)) self.serialize_text(ref.type, quot=True)
buffer.write('" ')
if ref.title is not None:
buffer.write('title="')
self.serialize_text(ref.title, quot=True)
buffer.write('" ')
self.serialize_href(ref.href) self.serialize_href(ref.href)
buffer.write('/>') # Space required or won't work, I kid you not
buffer.write(' />')
buffer.write('</guide>') buffer.write('</guide>')
def serialize_href(self, href, base=None): def serialize_href(self, href, base=None):
@ -144,6 +154,12 @@ class Serializer(object):
def serialize_body(self): def serialize_body(self):
buffer = self.buffer buffer = self.buffer
buffer.write('<body>') buffer.write('<body>')
# CybookG3 'Start Reading' link
if 'text' in self.oeb.guide:
href = self.oeb.guide['text'].href
buffer.write('<a ')
self.serialize_href(href)
buffer.write(' />')
spine = [item for item in self.oeb.spine if item.linear] spine = [item for item in self.oeb.spine if item.linear]
spine.extend([item for item in self.oeb.spine if not item.linear]) spine.extend([item for item in self.oeb.spine if not item.linear])
for item in spine: for item in spine:
@ -185,8 +201,10 @@ class Serializer(object):
if attr == 'href': if attr == 'href':
if self.serialize_href(val, item): if self.serialize_href(val, item):
continue continue
elif attr == 'src' and val in hrefs: elif attr == 'src':
index = self.images[val] href = item.abshref(val)
if href in hrefs:
index = self.images[href]
buffer.write('recindex="%05d"' % index) buffer.write('recindex="%05d"' % index)
continue continue
buffer.write(attr) buffer.write(attr)
@ -223,9 +241,11 @@ class Serializer(object):
class MobiWriter(object): class MobiWriter(object):
def __init__(self, compression=None, logger=FauxLogger()): COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, compression=None, imagemax=None):
self._compression = compression or UNCOMPRESSED self._compression = compression or UNCOMPRESSED
self._logger = logger self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
def dump(self, oeb, path): def dump(self, oeb, path):
if hasattr(path, 'write'): if hasattr(path, 'write'):
@ -293,6 +313,7 @@ class MobiWriter(object):
return data, overlap return data, overlap
def _generate_text(self): def _generate_text(self):
self._oeb.logger.info('Serializing markup content...')
serializer = Serializer(self._oeb, self._images) serializer = Serializer(self._oeb, self._images)
breaks = serializer.breaks breaks = serializer.breaks
text = serializer.text text = serializer.text
@ -300,6 +321,8 @@ class MobiWriter(object):
text = StringIO(text) text = StringIO(text)
nrecords = 0 nrecords = 0
offset = 0 offset = 0
if self._compression != UNCOMPRESSED:
self._oeb.logger.info('Compressing markup content...')
data, overlap = self._read_text_record(text) data, overlap = self._read_text_record(text)
while len(data) > 0: while len(data) > 0:
if self._compression == PALMDOC: if self._compression == PALMDOC:
@ -335,7 +358,9 @@ class MobiWriter(object):
format = image.format format = image.format
changed = False changed = False
if image.format not in ('JPEG', 'GIF'): if image.format not in ('JPEG', 'GIF'):
format = 'GIF' width, height = image.size
area = width * height
format = 'GIF' if area <= 40000 else 'JPEG'
changed = True changed = True
if dimen is not None: if dimen is not None:
image.thumbnail(dimen, Image.ANTIALIAS) image.thumbnail(dimen, Image.ANTIALIAS)
@ -368,13 +393,14 @@ class MobiWriter(object):
return data return data
def _generate_images(self): def _generate_images(self):
self._oeb.logger.warn('Serializing images...')
images = [(index, href) for href, index in self._images.items()] images = [(index, href) for href, index in self._images.items()]
images.sort() images.sort()
metadata = self._oeb.metadata metadata = self._oeb.metadata
coverid = metadata.cover[0] if metadata.cover else None coverid = metadata.cover[0] if metadata.cover else None
for _, href in images: for _, href in images:
item = self._oeb.manifest.hrefs[href] item = self._oeb.manifest.hrefs[href]
data = self._rescale_image(item.data, MAX_IMAGE_SIZE) data = self._rescale_image(item.data, self._imagemax)
self._records.append(data) self._records.append(data)
def _generate_record0(self): def _generate_record0(self):
@ -418,7 +444,8 @@ class MobiWriter(object):
if term not in EXTH_CODES: continue if term not in EXTH_CODES: continue
code = EXTH_CODES[term] code = EXTH_CODES[term]
for item in oeb.metadata[term]: for item in oeb.metadata[term]:
data = unicode(item).encode('utf-8') data = self.COLLAPSE_RE.sub(' ', unicode(item))
data = data.encode('utf-8')
exth.write(pack('>II', code, len(data) + 8)) exth.write(pack('>II', code, len(data) + 8))
exth.write(data) exth.write(data)
nrecs += 1 nrecs += 1
@ -467,29 +494,90 @@ class MobiWriter(object):
self._write(record) self._write(record)
def main(argv=sys.argv): def add_mobi_options(parser):
from calibre.ebooks.oeb.base import DirWriter profiles = Context.PROFILES.keys()
inpath, outpath = argv[1:] profiles.sort()
context = Context('Firefox', 'MobiDesktop') profiles = ', '.join(profiles)
oeb = OEBBook(inpath) group = OptionGroup(parser, _('Mobipocket'),
#writer = MobiWriter(compression=PALMDOC) _('Mobipocket-specific options.'))
writer = MobiWriter(compression=UNCOMPRESSED) group.add_option(
#writer = DirWriter() '-c', '--compress', default=False, action='store_true',
help=_('Compress file text using PalmDOC compression.'))
group.add_option(
'-r', '--rescale-images', default=False, action='store_true',
help=_('Modify images to meet Palm device size limitations.'))
parser.add_option_group(group)
group = OptionGroup(parser, _('Profiles'), _('Device renderer profiles. '
'Affects conversion of default font sizes and rasterization '
'resolution. Valid profiles are: %s.') % profiles)
group.add_option(
'--source-profile', default='Browser', metavar='PROFILE',
help=_("Source renderer profile. Default is 'Browser'."))
group.add_option(
'--dest-profile', default='CybookG3', metavar='PROFILE',
help=_("Destination renderer profile. Default is 'CybookG3'."))
parser.add_option_group(group)
return
def option_parser():
parser = OptionParser(usage=_('%prog [options] OPFFILE'))
parser.add_option(
'-o', '--output', default=None,
help=_('Output file. Default is derived from input filename.'))
parser.add_option(
'-v', '--verbose', default=False, action='store_true',
help=_('Useful for debugging.'))
add_mobi_options(parser)
return parser
def oeb2mobi(opts, inpath):
logger = Logger(logging.getLogger('oeb2mobi'))
logger.setup_cli_handler(opts.verbose)
outpath = opts.output
if outpath is None:
outpath = os.path.basename(inpath)
outpath = os.path.splitext(outpath)[0] + '.mobi'
source = opts.source_profile
if source not in Context.PROFILES:
logger.error(_('Unknown source profile %r') % source)
return 1
dest = opts.dest_profile
if dest not in Context.PROFILES:
logger.error(_('Unknown destination profile %r') % dest)
return 1
compression = PALMDOC if opts.compress else UNCOMPRESSED
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
context = Context(source, dest)
oeb = OEBBook(inpath, logger=logger)
tocadder = HTMLTOCAdder()
tocadder.transform(oeb, context)
mangler = CaseMangler()
mangler.transform(oeb, context)
fbase = context.dest.fbase fbase = context.dest.fbase
fkey = context.dest.fnums.values() fkey = context.dest.fnums.values()
tocadder = HTMLTOCAdder()
flattener = CSSFlattener( flattener = CSSFlattener(
fbase=fbase, fkey=fkey, unfloat=True, untable=True) fbase=fbase, fkey=fkey, unfloat=True, untable=True)
rasterizer = SVGRasterizer()
trimmer = ManifestTrimmer()
mobimlizer = MobiMLizer()
tocadder.transform(oeb, context)
flattener.transform(oeb, context) flattener.transform(oeb, context)
rasterizer = SVGRasterizer()
rasterizer.transform(oeb, context) rasterizer.transform(oeb, context)
mobimlizer.transform(oeb, context) trimmer = ManifestTrimmer()
trimmer.transform(oeb, context) trimmer.transform(oeb, context)
mobimlizer = MobiMLizer()
mobimlizer.transform(oeb, context)
writer = MobiWriter(compression=compression, imagemax=imagemax)
writer.dump(oeb, outpath) writer.dump(oeb, outpath)
return 0 run_plugins_on_postprocess(outpath, 'mobi')
logger.info(_('Output written to ') + outpath)
def main(argv=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(argv[1:])
if len(args) != 1:
parser.print_help()
return 1
inpath = args[0]
retval = oeb2mobi(opts, inpath)
return retval
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())

View File

@ -67,11 +67,13 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])
MS_COVER_TYPE = 'other.ms-coverimage-standard' MS_COVER_TYPE = 'other.ms-coverimage-standard'
ENTITYDEFS = dict(htmlentitydefs.entitydefs) recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace')
ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items())
del ENTITYDEFS['lt'] del ENTITYDEFS['lt']
del ENTITYDEFS['gt'] del ENTITYDEFS['gt']
del ENTITYDEFS['quot'] del ENTITYDEFS['quot']
del ENTITYDEFS['amp'] del ENTITYDEFS['amp']
del recode
def element(parent, *args, **kwargs): def element(parent, *args, **kwargs):
@ -351,6 +353,9 @@ class Manifest(object):
def __eq__(self, other): def __eq__(self, other):
return id(self) == id(other) return id(self) == id(other)
def __ne__(self, other):
return not self.__eq__(other)
def __cmp__(self, other): def __cmp__(self, other):
result = cmp(self.spine_position, other.spine_position) result = cmp(self.spine_position, other.spine_position)
if result != 0: if result != 0:
@ -534,7 +539,27 @@ class Spine(object):
class Guide(object): class Guide(object):
class Reference(object): class Reference(object):
_TYPES_TITLES = [('cover', 'Cover'), ('title-page', 'Title Page'),
('toc', 'Table of Contents'), ('index', 'Index'),
('glossary', 'Glossary'), ('acknowledgements', 'Acknowledgements'),
('bibliography', 'Bibliography'), ('colophon', 'Colophon'),
('copyright-page', 'Copyright'), ('dedication', 'Dedication'),
('epigraph', 'Epigraph'), ('foreword', 'Foreword'),
('loi', 'List of Illustrations'), ('lot', 'List of Tables'),
('notes', 'Notes'), ('preface', 'Preface'),
('text', 'Main Text')]
TYPES = set(t for t, _ in _TYPES_TITLES)
TITLES = dict(_TYPES_TITLES)
ORDER = dict((t, i) for (t, _), i in izip(_TYPES_TITLES, count(0)))
def __init__(self, type, title, href): def __init__(self, type, title, href):
if type.lower() in self.TYPES:
type = type.lower()
elif type not in self.TYPES and \
not type.startswith('other.'):
type = 'other.' + type
if not title:
title = self.TITLES.get(type, None)
self.type = type self.type = type
self.title = title self.title = title
self.href = urlnormalize(href) self.href = urlnormalize(href)
@ -543,6 +568,17 @@ class Guide(object):
return 'Reference(type=%r, title=%r, href=%r)' \ return 'Reference(type=%r, title=%r, href=%r)' \
% (self.type, self.title, self.href) % (self.type, self.title, self.href)
def _order():
def fget(self):
return self.ORDER.get(self.type, self.type)
return property(fget=fget)
_order = _order()
def __cmp__(self, other):
if not isinstance(other, Guide.Reference):
return NotImplemented
return cmp(self._order, other._order)
def __init__(self, oeb): def __init__(self, oeb):
self.oeb = oeb self.oeb = oeb
self.refs = {} self.refs = {}
@ -552,17 +588,15 @@ class Guide(object):
self.refs[type] = ref self.refs[type] = ref
return ref return ref
def by_type(self, type):
return self.ref_types[type]
def iterkeys(self): def iterkeys(self):
for type in self.refs: for type in self.refs:
yield type yield type
__iter__ = iterkeys __iter__ = iterkeys
def values(self): def values(self):
for ref in self.refs.values(): values = list(self.refs.values())
yield ref values.sort()
return values
def items(self): def items(self):
for type, ref in self.refs.items(): for type, ref in self.refs.items():
@ -914,11 +948,11 @@ class OEBBook(object):
cover = self.manifest.hrefs[href] cover = self.manifest.hrefs[href]
elif xpath(html, '//h:img[position()=1]'): elif xpath(html, '//h:img[position()=1]'):
img = xpath(html, '//h:img[position()=1]')[0] img = xpath(html, '//h:img[position()=1]')[0]
href = img.get('src') href = spine0.abshref(img.get('src'))
cover = self.manifest.hrefs[href] cover = self.manifest.hrefs[href]
elif xpath(html, '//h:object[position()=1]'): elif xpath(html, '//h:object[position()=1]'):
object = xpath(html, '//h:object[position()=1]')[0] object = xpath(html, '//h:object[position()=1]')[0]
href = object.get('data') href = spine0.abshref(object.get('data'))
cover = self.manifest.hrefs[href] cover = self.manifest.hrefs[href]
elif xpath(html, '//svg:svg[position()=1]'): elif xpath(html, '//svg:svg[position()=1]'):
svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0]) svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0])

View File

@ -36,26 +36,36 @@ PROFILES = {
fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]), fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),
'MSReader': 'MSReader':
Profile(width=480, height=652, dpi=100.0, fbase=13, Profile(width=480, height=652, dpi=96, fbase=13,
fsizes=[10, 11, 13, 16, 18, 20, 22, 26]), fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),
# Not really, but let's pretend # Not really, but let's pretend
'MobiDesktop': 'Mobipocket':
Profile(width=280, height=300, dpi=96, fbase=18, Profile(width=600, height=800, dpi=96, fbase=18,
fsizes=[14, 14, 16, 18, 20, 22, 22, 24]), fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
# No clue on usable screen size; DPI should be good
'HanlinV3':
Profile(width=584, height=754, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
# No clue on usable screen size and DPI
'CybookG3': 'CybookG3':
Profile(width=584, height=754, dpi=168.451, fbase=12, Profile(width=600, height=800, dpi=168.451, fbase=16,
fsizes=[9, 10, 11, 12, 14, 17, 20, 24]), fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Firefox': 'Kindle':
Profile(width=525, height=640, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Browser':
Profile(width=800, height=600, dpi=100.0, fbase=12, Profile(width=800, height=600, dpi=100.0, fbase=12,
fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24]) fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
} }
class Context(object): class Context(object):
PROFILES = PROFILES
def __init__(self, source, dest): def __init__(self, source, dest):
if source in PROFILES: if source in PROFILES:
source = PROFILES[source] source = PROFILES[source]

View File

@ -23,7 +23,7 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
from lxml import etree from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError from lxml.cssselect import css_to_xpath, ExpressionError
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import barename, urlnormalize from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize
from calibre.ebooks.oeb.profile import PROFILES from calibre.ebooks.oeb.profile import PROFILES
from calibre.resources import html_css from calibre.resources import html_css
@ -87,10 +87,6 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
'x-large', 'xx-large']) 'x-large', 'xx-large'])
XPNSMAP = {'h': XHTML_NS,}
def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
class CSSSelector(etree.XPath): class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *') MIN_SPACE_RE = re.compile(r' *([>~+]) *')
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:") LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
@ -269,6 +265,7 @@ class Style(object):
self._fontSize = None self._fontSize = None
self._width = None self._width = None
self._height = None self._height = None
self._lineHeight = None
stylizer._styles[element] = self stylizer._styles[element] = self
def _update_cssdict(self, cssdict): def _update_cssdict(self, cssdict):
@ -324,7 +321,7 @@ class Style(object):
unit = m.group(2) unit = m.group(2)
if unit == '%': if unit == '%':
base = base or self.width base = base or self.width
result = (value/100.0) * base result = (value / 100.0) * base
elif unit == 'px': elif unit == 'px':
result = value * 72.0 / self._profile.dpi result = value * 72.0 / self._profile.dpi
elif unit == 'in': elif unit == 'in':
@ -388,7 +385,7 @@ class Style(object):
@property @property
def width(self): def width(self):
if self._width is None: if self._width is None:
result = None width = None
base = None base = None
parent = self._get_parent() parent = self._get_parent()
if parent is not None: if parent is not None:
@ -399,9 +396,9 @@ class Style(object):
width = self._element.attrib['width'] width = self._element.attrib['width']
elif 'width' in self._style: elif 'width' in self._style:
width = self._style['width'] width = self._style['width']
else: if not width or width == 'auto':
result = base result = base
if not result: else:
result = self._unit_convert(width, base=base) result = self._unit_convert(width, base=base)
self._width = result self._width = result
return self._width return self._width
@ -409,7 +406,7 @@ class Style(object):
@property @property
def height(self): def height(self):
if self._height is None: if self._height is None:
result = None height = None
base = None base = None
parent = self._get_parent() parent = self._get_parent()
if parent is not None: if parent is not None:
@ -420,13 +417,54 @@ class Style(object):
height = self._element.attrib['height'] height = self._element.attrib['height']
elif 'height' in self._style: elif 'height' in self._style:
height = self._style['height'] height = self._style['height']
else: if not height or height == 'auto':
result = base result = base
if not result: else:
result = self._unit_convert(height, base=base) result = self._unit_convert(height, base=base)
self._height = result self._height = result
return self._height return self._height
@property
def lineHeight(self):
if self._lineHeight is None:
result = None
parent = self._getparent()
if 'line-height' in self._style:
lineh = self._style['line-height']
try:
float(lineh)
except ValueError:
result = self._unit_convert(lineh, base=self.fontSize)
else:
result = float(lineh) * self.fontSize
elif parent is not None:
# TODO: proper inheritance
result = parent.lineHeight
else:
result = 1.2 * self.fontSize
self._lineHeight = result
return self._lineHeight
@property
def marginTop(self):
return self._unit_convert(
self._get('margin-top'), base=self.height)
@property
def marginBottom(self):
return self._unit_convert(
self._get('margin-bottom'), base=self.height)
@property
def paddingTop(self):
return self._unit_convert(
self._get('padding-top'), base=self.height)
@property
def paddingBottom(self):
return self._unit_convert(
self._get('padding-bottom'), base=self.height)
def __str__(self): def __str__(self):
items = self._style.items() items = self._style.items()
items.sort() items.sort()

View File

@ -33,12 +33,13 @@ class KeyMapper(object):
def relate(size, base): def relate(size, base):
size = float(size) size = float(size)
base = float(base) base = float(base)
if size == base: return 0 if abs(size - base) < 0.1: return 0
sign = -1 if size < base else 1 sign = -1 if size < base else 1
endp = 0 if size < base else 36 endp = 0 if size < base else 36
diff = (abs(base - size) * 3) + ((36 - size) / 100) diff = (abs(base - size) * 3) + ((36 - size) / 100)
logb = abs(base - endp) logb = abs(base - endp)
return sign * math.log(diff, logb) result = sign * math.log(diff, logb)
return result
def __getitem__(self, ssize): def __getitem__(self, ssize):
if ssize in self.cache: if ssize in self.cache:
@ -122,6 +123,8 @@ class CSSFlattener(object):
fsize = self.context.source.fbase fsize = self.context.source.fbase
self.baseline_node(body, stylizer, sizes, fsize) self.baseline_node(body, stylizer, sizes, fsize)
sbase = max(sizes.items(), key=operator.itemgetter(1))[0] sbase = max(sizes.items(), key=operator.itemgetter(1))[0]
self.oeb.logger.info(
"Source base font size is %0.05fpt" % sbase)
return sbase return sbase
def clean_edges(self, cssdict, style, fsize): def clean_edges(self, cssdict, style, fsize):
@ -154,13 +157,13 @@ class CSSFlattener(object):
if node.tag == XHTML('font'): if node.tag == XHTML('font'):
node.tag = XHTML('span') node.tag = XHTML('span')
if 'size' in node.attrib: if 'size' in node.attrib:
size = node.attrib['size'] size = node.attrib['size'].strip()
if size.startswith('+'): if size:
cssdict['font-size'] = 'larger'
elif size.startswith('-'):
cssdict['font-size'] = 'smaller'
else:
fnums = self.context.source.fnums fnums = self.context.source.fnums
if size[0] in ('+', '-'):
# Oh, the warcrimes
cssdict['font-size'] = fnums[3+int(size)]
else:
cssdict['font-size'] = fnums[int(size)] cssdict['font-size'] = fnums[int(size)]
del node.attrib['size'] del node.attrib['size']
if 'color' in node.attrib: if 'color' in node.attrib:
@ -182,8 +185,9 @@ class CSSFlattener(object):
percent = (margin - style['text-indent']) / style['width'] percent = (margin - style['text-indent']) / style['width']
cssdict['margin-left'] = "%d%%" % (percent * 100) cssdict['margin-left'] = "%d%%" % (percent * 100)
left -= style['text-indent'] left -= style['text-indent']
if 'display' in cssdict and cssdict['display'] == 'in-line':
cssdict['display'] = 'inline'
if self.unfloat and 'float' in cssdict \ if self.unfloat and 'float' in cssdict \
and tag not in ('img', 'object') \
and cssdict.get('display', 'none') != 'none': and cssdict.get('display', 'none') != 'none':
del cssdict['display'] del cssdict['display']
if self.untable and 'display' in cssdict \ if self.untable and 'display' in cssdict \
@ -218,7 +222,9 @@ class CSSFlattener(object):
for child in node: for child in node:
self.flatten_node(child, stylizer, names, styles, psize, left) self.flatten_node(child, stylizer, names, styles, psize, left)
def flatten_head(self, head, stylizer, href): def flatten_head(self, item, stylizer, href):
html = item.data
head = html.find(XHTML('head'))
for node in head: for node in head:
if node.tag == XHTML('link') \ if node.tag == XHTML('link') \
and node.get('rel', 'stylesheet') == 'stylesheet' \ and node.get('rel', 'stylesheet') == 'stylesheet' \
@ -227,6 +233,7 @@ class CSSFlattener(object):
elif node.tag == XHTML('style') \ elif node.tag == XHTML('style') \
and node.get('type', CSS_MIME) in OEB_STYLES: and node.get('type', CSS_MIME) in OEB_STYLES:
head.remove(node) head.remove(node)
href = item.relhref(href)
etree.SubElement(head, XHTML('link'), etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href) rel='stylesheet', type=CSS_MIME, href=href)
if stylizer.page_rule: if stylizer.page_rule:
@ -259,7 +266,5 @@ class CSSFlattener(object):
css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items) css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
href = self.replace_css(css) href = self.replace_css(css)
for item in self.oeb.spine: for item in self.oeb.spine:
html = item.data
stylizer = self.stylizers[item] stylizer = self.stylizers[item]
head = html.find(XHTML('head')) self.flatten_head(item, stylizer, href)
self.flatten_head(head, stylizer, href)

View File

@ -0,0 +1,87 @@
'''
HTML-TOC-adding transform.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from lxml import etree
from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
from calibre.ebooks.oeb.base import element
STYLE_CSS = {
'nested': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
margin-left: 1.2em;
text-indent: -1.2em;
}
.calibre_toc_block .calibre_toc_block {
margin-left: 2.4em;
}
.calibre_toc_block .calibre_toc_block .calibre_toc_block {
margin-left: 3.6em;
}
""",
'centered': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
text-align: center;
}
body > .calibre_toc_block {
margin-top: 1.2em;
}
"""
}
class HTMLTOCAdder(object):
def __init__(self, style='nested'):
self.style = style
def transform(self, oeb, context):
if 'toc' in oeb.guide:
return
oeb.logger.info('Generating in-line TOC...')
style = self.style
if style not in STYLE_CSS:
oeb.logger.error('Unknown TOC style %r' % style)
style = 'nested'
id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style])
language = str(oeb.metadata.language[0])
contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
attrib={XML('lang'): language})
head = element(contents, XHTML('head'))
title = element(head, XHTML('title'))
title.text = 'Table of Contents'
element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
href=css_href)
body = element(contents, XHTML('body'),
attrib={'class': 'calibre_toc'})
h1 = element(body, XHTML('h1'),
attrib={'class': 'calibre_toc_header'})
h1.text = 'Table of Contents'
self.add_toc_level(body, oeb.toc)
id, href = oeb.manifest.generate('contents', 'contents.xhtml')
item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
oeb.spine.add(item, linear=False)
oeb.guide.add('toc', 'Table of Contents', href)
def add_toc_level(self, elem, toc):
for node in toc:
block = element(elem, XHTML('div'),
attrib={'class': 'calibre_toc_block'})
line = element(block, XHTML('a'),
attrib={'href': node.href,
'class': 'calibre_toc_line'})
line.text = node.title
self.add_toc_level(block, node)

View File

@ -0,0 +1,112 @@
'''
CSS case-mangling transform.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
import re
import operator
import math
from itertools import chain
from collections import defaultdict
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
from calibre.ebooks.oeb.base import CSS_MIME
from calibre.ebooks.oeb.base import namespace
from calibre.ebooks.oeb.stylizer import Stylizer
CASE_MANGLER_CSS = """
.calibre_lowercase {
font-variant: normal;
font-size: 0.65em;
}
"""
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
class CaseMangler(object):
def transform(self, oeb, context):
oeb.logger.info('Applying case-transforming CSS...')
self.oeb = oeb
self.profile = context.source
self.mangle_spine()
def mangle_spine(self):
id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
for item in self.oeb.spine:
html = item.data
relhref = item.relhref(href)
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
rel='stylesheet', href=relhref, type=CSS_MIME)
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
self.mangle_elem(html.find(XHTML('body')), stylizer)
def text_transform(self, transform, text):
if transform == 'capitalize':
return text.title()
elif transform == 'uppercase':
return text.upper()
elif transform == 'lowercase':
return text.lower()
return text
def split_text(self, text):
results = ['']
isupper = text[0].isupper()
for char in text:
if char.isupper() == isupper:
results[-1] += char
else:
isupper = not isupper
results.append(char)
return results
def smallcaps_elem(self, elem, attr):
texts = self.split_text(getattr(elem, attr))
setattr(elem, attr, None)
last = elem if attr == 'tail' else None
attrib = {'class': 'calibre_lowercase'}
for text in texts:
if text.isupper():
if last is None:
elem.text = text
else:
last.tail = text
else:
child = etree.Element(XHTML('span'), attrib=attrib)
child.text = text.upper()
if last is None:
elem.insert(0, child)
else:
# addnext() moves the tail for some reason
tail = last.tail
last.addnext(child)
last.tail = tail
child.tail = None
last = child
def mangle_elem(self, elem, stylizer):
if not isinstance(elem.tag, basestring) or \
namespace(elem.tag) != XHTML_NS:
return
children = list(elem)
style = stylizer.style(elem)
transform = style['text-transform']
variant = style['font-variant']
if elem.text:
if transform in TEXT_TRANSFORMS:
elem.text = self.text_transform(transform, elem.text)
if variant == 'small-caps':
self.smallcaps_elem(elem, 'text')
for child in children:
self.mangle_elem(child, stylizer)
if child.tail:
if transform in TEXT_TRANSFORMS:
child.tail = self.text_transform(transform, child.tail)
if variant == 'small-caps':
self.smallcaps_elem(child, 'tail')

View File

@ -21,11 +21,12 @@ from PyQt4.QtGui import QPainter
from PyQt4.QtSvg import QSvgRenderer from PyQt4.QtSvg import QSvgRenderer
from PyQt4.QtGui import QApplication from PyQt4.QtGui import QApplication
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
IMAGE_TAGS = set([XHTML('img'), XHTML('object')]) IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
class SVGRasterizer(object): class SVGRasterizer(object):
def __init__(self): def __init__(self):
@ -41,7 +42,7 @@ class SVGRasterizer(object):
self.rasterize_spine() self.rasterize_spine()
self.rasterize_cover() self.rasterize_cover()
def rasterize_svg(self, elem, width=0, height=0): def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
data = QByteArray(xml2str(elem)) data = QByteArray(xml2str(elem))
svg = QSvgRenderer(data) svg = QSvgRenderer(data)
size = svg.defaultSize() size = svg.defaultSize()
@ -52,6 +53,9 @@ class SVGRasterizer(object):
size.setHeight(box[3] - box[1]) size.setHeight(box[3] - box[1])
if width or height: if width or height:
size.scale(width, height, Qt.KeepAspectRatio) size.scale(width, height, Qt.KeepAspectRatio)
logger = self.oeb.logger
logger.info('Rasterizing %r to %dx%d'
% (elem, size.width(), size.height()))
image = QImage(size, QImage.Format_ARGB32_Premultiplied) image = QImage(size, QImage.Format_ARGB32_Premultiplied)
image.fill(QColor("white").rgb()) image.fill(QColor("white").rgb())
painter = QPainter(image) painter = QPainter(image)
@ -60,7 +64,7 @@ class SVGRasterizer(object):
array = QByteArray() array = QByteArray()
buffer = QBuffer(array) buffer = QBuffer(array)
buffer.open(QIODevice.WriteOnly) buffer.open(QIODevice.WriteOnly)
image.save(buffer, 'PNG') image.save(buffer, format)
return str(array) return str(array)
def dataize_manifest(self): def dataize_manifest(self):
@ -113,11 +117,7 @@ class SVGRasterizer(object):
def rasterize_inline(self, elem, style, item): def rasterize_inline(self, elem, style, item):
width = style['width'] width = style['width']
if width == 'auto':
width = self.profile.width
height = style['height'] height = style['height']
if height == 'auto':
height = self.profile.height
width = (width / 72) * self.profile.dpi width = (width / 72) * self.profile.dpi
height = (height / 72) * self.profile.dpi height = (height / 72) * self.profile.dpi
elem = self.dataize_svg(item, elem) elem = self.dataize_svg(item, elem)
@ -134,11 +134,7 @@ class SVGRasterizer(object):
def rasterize_external(self, elem, style, item, svgitem): def rasterize_external(self, elem, style, item, svgitem):
width = style['width'] width = style['width']
if width == 'auto':
width = self.profile.width
height = style['height'] height = style['height']
if height == 'auto':
height = self.profile.height
width = (width / 72) * self.profile.dpi width = (width / 72) * self.profile.dpi
height = (height / 72) * self.profile.dpi height = (height / 72) * self.profile.dpi
data = QByteArray(str(svgitem)) data = QByteArray(str(svgitem))
@ -168,7 +164,12 @@ class SVGRasterizer(object):
manifest.add(id, href, PNG_MIME, data=data) manifest.add(id, href, PNG_MIME, data=data)
self.images[key] = href self.images[key] = href
elem.tag = XHTML('img') elem.tag = XHTML('img')
for attr in elem.attrib:
if attr not in KEEP_ATTRS:
del elem.attrib[attr]
elem.attrib['src'] = item.relhref(href) elem.attrib['src'] = item.relhref(href)
if elem.text:
elem.attrib['alt'] = elem.text
elem.text = None elem.text = None
for child in elem: for child in elem:
elem.remove(child) elem.remove(child)
@ -180,9 +181,9 @@ class SVGRasterizer(object):
cover = self.oeb.manifest.ids[str(covers[0])] cover = self.oeb.manifest.ids[str(covers[0])]
if not cover.media_type == SVG_MIME: if not cover.media_type == SVG_MIME:
return return
logger = self.oeb.logger width = (self.profile.width / 72) * self.profile.dpi
logger.info('Rasterizing %r to %dx%d' % (cover.href, 600, 800)) height = (self.profile.height / 72) * self.profile.dpi
data = self.rasterize_svg(cover.data, 600, 800) data = self.rasterize_svg(cover.data, width, height)
href = os.path.splitext(cover.href)[0] + '.png' href = os.path.splitext(cover.href)[0] + '.png'
id, href = self.oeb.manifest.generate(cover.id, href) id, href = self.oeb.manifest.generate(cover.id, href)
self.oeb.manifest.add(id, href, PNG_MIME, data=data) self.oeb.manifest.add(id, href, PNG_MIME, data=data)

View File

@ -9,6 +9,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys import sys
import os import os
from itertools import chain from itertools import chain
from urlparse import urldefrag
from lxml import etree from lxml import etree
import cssutils import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
@ -29,6 +30,11 @@ class ManifestTrimmer(object):
used.add(oeb.manifest.hrefs[item.value]) used.add(oeb.manifest.hrefs[item.value])
elif item.value in oeb.manifest.ids: elif item.value in oeb.manifest.ids:
used.add(oeb.manifest.ids[item.value]) used.add(oeb.manifest.ids[item.value])
for ref in oeb.guide.values():
path, _ = urldefrag(ref.href)
if path in oeb.manifest.hrefs:
used.add(oeb.manifest.hrefs[path])
# TOC items are required to be in the spine
for item in oeb.spine: for item in oeb.spine:
used.add(item) used.add(item)
unchecked = used unchecked = used
@ -56,7 +62,6 @@ class ManifestTrimmer(object):
cssutils.replaceUrls(sheet, replacer) cssutils.replaceUrls(sheet, replacer)
used.update(new) used.update(new)
unchecked = new unchecked = new
# All guide and TOC items are required to be in the spine
for item in oeb.manifest.values(): for item in oeb.manifest.values():
if item not in used: if item not in used:
oeb.logger.info('Trimming %r from manifest' % item.href) oeb.logger.info('Trimming %r from manifest' % item.href)

View File

@ -29,7 +29,7 @@ def config(defaults=None):
c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop, c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop,
help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop ) help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop )
c.add_opt('bounding', ['-b', '--bounding'], c.add_opt('bounding', ['-b', '--bounding'],
help=_('A file generated by ghostscript which allows each page to be individually cropped')) help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
return c return c
@ -38,14 +38,28 @@ def option_parser():
return c.option_parser(usage=_('''\ return c.option_parser(usage=_('''\
%prog [options] file.pdf %prog [options] file.pdf
Crop a pdf. Crops a pdf.
''')) '''))
def main(args=sys.argv): def main(args=sys.argv):
parser = option_parser() parser = option_parser()
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)
try:
source = os.path.abspath(args[1]) source = os.path.abspath(args[1])
input_pdf = PdfFileReader(file(source, "rb")) input_pdf = PdfFileReader(file(source, "rb"))
except:
print "Unable to read input"
return 2
title = _('Unknown')
author = _('Unknown')
try:
info = input_pdf.getDocumentInfo()
if info.title:
title = info.title
if info.author:
author = info.author
except:
pass
if opts.bounding != None: if opts.bounding != None:
try: try:
bounding = open( opts.bounding , 'r' ) bounding = open( opts.bounding , 'r' )
@ -53,7 +67,7 @@ def main(args=sys.argv):
except: except:
print 'Error opening %s' % opts.bounding print 'Error opening %s' % opts.bounding
return 1 return 1
output_pdf = PdfFileWriter() output_pdf = PdfFileWriter(title=title,author=author)
for page_number in range (0, input_pdf.getNumPages() ): for page_number in range (0, input_pdf.getNumPages() ):
page = input_pdf.getPage(page_number) page = input_pdf.getPage(page_number)
if opts.bounding != None: if opts.bounding != None:

View File

@ -136,16 +136,18 @@ class DeviceManager(Thread):
return self.create_job(self._sync_booklists, done, args=[booklists], return self.create_job(self._sync_booklists, done, args=[booklists],
description=_('Send metadata to device')) description=_('Send metadata to device'))
def _upload_books(self, files, names, on_card=False): def _upload_books(self, files, names, on_card=False, metadata=None):
'''Upload books to device: ''' '''Upload books to device: '''
return self.device.upload_books(files, names, on_card, end_session=False) return self.device.upload_books(files, names, on_card,
metadata=metadata, end_session=False)
def upload_books(self, done, files, names, on_card=False, titles=None): def upload_books(self, done, files, names, on_card=False, titles=None,
metadata=None):
desc = _('Upload %d books to device')%len(names) desc = _('Upload %d books to device')%len(names)
if titles: if titles:
desc += u':' + u', '.join(titles) desc += u':' + u', '.join(titles)
return self.create_job(self._upload_books, done, args=[files, names], return self.create_job(self._upload_books, done, args=[files, names],
kwargs={'on_card':on_card}, description=desc) kwargs={'on_card':on_card,'metadata':metadata}, description=desc)
def add_books_to_metadata(self, locations, metadata, booklists): def add_books_to_metadata(self, locations, metadata, booklists):
self.device.add_books_to_metadata(locations, metadata, booklists) self.device.add_books_to_metadata(locations, metadata, booklists)

View File

@ -28,9 +28,6 @@
<property name="readOnly" > <property name="readOnly" >
<bool>true</bool> <bool>true</bool>
</property> </property>
<property name="maximumBlockCount" >
<number>400</number>
</property>
</widget> </widget>
</item> </item>
</layout> </layout>

View File

@ -75,7 +75,13 @@ def save_recipes(recipes):
def load_recipes(): def load_recipes():
config.refresh() config.refresh()
return [Recipe().unpickle(r) for r in config.get('scheduled_recipes', [])] recipes = []
for r in config.get('scheduled_recipes', []):
r = Recipe().unpickle(r)
if r.builtin and not str(r.id).startswith('recipe_'):
continue
recipes.append(r)
return recipes
class RecipeModel(QAbstractListModel, SearchQueryParser): class RecipeModel(QAbstractListModel, SearchQueryParser):
@ -438,7 +444,7 @@ class Scheduler(QObject):
self.lock.unlock() self.lock.unlock()
def main(args=sys.argv): def main(args=sys.argv):
app = QApplication([]) QApplication([])
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
d = SchedulerDialog(LibraryDatabase2('/home/kovid/documents/library')) d = SchedulerDialog(LibraryDatabase2('/home/kovid/documents/library'))
d.exec_() d.exec_()

Binary file not shown.

After

Width:  |  Height:  |  Size: 586 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 992 B

View File

@ -385,13 +385,35 @@ class BooksModel(QAbstractTableModel):
metadata.append(mi) metadata.append(mi)
return metadata return metadata
def get_preferred_formats_from_ids(self, ids, all_formats, mode='r+b'):
ans = []
for id in ids:
format = None
fmts = self.db.formats(id, index_is_id=True)
if not fmts:
fmts = ''
available_formats = set(fmts.lower().split(','))
for f in all_formats:
if f.lower() in available_formats:
format = f.lower()
break
if format is None:
ans.append(format)
else:
f = self.db.format(id, format, index_is_id=True, as_file=True,
mode=mode)
ans.append(f)
return ans
def get_preferred_formats(self, rows, formats, paths=False): def get_preferred_formats(self, rows, formats, paths=False):
ans = [] ans = []
for row in (row.row() for row in rows): for row in (row.row() for row in rows):
format = None format = None
fmts = self.db.formats(row) fmts = self.db.formats(row)
if not fmts: if not fmts:
return [] fmts = ''
db_formats = set(fmts.lower().split(',')) db_formats = set(fmts.lower().split(','))
available_formats = set([f.lower() for f in formats]) available_formats = set([f.lower() for f in formats])
u = available_formats.intersection(db_formats) u = available_formats.intersection(db_formats)

View File

@ -406,6 +406,7 @@ class Document(QGraphicsScene):
for font in lrf.font_map: for font in lrf.font_map:
fdata = QByteArray(lrf.font_map[font].data) fdata = QByteArray(lrf.font_map[font].data)
id = QFontDatabase.addApplicationFontFromData(fdata) id = QFontDatabase.addApplicationFontFromData(fdata)
if id != -1:
font_map[font] = [str(i) for i in QFontDatabase.applicationFontFamilies(id)][0] font_map[font] = [str(i) for i in QFontDatabase.applicationFontFamilies(id)][0]
if load_substitutions: if load_substitutions:

View File

@ -746,7 +746,7 @@ class Main(MainWindow, Ui_MainWindow):
titles = [i['title'] for i in metadata] titles = [i['title'] for i in metadata]
job = self.device_manager.upload_books(Dispatcher(self.books_uploaded), job = self.device_manager.upload_books(Dispatcher(self.books_uploaded),
files, names, on_card=on_card, files, names, on_card=on_card,
titles=titles metadata=metadata, titles=titles
) )
self.upload_memory[job] = (metadata, on_card, memory, files) self.upload_memory[job] = (metadata, on_card, memory, files)
@ -887,8 +887,12 @@ class Main(MainWindow, Ui_MainWindow):
if self.device_connected: if self.device_connected:
ids = list(dynamic.get('news_to_be_synced', set([]))) ids = list(dynamic.get('news_to_be_synced', set([])))
ids = [id for id in ids if self.library_view.model().db.has_id(id)] ids = [id for id in ids if self.library_view.model().db.has_id(id)]
files = [self.library_view.model().db.format(id, prefs['output_format'], index_is_id=True, as_file=True) for id in ids] files = self.library_view.model().get_preferred_formats_from_ids(
ids, self.device_manager.device_class.FORMATS)
files = [f for f in files if f is not None] files = [f for f in files if f is not None]
if not files:
dynamic.set('news_to_be_synced', set([]))
return
metadata = self.library_view.model().get_metadata(ids, rows_are_ids=True) metadata = self.library_view.model().get_metadata(ids, rows_are_ids=True)
names = [] names = []
for mi in metadata: for mi in metadata:
@ -1479,7 +1483,8 @@ in which you want to store your books files. Any existing books will be automati
return True return True
def shutdown(self): def shutdown(self, write_settings=True):
if write_settings:
self.write_settings() self.write_settings()
self.job_manager.terminate_all_jobs() self.job_manager.terminate_all_jobs()
self.device_manager.keep_going = False self.device_manager.keep_going = False
@ -1500,6 +1505,7 @@ in which you want to store your books files. Any existing books will be automati
def closeEvent(self, e): def closeEvent(self, e):
self.write_settings()
if self.system_tray_icon.isVisible(): if self.system_tray_icon.isVisible():
if not dynamic['systray_msg'] and not isosx: if not dynamic['systray_msg'] and not isosx:
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_() info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1509,7 +1515,7 @@ in which you want to store your books files. Any existing books will be automati
else: else:
if self.confirm_quit(): if self.confirm_quit():
try: try:
self.shutdown() self.shutdown(write_settings=False)
except: except:
pass pass
e.accept() e.accept()

View File

@ -1551,9 +1551,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
def has_book(self, mi):
return bool(self.conn.get('SELECT id FROM books where title=?', (mi.title,), all=False))
def has_id(self, id): def has_id(self, id):
return self.conn.get('SELECT id FROM books where id=?', (id,), all=False) is not None return self.conn.get('SELECT id FROM books where id=?', (id,), all=False) is not None

View File

@ -217,7 +217,11 @@ class ResultCache(SearchQueryParser):
return self.index(id) return self.index(id)
def has_id(self, id): def has_id(self, id):
try:
return self._data[id] is not None return self._data[id] is not None
except IndexError:
pass
return False
def refresh_ids(self, conn, ids): def refresh_ids(self, conn, ids):
for id in ids: for id in ids:
@ -558,6 +562,14 @@ class LibraryDatabase2(LibraryDatabase):
return img return img
return f if as_file else f.read() return f if as_file else f.read()
def has_book(self, mi):
title = mi.title
if title:
if not isinstance(title, unicode):
title = title.decode(preferred_encoding, 'replace')
return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
return False
def has_cover(self, index, index_is_id=False): def has_cover(self, index, index_is_id=False):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')

View File

@ -7,8 +7,8 @@ var column_titles = {
'rating' : 'Rating', 'rating' : 'Rating',
'date' : 'Date', 'date' : 'Date',
'tags' : 'Tags', 'tags' : 'Tags',
'series' : 'Series', 'series' : 'Series'
} };
String.prototype.format = function() { String.prototype.format = function() {
var pattern = /\{\d+\}/g; var pattern = /\{\d+\}/g;

View File

@ -48,12 +48,14 @@ entry_points = {
'any2lrf = calibre.ebooks.lrf.any.convert_from:main', 'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'any2epub = calibre.ebooks.epub.from_any:main', 'any2epub = calibre.ebooks.epub.from_any:main',
'any2lit = calibre.ebooks.lit.from_any:main', 'any2lit = calibre.ebooks.lit.from_any:main',
'any2mobi = calibre.ebooks.mobi.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main', 'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main', 'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main', 'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main', 'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main', 'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main', 'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'lit2oeb = calibre.ebooks.lit.reader:main', 'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main', 'oeb2lit = calibre.ebooks.lit.writer:main',

View File

@ -102,7 +102,7 @@ Device Integration
What devices does |app| support? What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader? I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -286,7 +286,7 @@ def write(socket, msg, timeout=5):
def read(socket, timeout=5): def read(socket, timeout=5):
''' '''
Read a message from `socket`. The message must have been sent with the :function:`write` Read a message from `socket`. The message must have been sent with the :function:`write`
function. Raises a `RuntimeError` if the message is corrpted. Can return an function. Raises a `RuntimeError` if the message is corrupted. Can return an
empty string. empty string.
''' '''
if isworker: if isworker:
@ -299,7 +299,12 @@ def read(socket, timeout=5):
if not msg: if not msg:
break break
if length is None: if length is None:
try:
length, msg = int(msg[:12]), msg[12:] length, msg = int(msg[:12]), msg[12:]
except ValueError:
if DEBUG:
print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg
return ''
buf.write(msg) buf.write(msg)
if buf.tell() >= length: if buf.tell() >= length:
break break

View File

@ -217,8 +217,7 @@ class Server(object):
pos = pos.replace(month = 1) pos = pos.replace(month = 1)
else: else:
pos = pos.replace(month = pos.month + 1) pos = pos.replace(month = pos.month + 1)
_months = list(months(self.earliest, self.latest))[:-1][-12:]
_months = list(months(self.earliest, self.latest))[:-1][:12]
_months = [range_for_month(*m) for m in _months] _months = [range_for_month(*m) for m in _months]
_months = [self.get_slice(*m) for m in _months] _months = [self.get_slice(*m) for m in _months]
x = [m.min for m in _months] x = [m.min for m in _months]

View File

@ -35,7 +35,7 @@ class Distribution(object):
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'), ('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'), ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'), ('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-beautifulsoup'), ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'), ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
] ]
@ -205,23 +205,7 @@ select Install.</li>
<ol> <ol>
<li>Before trying to use the command line tools, you must run the app at least once. This will ask you for you password and then setup the symbolic links for the command line tools.</li> <li>Before trying to use the command line tools, you must run the app at least once. This will ask you for you password and then setup the symbolic links for the command line tools.</li>
<li>The app cannot be run from within the dmg. You must drag it to a folder on your filesystem (The Desktop, Applications, wherever).</li> <li>The app cannot be run from within the dmg. You must drag it to a folder on your filesystem (The Desktop, Applications, wherever).</li>
<li>In order for the conversion of RTF to LRF to support WMF images (common in older RTF files) you need to install ImageMagick.</li> <li>In order for localization of the user interface in your language, select your language in the configuration dialog (by clicking the hammer icon next to the search bar) and select your language.</li>
<li>In order for localization of the user interface in your language you must create the file <code>~/.MacOSX/environment.plist</code> as shown below:
<pre class="wiki">
&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"&gt;
&lt;plist version="1.0"&gt;
&lt;dict&gt;
&lt;key&gt;LANG&lt;/key&gt;
&lt;string&gt;de_DE&lt;/string&gt;
&lt;/dict&gt;
&lt;/plist&gt;
</pre>
The example above is for the German language. Substitute the language code you need.
After creating the file you need to log out and log in again for the changes to become
active. Of course, this will only work if calibre has been translated for your language.
If not, head over to <a href="http://calibre.kovidgoyal.net/wiki/Development#Translations">Translations</a> to see how you can translate it.
</li>
</ol> </ol>
''')) '''))
return 'binary.html', data, None return 'binary.html', data, None

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments - embedded comments
""" """
__version__ = "1.5.0" __version__ = "1.5.1"
__versionTime__ = "28 May 2008 10:05" __versionTime__ = "2 October 2008 00:44"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string import string
from weakref import ref as wkref from weakref import ref as wkref
import copy,sys import copy
import sys
import warnings import warnings
import re import re
import sre_constants import sre_constants
import xml.sax.saxutils
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [ __all__ = [
@ -88,7 +88,7 @@ __all__ = [
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'indentedBlock', 'originalTextFor',
] ]
@ -130,11 +130,22 @@ if not _PY3K:
# ... # ...
else: else:
_ustr = str _ustr = str
unichr = chr
def _str2dict(strg): def _str2dict(strg):
return dict( [(c,0) for c in strg] ) return dict( [(c,0) for c in strg] )
#~ return set( [c for c in strg] ) #~ return set( [c for c in strg] )
def _xml_escape(data):
"""Escape &, <, >, ", ', etc. in a string of data."""
# ampersand must be replaced first
from_symbols = '&><"\''
to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
for from_,to_ in zip(from_symbols, to_symbols):
data = data.replace(from_, to_)
return data
class _Constants(object): class _Constants(object):
pass pass
@ -145,7 +156,7 @@ else:
nums = string.digits nums = string.digits
hexnums = nums + "ABCDEFabcdef" hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums alphanums = alphas + nums
_bslash = "\\" _bslash = chr(92)
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
class ParseBaseException(Exception): class ParseBaseException(Exception):
@ -193,6 +204,9 @@ class ParseBaseException(Exception):
line_str = "".join( [line_str[:line_column], line_str = "".join( [line_str[:line_column],
markerString, line_str[line_column:]]) markerString, line_str[line_column:]])
return line_str.strip() return line_str.strip()
def __dir__(self):
return "loc msg pstr parserElement lineno col line " \
"markInputLine __str__ __repr__".split()
class ParseException(ParseBaseException): class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class; """exception thrown when parse expressions don't match class;
@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
ErrorStop indicates that parsing is to stop immediately because ErrorStop indicates that parsing is to stop immediately because
an unbacktrackable syntax error has been found""" an unbacktrackable syntax error has been found"""
def __init__(self, pe): def __init__(self, pe):
ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement) super(ParseSyntaxException, self).__init__(
pe.pstr, pe.loc, pe.msg, pe.parserElement)
#~ class ReparseException(ParseBaseException): #~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause #~ """Experimental class - parse actions can raise this exception to cause
@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
return self.tup[i] return self.tup[i]
def __repr__(self): def __repr__(self):
return repr(self.tup) return repr(self.tup)
def setOffset(self,i):
self.tup = (self.tup[0],i)
class ParseResults(object): class ParseResults(object):
"""Structured parse results, to provide multiple means of access to the parsed data: """Structured parse results, to provide multiple means of access to the parsed data:
@ -272,9 +289,6 @@ class ParseResults(object):
self.__toklist = [toklist] self.__toklist = [toklist]
self.__tokdict = dict() self.__tokdict = dict()
# this line is related to debugging the asXML bug
#~ asList = False
if name: if name:
if not modal: if not modal:
self.__accumNames[name] = 0 self.__accumNames[name] = 0
@ -286,9 +300,9 @@ class ParseResults(object):
toklist = [ toklist ] toklist = [ toklist ]
if asList: if asList:
if isinstance(toklist,ParseResults): if isinstance(toklist,ParseResults):
self[name] = _ParseResultsWithOffset(toklist.copy(),-1) self[name] = _ParseResultsWithOffset(toklist.copy(),0)
else: else:
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1) self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
self[name].__name = name self[name].__name = name
else: else:
try: try:
@ -374,7 +388,7 @@ class ParseResults(object):
for name in self.__tokdict: for name in self.__tokdict:
occurrences = self.__tokdict[name] occurrences = self.__tokdict[name]
for k, (value, position) in enumerate(occurrences): for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > j)) occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def items( self ): def items( self ):
"""Returns all named result keys and values as a list of tuples.""" """Returns all named result keys and values as a list of tuples."""
@ -411,6 +425,7 @@ class ParseResults(object):
self[k] = v self[k] = v
if isinstance(v[0],ParseResults): if isinstance(v[0],ParseResults):
v[0].__parent = wkref(self) v[0].__parent = wkref(self)
self.__toklist += other.__toklist self.__toklist += other.__toklist
self.__accumNames.update( other.__accumNames ) self.__accumNames.update( other.__accumNames )
del other del other
@ -517,7 +532,7 @@ class ParseResults(object):
continue continue
else: else:
resTag = "ITEM" resTag = "ITEM"
xmlBodyText = xml.sax.saxutils.escape(_ustr(res)) xmlBodyText = _xml_escape(_ustr(res))
out += [ nl, nextLevelIndent, "<", resTag, ">", out += [ nl, nextLevelIndent, "<", resTag, ">",
xmlBodyText, xmlBodyText,
"</", resTag, ">" ] "</", resTag, ">" ]
@ -594,6 +609,8 @@ class ParseResults(object):
else: else:
self.__parent = None self.__parent = None
def __dir__(self):
return dir(super(ParseResults,self)) + self.keys()
def col (loc,strg): def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators. """Returns current column within a string, counting newlines as line separators.
@ -715,7 +732,7 @@ class ParserElement(object):
def breaker(instring, loc, doActions=True, callPreParse=True): def breaker(instring, loc, doActions=True, callPreParse=True):
import pdb import pdb
pdb.set_trace() pdb.set_trace()
_parseMethod( instring, loc, doActions, callPreParse ) return _parseMethod( instring, loc, doActions, callPreParse )
breaker._originalParseMethod = _parseMethod breaker._originalParseMethod = _parseMethod
self._parse = breaker self._parse = breaker
else: else:
@ -1047,6 +1064,7 @@ class ParserElement(object):
instring = instring.expandtabs() instring = instring.expandtabs()
loc, tokens = self._parse( instring, 0 ) loc, tokens = self._parse( instring, 0 )
if parseAll: if parseAll:
loc = self.preParse( instring, loc )
StringEnd()._parse( instring, loc ) StringEnd()._parse( instring, loc )
return tokens return tokens
@ -1158,11 +1176,7 @@ class ParserElement(object):
if isinstance(other,int): if isinstance(other,int):
minElements, optElements = other,0 minElements, optElements = other,0
elif isinstance(other,tuple): elif isinstance(other,tuple):
if len(other)==0: other = (other + (None, None))[:2]
other = (None,None)
elif len(other)==1:
other = (other[0],None)
if len(other)==2:
if other[0] is None: if other[0] is None:
other = (0, other[1]) other = (0, other[1])
if isinstance(other[0],int) and other[1] is None: if isinstance(other[0],int) and other[1] is None:
@ -1177,8 +1191,6 @@ class ParserElement(object):
optElements -= minElements optElements -= minElements
else: else:
raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
else:
raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
else: else:
raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
@ -1360,7 +1372,7 @@ class ParserElement(object):
"""Check defined expressions for valid structure, check for infinite recursive definitions.""" """Check defined expressions for valid structure, check for infinite recursive definitions."""
self.checkRecursion( [] ) self.checkRecursion( [] )
def parseFile( self, file_or_filename ): def parseFile( self, file_or_filename, parseAll=False ):
"""Execute the parse expression on the given file or filename. """Execute the parse expression on the given file or filename.
If a filename is specified (instead of a file object), If a filename is specified (instead of a file object),
the entire file is opened, read, and closed before parsing. the entire file is opened, read, and closed before parsing.
@ -1371,7 +1383,7 @@ class ParserElement(object):
f = open(file_or_filename, "rb") f = open(file_or_filename, "rb")
file_contents = f.read() file_contents = f.read()
f.close() f.close()
return self.parseString(file_contents) return self.parseString(file_contents, parseAll)
def getException(self): def getException(self):
return ParseException("",0,self.errmsg,self) return ParseException("",0,self.errmsg,self)
@ -1393,12 +1405,18 @@ class ParserElement(object):
else: else:
return super(ParserElement,self)==other return super(ParserElement,self)==other
def __ne__(self,other):
return not (self == other)
def __hash__(self): def __hash__(self):
return hash(id(self)) return hash(id(self))
def __req__(self,other): def __req__(self,other):
return self == other return self == other
def __rne__(self,other):
return not (self == other)
class Token(ParserElement): class Token(ParserElement):
"""Abstract ParserElement subclass, for defining atomic matching patterns.""" """Abstract ParserElement subclass, for defining atomic matching patterns."""
@ -1533,7 +1551,6 @@ class Keyword(Token):
Keyword.DEFAULT_KEYWORD_CHARS = chars Keyword.DEFAULT_KEYWORD_CHARS = chars
setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal): class CaselessLiteral(Literal):
"""Token to match a specified string, ignoring case of letters. """Token to match a specified string, ignoring case of letters.
Note: the matched results will always be in the case of the given Note: the matched results will always be in the case of the given
@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
"""Matches if current position is at the beginning of a line within the parse string""" """Matches if current position is at the beginning of a line within the parse string"""
def __init__( self ): def __init__( self ):
super(LineStart,self).__init__() super(LineStart,self).__init__()
self.setWhitespaceChars( " \t" ) self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected start of line" self.errmsg = "Expected start of line"
#self.myException.msg = self.errmsg #self.myException.msg = self.errmsg
@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
"""Matches if current position is at the end of a line within the parse string""" """Matches if current position is at the end of a line within the parse string"""
def __init__( self ): def __init__( self ):
super(LineEnd,self).__init__() super(LineEnd,self).__init__()
self.setWhitespaceChars( " \t" ) self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected end of line" self.errmsg = "Expected end of line"
#self.myException.msg = self.errmsg #self.myException.msg = self.errmsg
@ -2271,10 +2288,9 @@ class And(ParseExpression):
""" """
class _ErrorStop(Empty): class _ErrorStop(Empty):
def __new__(cls,*args,**kwargs): def __init__(self, *args, **kwargs):
return And._ErrorStop.instance super(Empty,self).__init__(*args, **kwargs)
_ErrorStop.instance = Empty() self.leaveWhitespace()
_ErrorStop.instance.leaveWhitespace()
def __init__( self, exprs, savelist = True ): def __init__( self, exprs, savelist = True ):
super(And,self).__init__(exprs, savelist) super(And,self).__init__(exprs, savelist)
@ -2293,12 +2309,14 @@ class And(ParseExpression):
loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
errorStop = False errorStop = False
for e in self.exprs[1:]: for e in self.exprs[1:]:
if e is And._ErrorStop.instance: if isinstance(e, And._ErrorStop):
errorStop = True errorStop = True
continue continue
if errorStop: if errorStop:
try: try:
loc, exprtokens = e._parse( instring, loc, doActions ) loc, exprtokens = e._parse( instring, loc, doActions )
except ParseSyntaxException:
raise
except ParseBaseException, pe: except ParseBaseException, pe:
raise ParseSyntaxException(pe) raise ParseSyntaxException(pe)
except IndexError, ie: except IndexError, ie:
@ -2502,7 +2520,7 @@ class Each(ParseExpression):
raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
# add any unmatched Optionals, in case they have default values defined # add any unmatched Optionals, in case they have default values defined
matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt) matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]
resultlist = [] resultlist = []
for e in matchOrder: for e in matchOrder:
@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
argument is used to define grammars (typically quoted strings and comments) that argument is used to define grammars (typically quoted strings and comments) that
might contain false matches. might contain false matches.
""" """
def __init__( self, other, include=False, ignore=None ): def __init__( self, other, include=False, ignore=None, failOn=None ):
super( SkipTo, self ).__init__( other ) super( SkipTo, self ).__init__( other )
if ignore is not None: if ignore is not None:
self.expr = self.expr.copy() self.expr = self.expr.copy()
@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
self.mayIndexError = False self.mayIndexError = False
self.includeMatch = include self.includeMatch = include
self.asList = False self.asList = False
if failOn is not None and isinstance(failOn, basestring):
self.failOn = Literal(failOn)
else:
self.failOn = failOn
self.errmsg = "No match found for "+_ustr(self.expr) self.errmsg = "No match found for "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self) #self.myException = ParseException("",0,self.errmsg,self)
@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
startLoc = loc startLoc = loc
instrlen = len(instring) instrlen = len(instring)
expr = self.expr expr = self.expr
failParse = False
while loc <= instrlen: while loc <= instrlen:
try: try:
if self.failOn:
failParse = True
self.failOn.tryParse(instring, loc)
failParse = False
loc = expr._skipIgnorables( instring, loc ) loc = expr._skipIgnorables( instring, loc )
expr._parse( instring, loc, doActions=False, callPreParse=False ) expr._parse( instring, loc, doActions=False, callPreParse=False )
if self.includeMatch:
skipText = instring[startLoc:loc] skipText = instring[startLoc:loc]
if self.includeMatch:
loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
if mat: if mat:
skipRes = ParseResults( skipText ) skipRes = ParseResults( skipText )
@ -2811,8 +2838,11 @@ class SkipTo(ParseElementEnhance):
else: else:
return loc, [ skipText ] return loc, [ skipText ]
else: else:
return loc, [ instring[startLoc:loc] ] return loc, [ skipText ]
except (ParseException,IndexError): except (ParseException,IndexError):
if failParse:
raise
else:
loc += 1 loc += 1
exc = self.myException exc = self.myException
exc.loc = loc exc.loc = loc
@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
if hasattr(self,"name"): if hasattr(self,"name"):
return self.name return self.name
self._revertClass = self.__class__
self.__class__ = _ForwardNoRecurse self.__class__ = _ForwardNoRecurse
try: try:
if self.expr is not None: if self.expr is not None:
@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
else: else:
retString = "None" retString = "None"
finally: finally:
self.__class__ = Forward self.__class__ = self._revertClass
return "Forward: "+retString return self.__class__.__name__ + ": " + retString
def copy(self): def copy(self):
if self.expr is not None: if self.expr is not None:
@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
def _escapeRegexRangeChars(s): def _escapeRegexRangeChars(s):
#~ escape these chars: ^-] #~ escape these chars: ^-]
for c in r"\^-]": for c in r"\^-]":
s = s.replace(c,"\\"+c) s = s.replace(c,_bslash+c)
s = s.replace("\n",r"\n") s = s.replace("\n",r"\n")
s = s.replace("\t",r"\t") s = s.replace("\t",r"\t")
return _ustr(s) return _ustr(s)
@ -3195,6 +3226,33 @@ def dictOf( key, value ):
""" """
return Dict( ZeroOrMore( Group ( key + value ) ) ) return Dict( ZeroOrMore( Group ( key + value ) ) )
def originalTextFor(expr, asString=True):
"""Helper to return the original, untokenized text for a given expression. Useful to
restore the parsed fields of an HTML start tag into the raw tag text itself, or to
revert separate tokens with intervening whitespace back to the original matching
input text. Simpler to use than the parse action keepOriginalText, and does not
require the inspect module to chase up the call stack. By default, returns a
string containing the original parsed text.
If the optional asString argument is passed as False, then the return value is a
ParseResults containing any results names that were originally matched, and a
single token containing the original matched text from the input string. So if
the expression passed to originalTextFor contains expressions with defined
results names, you must set asString to False if you want to preserve those
results name values."""
locMarker = Empty().setParseAction(lambda s,loc,t: loc)
matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
if asString:
extractText = lambda s,l,t: s[t._original_start:t._original_end]
else:
def extractText(s,l,t):
del t[:]
t.insert(0, s[t._original_start:t._original_end])
del t["_original_start"]
del t["_original_end"]
matchExpr.setParseAction(extractText)
return matchExpr
# convenience constants for positional expressions # convenience constants for positional expressions
empty = Empty().setName("empty") empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart") lineStart = LineStart().setName("lineStart")
@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
raise ValueError("opening and closing strings cannot be the same") raise ValueError("opening and closing strings cannot be the same")
if content is None: if content is None:
if isinstance(opener,basestring) and isinstance(closer,basestring): if isinstance(opener,basestring) and isinstance(closer,basestring):
if len(opener) == 1 and len(closer)==1:
if ignoreExpr is not None: if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr + content = (Combine(OneOrMore(~ignoreExpr +
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip())) ).setParseAction(lambda t:t[0].strip()))
else: else:
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip())) content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
).setParseAction(lambda t:t[0].strip()))
else:
if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr +
~Literal(opener) + ~Literal(closer) +
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else: else:
raise ValueError("opening and closing arguments must be strings if no content expression is given") raise ValueError("opening and closing arguments must be strings if no content expression is given")
ret = Forward() ret = Forward()
@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
else: else:
smExpr = Group( Optional(NL) + smExpr = Group( Optional(NL) +
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
blockStatementExpr.ignore("\\" + LineEnd()) blockStatementExpr.ignore(_bslash + LineEnd())
return smExpr return smExpr
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";") commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '")) _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available # it's easy to get these comment structures wrong - they're very common, so may as well make them available

View File

@ -338,7 +338,7 @@ class ZipInfo (object):
if isinstance(self.filename, unicode): if isinstance(self.filename, unicode):
try: try:
return self.filename.encode('ascii'), self.flag_bits return self.filename.encode('ascii'), self.flag_bits
except UnicodeEncodeError: except:
return self.filename.encode('utf-8'), self.flag_bits | 0x800 return self.filename.encode('utf-8'), self.flag_bits | 0x800
else: else:
return self.filename, self.flag_bits return self.filename, self.flag_bits

View File

@ -765,6 +765,8 @@ class BasicNewsRecipe(object, LoggingInterface):
self.log_debug(traceback.format_exc()) self.log_debug(traceback.format_exc())
if cu is not None: if cu is not None:
ext = cu.rpartition('.')[-1] ext = cu.rpartition('.')[-1]
if '?' in ext:
ext = ''
ext = ext.lower() if ext else 'jpg' ext = ext.lower() if ext else 'jpg'
self.report_progress(1, _('Downloading cover from %s')%cu) self.report_progress(1, _('Downloading cover from %s')%cu)
cpath = os.path.join(self.output_dir, 'cover.'+ext) cpath = os.path.join(self.output_dir, 'cover.'+ext)

View File

@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes', 'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik', 'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet', 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
'joelonsoftware', 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -42,3 +42,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
feeds[-1][1].append(art) feeds[-1][1].append(art)
return feeds return feeds
def postprocess_html(self, soup, first_fetch):
html = soup.find('html')
if html is None:
return soup
html.extract()
return html

View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CommonDreams(BasicNewsRecipe):
title = u'Common Dreams'
description = u'Progressive news and views'
__author__ = u'XanthanGum'
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'Common Dreams Headlines',
u'http://www.commondreams.org/feed/headlines_rss'),
(u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'),
(u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
]

View File

@ -49,7 +49,9 @@ class Economist(BasicNewsRecipe):
if not index_started: if not index_started:
continue continue
text = string.capwords(text) text = string.capwords(text)
if text not in feeds.keys():
feeds[text] = [] feeds[text] = []
if text not in ans:
ans.append(text) ans.append(text)
key = text key = text
continue continue

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
nin.co.yu
'''
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe):
title = 'NIN online'
__author__ = 'Darko Miletic'
description = 'Nedeljne informativne novine'
no_stylesheets = True
oldest_article = 15
simultaneous_downloads = 1
delay = 1
encoding = 'utf8'
needs_subscription = True
PREFIX = 'http://www.nin.co.yu'
INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, politics, Serbia'
, '--publisher' , 'NIN'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'login_name':self.username
,'login_password':self.password
,'imageField.x':'32'
,'imageField.y':'15'
})
br.open(self.LOGIN,data)
return br
keep_only_tags =[dict(name='td', attrs={'width':'520'})]
remove_tags_after =dict(name='html')
feeds =[(u'NIN', u'http://www.nin.co.yu/misc/rss.php?feed=RSS2.0')]
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.INDEX)
link_item = soup.find('img',attrs={'width':'100','height':'137','border':'0'})
if link_item:
cover_url = self.PREFIX + link_item['src']
return cover_url

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
www.heise.de/tp
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Telepolis(BasicNewsRecipe):
title = 'Telepolis'
__author__ = 'Darko Miletic'
description = 'News from Germany in German'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
html2lrf_options = [ '--comment' , description
, '--category' , 'blog,news'
]
keep_only_tags = [
dict(name='table', attrs={'class':'inhalt-table'})
,dict(name='table', attrs={'class':'blogtable' })
]
remove_tags = [
dict(name='table', attrs={'class':'img' })
,dict(name='img' , attrs={'src':'/tp/r4/icons/inline/extlink.gif'})
]
feeds = [(u'Telepolis Newsfeed', u'http://www.heise.de/tp/news.rdf')]

View File

@ -33,6 +33,7 @@ class TimesOnline(BasicNewsRecipe):
('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'), ('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'),
('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'), ('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'),
('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'), ('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'),
('Literary Supplement', 'http://www.timesonline.co.uk/tol/feeds/rss/thetls.xml'),
] ]
def print_version(self, url): def print_version(self, url):

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
tomshardware.com tomshardware.com
''' '''
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Tomshardware(BasicNewsRecipe): class Tomshardware(BasicNewsRecipe):

View File

@ -0,0 +1,54 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch tomshardware.
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class TomsHardwareDe(BasicNewsRecipe):
    """Fetch German-language computer news from Tom's Hardware Germany.

    Downloads the tomshardware.de RSS feed and strips advertising,
    navigation, sidebar and comment chrome from each article page.
    """

    title                 = 'Tom\'s Hardware German'
    description           = 'Computer news in german'
    __author__            = 'Oliver Niesner'
    use_embedded_content  = False
    timefmt               = ' [%d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets        = True
    encoding              = 'utf-8'

    # Page furniture to drop: adverts, header/menu bars, comment widgets
    # and sidebars.  The original list contained dict(id='') twice; one
    # entry already matches every tag whose id attribute is empty, so
    # the duplicate has been removed.
    remove_tags = [dict(id='outside-advert'),
                   dict(id='advertRightWhite'),
                   dict(id='header-advert'),
                   dict(id='header-banner'),
                   dict(id='header-menu'),
                   dict(id='header-top'),
                   dict(id='header-tools'),
                   dict(id='nbComment'),
                   dict(id='internalSidebar'),
                   dict(id='header-news-infos'),
                   dict(id='breadcrumbs'),
                   dict(id=''),  # any tag with an empty id attribute
                   dict(name='div', attrs={'class':'pyjama'}),
                   # NOTE(review): 'href' is not an HTML tag name, so this
                   # filter likely never matches -- was dict(name='a', ...)
                   # intended?  Left as-is to preserve current behavior.
                   dict(name='href', attrs={'class':'comment'}),
                   dict(name='div', attrs={'class':'greyBoxR clearfix'}),
                   dict(name='div', attrs={'class':'greyBoxL clearfix'}),
                   dict(name='div', attrs={'class':'greyBox clearfix'})]

    # Everything after the last news element is boilerplate.
    remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]

    feeds = [('tomshardware',
              'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml')]

View File

@ -55,7 +55,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
# This class supports writing PDF files out, given pages produced by another # This class supports writing PDF files out, given pages produced by another
# class (typically {@link #PdfFileReader PdfFileReader}). # class (typically {@link #PdfFileReader PdfFileReader}).
class PdfFileWriter(object): class PdfFileWriter(object):
def __init__(self): def __init__(self,title=u"Unknown",author=u"Unknown"):
self._header = "%PDF-1.3" self._header = "%PDF-1.3"
self._objects = [] # array of indirect objects self._objects = [] # array of indirect objects
@ -71,7 +71,9 @@ class PdfFileWriter(object):
# info object # info object
info = DictionaryObject() info = DictionaryObject()
info.update({ info.update({
NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/") NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/"),
NameObject("/Author"): createStringObject(author),
NameObject("/Title"): createStringObject(title),
}) })
self._info = self._addObject(info) self._info = self._addObject(info)