Pull from driver-dev

This commit is contained in:
Kovid Goyal 2009-05-25 07:37:49 -07:00
commit c90c086117
33 changed files with 1544 additions and 662 deletions

View File

@ -13,6 +13,7 @@ src/calibre/manual/cli/
build build
dist dist
docs docs
nbproject/
src/calibre/gui2/pictureflow/Makefile.Debug src/calibre/gui2/pictureflow/Makefile.Debug
src/calibre/gui2/pictureflow/Makefile.Release src/calibre/gui2/pictureflow/Makefile.Release
src/calibre/gui2/pictureflow/debug/ src/calibre/gui2/pictureflow/debug/

View File

@ -89,7 +89,7 @@ if __name__ == '__main__':
include_dirs=['src/calibre/utils/msdes']), include_dirs=['src/calibre/utils/msdes']),
Extension('calibre.plugins.cPalmdoc', Extension('calibre.plugins.cPalmdoc',
sources=['src/calibre/ebooks/mobi/palmdoc.c']), sources=['src/calibre/ebooks/compression/palmdoc.c']),
PyQtExtension('calibre.plugins.pictureflow', PyQtExtension('calibre.plugins.pictureflow',
['src/calibre/gui2/pictureflow/pictureflow.cpp', ['src/calibre/gui2/pictureflow/pictureflow.cpp',

View File

@ -1,8 +1,9 @@
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os, glob import textwrap
import os
import glob
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
from calibre.constants import __version__ from calibre.constants import __version__
@ -39,172 +40,6 @@ every time you add an HTML file to the library.\
return of.name return of.name
class OPFMetadataReader(MetadataReaderPlugin):
name = 'Read OPF metadata'
file_types = set(['opf'])
description = _('Read metadata from %s files')%'OPF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation
return MetaInformation(OPF(stream, os.getcwd()))
class RTFMetadataReader(MetadataReaderPlugin):
name = 'Read RTF metadata'
file_types = set(['rtf'])
description = _('Read metadata from %s files')%'RTF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rtf import get_metadata
return get_metadata(stream)
class FB2MetadataReader(MetadataReaderPlugin):
name = 'Read FB2 metadata'
file_types = set(['fb2'])
description = _('Read metadata from %s files')%'FB2'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.fb2 import get_metadata
return get_metadata(stream)
class LRFMetadataReader(MetadataReaderPlugin):
name = 'Read LRF metadata'
file_types = set(['lrf'])
description = _('Read metadata from %s files')%'LRF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.lrf.meta import get_metadata
return get_metadata(stream)
class PDFMetadataReader(MetadataReaderPlugin):
name = 'Read PDF metadata'
file_types = set(['pdf'])
description = _('Read metadata from %s files')%'PDF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.pdf import get_metadata
return get_metadata(stream)
class LITMetadataReader(MetadataReaderPlugin):
name = 'Read LIT metadata'
file_types = set(['lit'])
description = _('Read metadata from %s files')%'LIT'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.lit import get_metadata
return get_metadata(stream)
class IMPMetadataReader(MetadataReaderPlugin):
name = 'Read IMP metadata'
file_types = set(['imp'])
description = _('Read metadata from %s files')%'IMP'
author = 'Ashish Kulkarni'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.imp import get_metadata
return get_metadata(stream)
class RBMetadataReader(MetadataReaderPlugin):
name = 'Read RB metadata'
file_types = set(['rb'])
description = _('Read metadata from %s files')%'RB'
author = 'Ashish Kulkarni'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rb import get_metadata
return get_metadata(stream)
class EPUBMetadataReader(MetadataReaderPlugin):
name = 'Read EPUB metadata'
file_types = set(['epub'])
description = _('Read metadata from %s files')%'EPUB'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.epub import get_metadata
return get_metadata(stream)
class HTMLMetadataReader(MetadataReaderPlugin):
name = 'Read HTML metadata'
file_types = set(['html'])
description = _('Read metadata from %s files')%'HTML'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.html import get_metadata
return get_metadata(stream)
class MOBIMetadataReader(MetadataReaderPlugin):
name = 'Read MOBI metadata'
file_types = set(['mobi', 'prc', 'azw'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):
from calibre.ebooks.mobi.reader import get_metadata
return get_metadata(stream)
class TOPAZMetadataReader(MetadataReaderPlugin):
name = 'Read Topaz metadata'
file_types = set(['tpz', 'azw1'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.topaz import get_metadata
return get_metadata(stream)
class ODTMetadataReader(MetadataReaderPlugin):
name = 'Read ODT metadata'
file_types = set(['odt'])
description = _('Read metadata from %s files')%'ODT'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.odt import get_metadata
return get_metadata(stream)
class TXTMetadataReader(MetadataReaderPlugin):
name = 'Read TXT metadata'
file_types = set(['txt'])
description = _('Read metadata from %s files') % 'TXT'
author = 'John Schember'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.txt import get_metadata
return get_metadata(stream)
class PDBMetadataReader(MetadataReaderPlugin):
name = 'Read PDB metadata'
file_types = set(['pdb'])
description = _('Read metadata from %s files') % 'PDB'
author = 'John Schember'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.pdb import get_metadata
return get_metadata(stream)
class LRXMetadataReader(MetadataReaderPlugin):
name = 'Read LRX metadata'
file_types = set(['lrx'])
description = _('Read metadata from %s files')%'LRX'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.lrx import get_metadata
return get_metadata(stream)
class ComicMetadataReader(MetadataReaderPlugin): class ComicMetadataReader(MetadataReaderPlugin):
@ -227,14 +62,127 @@ class ComicMetadataReader(MetadataReaderPlugin):
mi.cover_data = (ext.lower(), data) mi.cover_data = (ext.lower(), data)
return mi return mi
class ZipMetadataReader(MetadataReaderPlugin): class EPUBMetadataReader(MetadataReaderPlugin):
name = 'Read ZIP metadata' name = 'Read EPUB metadata'
file_types = set(['zip', 'oebzip']) file_types = set(['epub'])
description = _('Read metadata from ebooks in ZIP archives') description = _('Read metadata from %s files')%'EPUB'
def get_metadata(self, stream, ftype): def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.zip import get_metadata from calibre.ebooks.metadata.epub import get_metadata
return get_metadata(stream)
class FB2MetadataReader(MetadataReaderPlugin):
name = 'Read FB2 metadata'
file_types = set(['fb2'])
description = _('Read metadata from %s files')%'FB2'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.fb2 import get_metadata
return get_metadata(stream)
class HTMLMetadataReader(MetadataReaderPlugin):
name = 'Read HTML metadata'
file_types = set(['html'])
description = _('Read metadata from %s files')%'HTML'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.html import get_metadata
return get_metadata(stream)
class IMPMetadataReader(MetadataReaderPlugin):
name = 'Read IMP metadata'
file_types = set(['imp'])
description = _('Read metadata from %s files')%'IMP'
author = 'Ashish Kulkarni'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.imp import get_metadata
return get_metadata(stream)
class LITMetadataReader(MetadataReaderPlugin):
name = 'Read LIT metadata'
file_types = set(['lit'])
description = _('Read metadata from %s files')%'LIT'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.lit import get_metadata
return get_metadata(stream)
class LRFMetadataReader(MetadataReaderPlugin):
name = 'Read LRF metadata'
file_types = set(['lrf'])
description = _('Read metadata from %s files')%'LRF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.lrf.meta import get_metadata
return get_metadata(stream)
class LRXMetadataReader(MetadataReaderPlugin):
name = 'Read LRX metadata'
file_types = set(['lrx'])
description = _('Read metadata from %s files')%'LRX'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.lrx import get_metadata
return get_metadata(stream)
class MOBIMetadataReader(MetadataReaderPlugin):
name = 'Read MOBI metadata'
file_types = set(['mobi', 'prc', 'azw'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):
from calibre.ebooks.mobi.reader import get_metadata
return get_metadata(stream)
class ODTMetadataReader(MetadataReaderPlugin):
name = 'Read ODT metadata'
file_types = set(['odt'])
description = _('Read metadata from %s files')%'ODT'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.odt import get_metadata
return get_metadata(stream)
class OPFMetadataReader(MetadataReaderPlugin):
name = 'Read OPF metadata'
file_types = set(['opf'])
description = _('Read metadata from %s files')%'OPF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation
return MetaInformation(OPF(stream, os.getcwd()))
class PDBMetadataReader(MetadataReaderPlugin):
name = 'Read PDB metadata'
file_types = set(['pdb'])
description = _('Read metadata from %s files') % 'PDB'
author = 'John Schember'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.pdb import get_metadata
return get_metadata(stream)
class PDFMetadataReader(MetadataReaderPlugin):
name = 'Read PDF metadata'
file_types = set(['pdf'])
description = _('Read metadata from %s files')%'PDF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.pdf import get_metadata
return get_metadata(stream) return get_metadata(stream)
class RARMetadataReader(MetadataReaderPlugin): class RARMetadataReader(MetadataReaderPlugin):
@ -247,6 +195,58 @@ class RARMetadataReader(MetadataReaderPlugin):
from calibre.ebooks.metadata.rar import get_metadata from calibre.ebooks.metadata.rar import get_metadata
return get_metadata(stream) return get_metadata(stream)
class RBMetadataReader(MetadataReaderPlugin):
name = 'Read RB metadata'
file_types = set(['rb'])
description = _('Read metadata from %s files')%'RB'
author = 'Ashish Kulkarni'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rb import get_metadata
return get_metadata(stream)
class RTFMetadataReader(MetadataReaderPlugin):
name = 'Read RTF metadata'
file_types = set(['rtf'])
description = _('Read metadata from %s files')%'RTF'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rtf import get_metadata
return get_metadata(stream)
class TOPAZMetadataReader(MetadataReaderPlugin):
name = 'Read Topaz metadata'
file_types = set(['tpz', 'azw1'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.topaz import get_metadata
return get_metadata(stream)
class TXTMetadataReader(MetadataReaderPlugin):
name = 'Read TXT metadata'
file_types = set(['txt'])
description = _('Read metadata from %s files') % 'TXT'
author = 'John Schember'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.txt import get_metadata
return get_metadata(stream)
class ZipMetadataReader(MetadataReaderPlugin):
name = 'Read ZIP metadata'
file_types = set(['zip', 'oebzip'])
description = _('Read metadata from ebooks in ZIP archives')
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.zip import get_metadata
return get_metadata(stream)
class EPUBMetadataWriter(MetadataWriterPlugin): class EPUBMetadataWriter(MetadataWriterPlugin):
@ -268,16 +268,6 @@ class LRFMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.lrf.meta import set_metadata from calibre.ebooks.lrf.meta import set_metadata
set_metadata(stream, mi) set_metadata(stream, mi)
class RTFMetadataWriter(MetadataWriterPlugin):
name = 'Set RTF metadata'
file_types = set(['rtf'])
description = _('Set metadata in %s files')%'RTF'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.rtf import set_metadata
set_metadata(stream, mi)
class MOBIMetadataWriter(MetadataWriterPlugin): class MOBIMetadataWriter(MetadataWriterPlugin):
name = 'Set MOBI metadata' name = 'Set MOBI metadata'
@ -289,17 +279,6 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.mobi import set_metadata from calibre.ebooks.metadata.mobi import set_metadata
set_metadata(stream, mi) set_metadata(stream, mi)
class PDFMetadataWriter(MetadataWriterPlugin):
name = 'Set PDF metadata'
file_types = set(['pdf'])
description = _('Set metadata in %s files') % 'PDF'
author = 'Kovid Goyal'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.pdf import set_metadata
set_metadata(stream, mi)
class PDBMetadataWriter(MetadataWriterPlugin): class PDBMetadataWriter(MetadataWriterPlugin):
name = 'Set PDB metadata' name = 'Set PDB metadata'
@ -311,49 +290,113 @@ class PDBMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.pdb import set_metadata from calibre.ebooks.metadata.pdb import set_metadata
set_metadata(stream, mi) set_metadata(stream, mi)
class PDFMetadataWriter(MetadataWriterPlugin):
name = 'Set PDF metadata'
file_types = set(['pdf'])
description = _('Set metadata in %s files') % 'PDF'
author = 'Kovid Goyal'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.pdf import set_metadata
set_metadata(stream, mi)
class RTFMetadataWriter(MetadataWriterPlugin):
name = 'Set RTF metadata'
file_types = set(['rtf'])
description = _('Set metadata in %s files')%'RTF'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.rtf import set_metadata
set_metadata(stream, mi)
from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.mobi.input import MOBIInput from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.pdb.input import PDBInput from calibre.ebooks.pdb.input import PDBInput
from calibre.ebooks.pdf.input import PDFInput from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.fb2.output import FB2Output
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.comic.input import ComicInput
from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.mobi.output import MOBIOutput
from calibre.ebooks.pdb.output import PDBOutput
from calibre.ebooks.lrf.output import LRFOutput
from calibre.ebooks.lit.output import LITOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.input import PMLInput from calibre.ebooks.pml.input import PMLInput
from calibre.ebooks.rb.input import RBInput
from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
from calibre.ebooks.lit.output import LITOutput
from calibre.ebooks.lrf.output import LRFOutput
from calibre.ebooks.mobi.output import MOBIOutput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.pdb.output import PDBOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.output import PMLOutput from calibre.ebooks.pml.output import PMLOutput
from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybookg3.driver import CYBOOKG3
from calibre.devices.eb600.driver import EB600
from calibre.devices.jetbook.driver import JETBOOK
from calibre.devices.kindle.driver import KINDLE
from calibre.devices.kindle.driver import KINDLE2
from calibre.devices.prs500.driver import PRS500 from calibre.devices.prs500.driver import PRS500
from calibre.devices.prs505.driver import PRS505 from calibre.devices.prs505.driver import PRS505
from calibre.devices.prs700.driver import PRS700 from calibre.devices.prs700.driver import PRS700
from calibre.devices.cybookg3.driver import CYBOOKG3
from calibre.devices.kindle.driver import KINDLE
from calibre.devices.kindle.driver import KINDLE2
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.eb600.driver import EB600
from calibre.devices.jetbook.driver import JETBOOK
from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput, plugins = []
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput, plugins += [
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput] ComicInput,
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EPUBInput,
EB600, JETBOOK, BEBOOK, BEBOOK_MINI] FB2Input,
HTMLInput,
LITInput,
MOBIInput,
ODTInput,
PDBInput,
PDFInput,
PMLInput,
RBInput,
RecipeInput,
RTFInput,
TXTInput,
]
plugins += [
EPUBOutput,
FB2Output,
LITOutput,
LRFOutput,
MOBIOutput,
OEBOutput,
PDBOutput,
PDFOutput,
PMLOutput,
RBOutput,
TXTOutput,
]
plugins += [
BEBOOK,
BEBOOK_MINI,
BLACKBERRY,
CYBOOKG3,
EB600,
JETBOOK,
KINDLE,
KINDLE2,
PRS500,
PRS505,
PRS700,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

View File

@ -9,8 +9,10 @@ Transform OEB content into FB2 markup
''' '''
import os import os
import re
from base64 import b64encode from base64 import b64encode
from calibre import entity_to_unicode
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_IMAGES from calibre.ebooks.oeb.base import OEB_IMAGES
@ -25,15 +27,9 @@ TAG_MAP = {
'div' : 'p', 'div' : 'p',
} }
STYLE_MAP = {
'bold' : 'strong',
'bolder' : 'strong',
'italic' : 'emphasis',
}
STYLES = [ STYLES = [
'font-weight', ('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
'font-style', ('font-style', {'italic' : 'emphasis'}),
] ]
class FB2MLizer(object): class FB2MLizer(object):
@ -81,7 +77,13 @@ class FB2MLizer(object):
return images return images
def clean_text(self, text): def clean_text(self, text):
return text.replace('&', '') for entity in set(re.findall('&.+?;', text)):
mo = re.search('(%s)' % entity[1:-1], text)
text = text.replace(entity, entity_to_unicode(mo))
text = text.replace('&', '')
return text
def dump_text(self, elem, stylizer, tag_stack=[]): def dump_text(self, elem, stylizer, tag_stack=[]):
if not isinstance(elem.tag, basestring) \ if not isinstance(elem.tag, basestring) \
@ -107,8 +109,9 @@ class FB2MLizer(object):
fb2_text += '<%s>' % fb2_tag fb2_text += '<%s>' % fb2_tag
tag_stack.append(fb2_tag) tag_stack.append(fb2_tag)
# Processes style information
for s in STYLES: for s in STYLES:
style_tag = STYLE_MAP.get(style[s], None) style_tag = s[1].get(style[s[0]], None)
if style_tag: if style_tag:
tag_count += 1 tag_count += 1
fb2_text += '<%s>' % style_tag fb2_text += '<%s>' % style_tag

View File

@ -8,11 +8,13 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import struct
from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.header import PdbHeaderReader, PdbHeaderBuilder from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.pdb.ereader.reader import HeaderRecord from calibre.ebooks.pdb.ereader.reader132 import HeaderRecord
from calibre.ebooks.pdb.header import PdbHeaderBuilder
from calibre.ebooks.pdb.header import PdbHeaderReader
def get_metadata(stream, extract_cover=True): def get_metadata(stream, extract_cover=True):
""" """
@ -22,6 +24,9 @@ def get_metadata(stream, extract_cover=True):
stream.seek(0) stream.seek(0)
pheader = PdbHeaderReader(stream) pheader = PdbHeaderReader(stream)
# Only Dropbook produced 132 byte record0 files are supported
if len(pheader.section_data(0)) == 132:
hr = HeaderRecord(pheader.section_data(0)) hr = HeaderRecord(pheader.section_data(0))
if hr.version in (2, 10) and hr.has_metadata == 1: if hr.version in (2, 10) and hr.has_metadata == 1:
@ -43,6 +48,11 @@ def get_metadata(stream, extract_cover=True):
def set_metadata(stream, mi): def set_metadata(stream, mi):
pheader = PdbHeaderReader(stream) pheader = PdbHeaderReader(stream)
# Only Dropbook produced 132 byte record0 files are supported
if pheader.section_data(0) != 132:
return
sections = [pheader.section_data(x) for x in range(0, pheader.section_count())] sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
hr = HeaderRecord(sections[0]) hr = HeaderRecord(sections[0])
@ -79,4 +89,3 @@ def set_metadata(stream, mi):
# Write the data back to the file # Write the data back to the file
for item in sections: for item in sections:
stream.write(item) stream.write(item)

View File

@ -38,7 +38,6 @@ def get_metadata(stream, extract_cover=True):
if MetadataReader is None: if MetadataReader is None:
return MetaInformation(pheader.title, [_('Unknown')]) return MetaInformation(pheader.title, [_('Unknown')])
return MetadataReader(stream, extract_cover) return MetadataReader(stream, extract_cover)
def set_metadata(stream, mi): def set_metadata(stream, mi):

View File

@ -1,11 +1,17 @@
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Read data from .mobi files Read data from .mobi files
''' '''
import struct, os, cStringIO, re, functools, datetime, textwrap import datetime
import functools
import os
import re
import struct
import textwrap
import cStringIO
try: try:
from PIL import Image as PILImage from PIL import Image as PILImage
@ -21,8 +27,8 @@ from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS from calibre.ebooks.chardet import ENCODING_PATS
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.mobi.langcodes import main_language, sub_language from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC

View File

@ -1,27 +1,32 @@
''' '''
Write content to Mobipocket books. Write content to Mobipocket books.
''' '''
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
from collections import defaultdict
from itertools import count
from itertools import izip
import random
import re
from struct import pack from struct import pack
import time import time
import random
from cStringIO import StringIO
import re
from itertools import izip, count
from collections import defaultdict
from urlparse import urldefrag from urlparse import urldefrag
from PIL import Image from PIL import Image
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ from cStringIO import StringIO
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import namespace, prefixname
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ebooks.oeb.base import XML_NS
from calibre.ebooks.oeb.base import namespace
from calibre.ebooks.oeb.base import prefixname
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.compression.palmdoc import compress_doc
# TODO: # TODO:
# - Allow override CSS (?) # - Allow override CSS (?)

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -10,8 +9,8 @@ class PDBError(Exception):
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
FORMAT_READERS = { FORMAT_READERS = {
'PNPdPPrs': ereader_reader, 'PNPdPPrs': ereader_reader,

View File

@ -7,10 +7,27 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import struct, sys import struct
import sys
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.pdb.ereader.reader import HeaderRecord
def ereader_header_info(header):
h0 = header.section_data(0)
print 'Header Size: %s' % len(h0)
if len(h0) == 132:
print 'Header Type: Dropbook compatible'
print ''
ereader_header_info132(h0)
elif len(h0) == 202:
print 'Header Type: Makebook compatible'
print ''
ereader_header_info202(h0)
else:
raise EreaderError('Size mismatch. eReader header record size %i KB is not supported.' % len(h0))
def pdb_header_info(header): def pdb_header_info(header):
print 'PDB Header Info:' print 'PDB Header Info:'
@ -20,44 +37,75 @@ def pdb_header_info(header):
print 'Title: %s' % header.title print 'Title: %s' % header.title
print '' print ''
def ereader_header_info(header): def ereader_header_info132(h0):
h0 = header.section_data(0)
print 'Ereader Record 0 (Header) Info:' print 'Ereader Record 0 (Header) Info:'
print '' print ''
print '0-2 Version: %i' % struct.unpack('>H', h0[0:2])[0] print '0-2 Version: %i' % struct.unpack('>H', h0[0:2])[0]
print '2-4: %i' % struct.unpack('>H', h0[2:4])[0] print '2-4: %i' % struct.unpack('>H', h0[2:4])[0]
print '4-6: %i' % struct.unpack('>H', h0[4:6])[0] print '4-6: %i' % struct.unpack('>H', h0[4:6])[0]
print '6-8: %i' % struct.unpack('>H', h0[6:8])[0] print '6-8 Codepage: %i' % struct.unpack('>H', h0[6:8])[0]
print '8-10: %i' % struct.unpack('>H', h0[8:10])[0] print '8-10: %i' % struct.unpack('>H', h0[8:10])[0]
print '10-12: %i' % struct.unpack('>H', h0[10:12])[0] print '10-12: %i' % struct.unpack('>H', h0[10:12])[0]
print '12-14 Non-Text: %i' % struct.unpack('>H', h0[12:14])[0] print '12-14 Non-Text offset: %i' % struct.unpack('>H', h0[12:14])[0]
print '14-16: %i' % struct.unpack('>H', h0[14:16])[0] print '14-16: %i' % struct.unpack('>H', h0[14:16])[0]
print '16-18: %i' % struct.unpack('>H', h0[16:18])[0] print '16-18: %i' % struct.unpack('>H', h0[16:18])[0]
print '18-20: %i' % struct.unpack('>H', h0[18:20])[0] print '18-20: %i' % struct.unpack('>H', h0[18:20])[0]
print '20-22: %i' % struct.unpack('>H', h0[20:22])[0] print '20-22 Image Count: %i' % struct.unpack('>H', h0[20:22])[0]
print '22-24: %i' % struct.unpack('>H', h0[22:24])[0] print '22-24: %i' % struct.unpack('>H', h0[22:24])[0]
print '24-26: %i' % struct.unpack('>H', h0[24:26])[0] print '24-26 Has Metadata?: %i' % struct.unpack('>H', h0[24:26])[0]
print '26-28: %i' % struct.unpack('>H', h0[26:28])[0] print '26-28: %i' % struct.unpack('>H', h0[26:28])[0]
print '28-30 footnote_rec: %i' % struct.unpack('>H', h0[28:30])[0] print '28-30 Footnote Count: %i' % struct.unpack('>H', h0[28:30])[0]
print '30-32 sidebar_rec: %i' % struct.unpack('>H', h0[30:32])[0] print '30-32 Sidebar Count: %i' % struct.unpack('>H', h0[30:32])[0]
print '32-34 bookmark_offset: %i' % struct.unpack('>H', h0[32:34])[0] print '32-34 Bookmark Offset: %i' % struct.unpack('>H', h0[32:34])[0]
print '34-36: %i' % struct.unpack('>H', h0[34:36])[0] print '34-36 MAGIC: %i' % struct.unpack('>H', h0[34:36])[0]
print '36-38: %i' % struct.unpack('>H', h0[36:38])[0] print '36-38: %i' % struct.unpack('>H', h0[36:38])[0]
print '38-40: %i' % struct.unpack('>H', h0[38:40])[0] print '38-40: %i' % struct.unpack('>H', h0[38:40])[0]
print '40-42 image_data_offset: %i' % struct.unpack('>H', h0[40:42])[0] print '40-42 Image Data Offset: %i' % struct.unpack('>H', h0[40:42])[0]
print '42-44: %i' % struct.unpack('>H', h0[42:44])[0] print '42-44: %i' % struct.unpack('>H', h0[42:44])[0]
print '44-46 metadata_offset: %i' % struct.unpack('>H', h0[44:46])[0] print '44-46 Metadata Offset: %i' % struct.unpack('>H', h0[44:46])[0]
print '46-48: %i' % struct.unpack('>H', h0[46:48])[0] print '46-48: %i' % struct.unpack('>H', h0[46:48])[0]
print '48-50 footnote_offset: %i' % struct.unpack('>H', h0[48:50])[0] print '48-50 Footnote Offset: %i' % struct.unpack('>H', h0[48:50])[0]
print '50-52 sidebar_offset: %i' % struct.unpack('>H', h0[50:52])[0] print '50-52 Sidebar Offset: %i' % struct.unpack('>H', h0[50:52])[0]
print '52-54 last_data_offset: %i' % struct.unpack('>H', h0[52:54])[0] print '52-54 Last Data Offset: %i' % struct.unpack('>H', h0[52:54])[0]
for i in range(54, 131, 2): for i in range(54, 131, 2):
print '%i-%i: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0]) print '%i-%i: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0])
print '' print ''
def ereader_header_info202(h0):
    '''
    Print a word-by-word dump of a 202 byte eReader header record.

    h0 is the raw data of record 0. Each big-endian unsigned 16 bit word
    from offset 0 up to offset 200 is printed on its own line, in ascending
    offset order, as "<start>-<end>[ <label>]: <value>".
    '''
    # Offsets that carry a descriptive label in the dump.
    named = {0: 'Version', 8: 'Non-Text Offset'}

    # Offsets reported as "Garbage" (random values, see the footer line).
    garbage = set([2, 4, 6, 14, 16, 18, 20, 22, 114])
    garbage.update(range(28, 98, 2))
    garbage.update(range(100, 110, 2))

    # Single-argument print(...) behaves the same as the print statement
    # under Python 2, so the output is unchanged.
    print('Ereader Record 0 (Header) Info:')
    print('')
    for start in range(0, 202, 2):
        word = struct.unpack('>H', h0[start:start + 2])[0]
        if start in named:
            label = ' ' + named[start]
        elif start in garbage:
            label = ' Garbage'
        else:
            label = ''
        print('%i-%i%s: %i' % (start, start + 2, label, word))
    print('')
    print('* Garbage: Random values.')
    print('')
def section_lengths(header): def section_lengths(header):
print 'Section Sizes' print 'Section Sizes'
print '' print ''

View File

@ -8,183 +8,28 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, re, struct, zlib
from calibre import CurrentDir
from calibre.ebooks import DRMError
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pml.pmlconverter import pml_to_html, \ from calibre.ebooks.pdb.formatreader import FormatReader
footnote_sidebar_to_html from calibre.ebooks.pdb.ereader.reader132 import Reader132
from calibre.ebooks.mobi.palmdoc import decompress_doc from calibre.ebooks.pdb.ereader.reader202 import Reader202
from calibre.ebooks.metadata.opf2 import OPFCreator
class HeaderRecord(object):
'''
The first record in the file is always the header record. It holds
information related to the location of text, images, and so on
in the file. This is used in conjunction with the sections
defined in the file header.
'''
def __init__(self, raw):
self.version, = struct.unpack('>H', raw[0:2])
self.non_text_offset, = struct.unpack('>H', raw[12:14])
self.has_metadata, = struct.unpack('>H', raw[24:26])
self.footnote_rec, = struct.unpack('>H', raw[28:30])
self.sidebar_rec, = struct.unpack('>H', raw[30:32])
self.bookmark_offset, = struct.unpack('>H', raw[32:34])
self.image_data_offset, = struct.unpack('>H', raw[40:42])
self.metadata_offset, = struct.unpack('>H', raw[44:46])
self.footnote_offset, = struct.unpack('>H', raw[48:50])
self.sidebar_offset, = struct.unpack('>H', raw[50:52])
self.last_data_offset, = struct.unpack('>H', raw[52:54])
self.num_text_pages = self.non_text_offset - 1
self.num_image_pages = self.metadata_offset - self.image_data_offset
class Reader(FormatReader): class Reader(FormatReader):
def __init__(self, header, stream, log, encoding=None): def __init__(self, header, stream, log, encoding=None):
self.log = log record0_size = len(header.section_data(0))
self.encoding = encoding
self.sections = [] if record0_size == 132:
for i in range(header.num_sections): self.reader = Reader132(header, stream, log, encoding)
self.sections.append(header.section_data(i)) elif record0_size == 202:
self.reader = Reader202(header, stream, log, encoding)
self.header_record = HeaderRecord(self.section_data(0))
if self.header_record.version not in (2, 10):
if self.header_record.version in (260, 272):
raise DRMError('eReader DRM is not supported.')
else: else:
raise EreaderError('Unknown book version %i.' % self.header_record.version) raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
from calibre.ebooks.metadata.pdb import get_metadata
self.mi = get_metadata(stream, False)
def section_data(self, number):
return self.sections[number]
def decompress_text(self, number):
if self.header_record.version == 2:
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
if self.header_record.version == 10:
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
def get_image(self, number):
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
return 'empty', ''
data = self.section_data(number)
name = data[4:4+32].strip('\x00')
img = data[62:]
return name, img
def get_text_page(self, number):
'''
Only palmdoc and zlib compressed are supported. The text is
assumed to be encoded as Windows-1252. The encoding is part of
the eReader file spec and should always be this encoding.
'''
if number not in range(1, self.header_record.num_text_pages + 1):
return ''
return self.decompress_text(number)
def extract_content(self, output_dir): def extract_content(self, output_dir):
output_dir = os.path.abspath(output_dir) return self.reader.extract_content(output_dir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
html = u'<html><head><title></title></head><body>'
for i in range(1, self.header_record.num_text_pages + 1):
self.log.debug('Extracting text page %i' % i)
html += pml_to_html(self.get_text_page(i))
if self.header_record.footnote_rec > 0:
html += '<br /><h1>%s</h1>' % _('Footnotes')
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)):
self.log.debug('Extracting footnote page %i' % i)
html += '<dl>'
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
html += '</dl>'
if self.header_record.sidebar_rec > 0:
html += '<br /><h1>%s</h1>' % _('Sidebar')
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)):
self.log.debug('Extracting sidebar page %i' % i)
html += '<dl>'
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
html += '</dl>'
html += '</body></html>'
with CurrentDir(output_dir):
with open('index.html', 'wb') as index:
self.log.debug('Writing text to index.html')
index.write(html.encode('utf-8'))
if not os.path.exists(os.path.join(output_dir, 'images/')):
os.makedirs(os.path.join(output_dir, 'images/'))
images = []
with CurrentDir(os.path.join(output_dir, 'images/')):
for i in range(0, self.header_record.num_image_pages):
name, img = self.get_image(self.header_record.image_data_offset + i)
images.append(name)
with open(name, 'wb') as imgf:
self.log.debug('Writing image %s to images/' % name)
imgf.write(img)
opf_path = self.create_opf(output_dir, images)
return opf_path
def create_opf(self, output_dir, images):
with CurrentDir(output_dir):
opf = OPFCreator(output_dir, self.mi)
manifest = [('index.html', None)]
for i in images:
manifest.append((os.path.join('images/', i), None))
opf.create_manifest(manifest)
opf.create_spine(['index.html'])
with open('metadata.opf', 'wb') as opffile:
opf.render(opffile)
return os.path.join(output_dir, 'metadata.opf')
def dump_pml(self): def dump_pml(self):
''' return self.reader.dump_pml()
This is primarily used for debugging and 3rd party tools to
get the plm markup that comprises the text in the file.
'''
pml = ''
for i in range(1, self.header_record.num_text_pages + 1):
pml += self.get_text_page(i)
return pml
def dump_images(self, output_dir):
'''
This is primarily used for debugging and 3rd party tools to
get the images in the file.
'''
if not os.path.exists(output_dir):
os.makedirs(output_dir)
with CurrentDir(output_dir):
for i in range(0, self.header_record.num_image_pages):
name, img = self.get_image(self.header_record.image_data_offset + i)
with open(name, 'wb') as imgf:
imgf.write(img)
def dump_images(self):
return self.reader.dump_images()

View File

@ -0,0 +1,192 @@
# -*- coding: utf-8 -*-
'''
Read content from ereader pdb file with a 132 byte header created by Dropbook.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import re
import struct
import zlib
from calibre import CurrentDir
from calibre.ebooks import DRMError
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pml.pmlconverter import footnote_sidebar_to_html
from calibre.ebooks.pml.pmlconverter import pml_to_html
class HeaderRecord(object):
    '''
    Parsed view of the header record (record 0) of the file.

    The header record describes where text, images and the other data
    records are located, and is used together with the sections defined in
    the file header. Every field is a big-endian unsigned 16 bit integer.
    '''

    # (attribute name, byte offset in the raw record), in ascending offset
    # order.
    _FIELDS = (
        ('version', 0),
        ('non_text_offset', 12),
        ('has_metadata', 24),
        ('footnote_rec', 28),
        ('sidebar_rec', 30),
        ('bookmark_offset', 32),
        ('image_data_offset', 40),
        ('metadata_offset', 44),
        ('footnote_offset', 48),
        ('sidebar_offset', 50),
        ('last_data_offset', 52),
    )

    def __init__(self, raw):
        for attr, start in self._FIELDS:
            value, = struct.unpack('>H', raw[start:start + 2])
            setattr(self, attr, value)

        # Derived counts: text pages occupy records 1 .. non_text_offset - 1,
        # image records run from image_data_offset up to metadata_offset.
        self.num_text_pages = self.non_text_offset - 1
        self.num_image_pages = self.metadata_offset - self.image_data_offset
class Reader132(FormatReader):
    '''
    Reader for eReader PDB files whose header record is 132 bytes long.

    All records are loaded into memory on construction; text pages are
    converted from PML to HTML and written out together with the images
    and an OPF file by extract_content().
    '''

    def __init__(self, header, stream, log, encoding=None):
        self.log = log
        self.encoding = encoding

        # Keep every record of the file in memory, indexed by record number.
        self.sections = [header.section_data(n) for n in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        # Versions 2 and 10 are the only ones that can be decoded; 260 and
        # 272 are reported as DRM protected.
        version = self.header_record.version
        if version in (260, 272):
            raise DRMError('eReader DRM is not supported.')
        if version not in (2, 10):
            raise EreaderError('Unknown book version %i.' % version)

        from calibre.ebooks.metadata.pdb import get_metadata
        self.mi = get_metadata(stream, False)

    def section_data(self, number):
        '''Return the raw data of record `number`.'''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Return the decoded text of record `number`.

        Version 2 records are PalmDoc compressed, version 10 records are
        zlib compressed. The text is decoded with the user supplied
        encoding, or cp1252 when none was given.
        '''
        codec = 'cp1252' if self.encoding is None else self.encoding
        version = self.header_record.version
        if version == 2:
            return decompress_doc(self.section_data(number)).decode(codec)
        if version == 10:
            return zlib.decompress(self.section_data(number)).decode(codec)

    def get_image(self, number):
        '''
        Return (name, data) for the image stored in record `number`, or
        ('empty', '') when the record lies outside the image records.
        '''
        first = self.header_record.image_data_offset
        last = first + self.header_record.num_image_pages - 1
        if not first <= number <= last:
            return 'empty', ''

        record = self.section_data(number)
        # The name is a NUL padded 32 byte field at offset 4; the image
        # data itself starts at offset 62.
        return record[4:4 + 32].strip('\x00'), record[62:]

    def get_text_page(self, number):
        '''
        Only palmdoc and zlib compressed are supported. The text is
        assumed to be encoded as Windows-1252. The encoding is part of
        the eReader file spec and should always be this encoding.
        '''
        if number in range(1, self.header_record.num_text_pages + 1):
            return self.decompress_text(number)
        return ''

    def extract_content(self, output_dir):
        '''
        Write index.html, the images/ directory and metadata.opf below
        `output_dir` and return the path of the OPF file.
        '''
        output_dir = os.path.abspath(output_dir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        hr = self.header_record
        codec = 'cp1252' if self.encoding is None else self.encoding

        html = u'<html><head><title></title></head><body>'

        for page in range(1, hr.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % page)
            html += pml_to_html(self.get_text_page(page))

        if hr.footnote_rec > 0:
            html += '<br /><h1>%s</h1>' % _('Footnotes')
            # The record at footnote_offset lists the NUL terminated ids of
            # the footnotes stored in the records that follow it.
            footnoteids = re.findall('\w+(?=\x00)', self.section_data(hr.footnote_offset).decode(codec))
            footnote_records = range(hr.footnote_offset + 1, hr.footnote_offset + hr.footnote_rec)
            for fid, rec in enumerate(footnote_records):
                self.log.debug('Extracting footnote page %i' % rec)
                html += '<dl>'
                html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(rec))
                html += '</dl>'

        if hr.sidebar_rec > 0:
            html += '<br /><h1>%s</h1>' % _('Sidebar')
            # Same layout as the footnotes: an id index record followed by
            # the sidebar records.
            sidebarids = re.findall('\w+(?=\x00)', self.section_data(hr.sidebar_offset).decode(codec))
            sidebar_records = range(hr.sidebar_offset + 1, hr.sidebar_offset + hr.sidebar_rec)
            for sid, rec in enumerate(sidebar_records):
                self.log.debug('Extracting sidebar page %i' % rec)
                html += '<dl>'
                html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(rec))
                html += '</dl>'

        html += '</body></html>'

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)

        images = []
        with CurrentDir(images_dir):
            for step in range(0, hr.num_image_pages):
                name, img = self.get_image(hr.image_data_offset + step)
                images.append(name)
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/' % name)
                    imgf.write(img)

        return self.create_opf(output_dir, images)

    def create_opf(self, output_dir, images):
        '''
        Write metadata.opf into `output_dir`, listing index.html and every
        name in `images` in the manifest, and return its path.
        '''
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]
            manifest.extend((os.path.join('images/', name), None) for name in images)

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

    def dump_pml(self):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the pml markup that comprises the text in the file.
        '''
        pml = ''
        last_page = self.header_record.num_text_pages
        page = 1
        while page <= last_page:
            pml += self.get_text_page(page)
            page += 1
        return pml

    def dump_images(self, output_dir):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the images in the file.
        '''
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with CurrentDir(output_dir):
            for step in range(0, self.header_record.num_image_pages):
                name, img = self.get_image(self.header_record.image_data_offset + step)
                with open(name, 'wb') as imgf:
                    imgf.write(img)

View File

@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
'''
Read content from ereader pdb file with a 202 byte header created by Makebook.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
from calibre import CurrentDir
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.pml.pmlconverter import pml_to_html
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError
class HeaderRecord(object):
    '''
    Parsed view of the header record (record 0) of the file.

    The header record describes where text, images and the other data
    records are located, and is used together with the sections defined in
    the file header.
    '''

    def __init__(self, raw):
        def word_at(start):
            # Big-endian unsigned 16 bit integer stored at byte `start`.
            return struct.unpack('>H', raw[start:start + 2])[0]

        self.version = word_at(0)
        self.non_text_offset = word_at(8)
        # Text pages occupy records 1 .. non_text_offset - 1.
        self.num_text_pages = self.non_text_offset - 1
class Reader202(FormatReader):
    '''
    Reader for eReader PDB files whose header record is 202 bytes long.

    All records are loaded into memory on construction. Text records are
    XORed with 0xA5 and PalmDoc compressed; image records are recognised
    by their leading 'PNG' marker.
    '''

    def __init__(self, header, stream, log, encoding=None):
        self.log = log
        self.encoding = encoding

        # Keep every record of the file in memory, indexed by record number.
        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))

        self.header_record = HeaderRecord(self.section_data(0))

        if self.header_record.version != 4:
            raise EreaderError('Unknown book version %i.' % self.header_record.version)

        from calibre.ebooks.metadata.pdb import get_metadata
        self.mi = get_metadata(stream, False)

    def section_data(self, number):
        '''Return the raw data of record `number`.'''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Return the decoded text of record `number`.

        Every byte of the record is XORed with 0xA5 before PalmDoc
        decompression. The result is decoded with the user supplied
        encoding, or cp1252 when none was given.
        '''
        codec = 'cp1252' if self.encoding is None else self.encoding
        unmasked = ''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])
        return decompress_doc(unmasked).decode(codec)

    def get_image(self, number):
        '''
        Return (name, data) for the image in record `number`, or
        (None, None) when the record does not start with 'PNG'.
        '''
        name = None
        img = None

        data = self.section_data(number)
        if data.startswith('PNG'):
            # The name is a NUL padded 32 byte field at offset 4; the image
            # data itself starts at offset 62.
            name = data[4:4 + 32].strip('\x00')
            img = data[62:]

        return name, img

    def get_text_page(self, number):
        '''
        Only palmdoc compression is supported. The text is xored with 0xA5 and
        assumed to be encoded as Windows-1252. The encoding is part of
        the eReader file spec and should always be this encoding.
        '''
        if number not in range(1, self.header_record.num_text_pages + 1):
            return ''

        return self.decompress_text(number)

    def extract_content(self, output_dir):
        '''
        Write index.html, the images/ directory and metadata.opf below
        `output_dir` and return the path of the OPF file.
        '''
        output_dir = os.path.abspath(output_dir)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        html = u'<html><head><title></title></head><body>'

        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            html += pml_to_html(self.get_text_page(i))

        html += '</body></html>'

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)
        images = []
        with CurrentDir(images_dir):
            # This header exposes no image offsets, so every record after
            # the text is probed and only the PNG ones are written.
            for i in range(self.header_record.non_text_offset, len(self.sections)):
                name, img = self.get_image(i)
                if name:
                    images.append(name)
                    with open(name, 'wb') as imgf:
                        self.log.debug('Writing image %s to images/' % name)
                        imgf.write(img)

        opf_path = self.create_opf(output_dir, images)

        return opf_path

    def create_opf(self, output_dir, images):
        '''
        Write metadata.opf into `output_dir`, listing index.html and every
        name in `images` in the manifest, and return its path.
        '''
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images/', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

    def dump_pml(self):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the pml markup that comprises the text in the file.
        '''
        pml = ''

        for i in range(1, self.header_record.num_text_pages + 1):
            pml += self.get_text_page(i)

        return pml

    def dump_images(self, output_dir):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the images in the file.

        Every record from non_text_offset onwards is probed, exactly as
        extract_content() does, and records that are not images are
        skipped. The HeaderRecord of this format defines neither
        num_image_pages nor image_data_offset, so they cannot be used here.
        '''
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with CurrentDir(output_dir):
            for i in range(self.header_record.non_text_offset, len(self.sections)):
                name, img = self.get_image(i)
                if name:
                    with open(name, 'wb') as imgf:
                        imgf.write(img)

View File

@ -8,9 +8,11 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import struct, zlib import struct
import zlib
import Image, cStringIO import Image
import cStringIO
from calibre.ebooks.pdb.formatwriter import FormatWriter from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.oeb.base import OEB_IMAGES from calibre.ebooks.oeb.base import OEB_IMAGES
@ -97,7 +99,7 @@ class Writer(FormatWriter):
publisher = '' publisher = ''
isbn = '' isbn = ''
if metadata != None: if metadata:
if len(metadata.title) >= 1: if len(metadata.title) >= 1:
title = metadata.title[0].value title = metadata.title[0].value
if len(metadata.creator) >= 1: if len(metadata.creator) >= 1:

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
''' '''
Read the header data from a pdb file. Read the header data from a pdb file.
''' '''
@ -8,7 +7,9 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, struct, time import re
import struct
import time
class PdbHeaderReader(object): class PdbHeaderReader(object):

View File

@ -8,11 +8,13 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, struct, zlib import os
import struct
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.mobi.palmdoc import decompress_doc from calibre.ebooks.txt.processor import opf_writer
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer from calibre.ebooks.txt.processor import txt_to_markdown
class HeaderRecord(object): class HeaderRecord(object):
''' '''

View File

@ -10,10 +10,11 @@ __docformat__ = 'restructuredtext en'
import struct import struct
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.pdb.formatwriter import FormatWriter from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.pdb.header import PdbHeaderBuilder from calibre.ebooks.pdb.header import PdbHeaderBuilder
from calibre.ebooks.txt.writer import TxtNewlines
from calibre.ebooks.txt.writer import TxtWriter
MAX_RECORD_SIZE = 4096 MAX_RECORD_SIZE = 4096

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -23,11 +22,13 @@ class PDFInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
html = pdftohtml(stream.name) html = pdftohtml(stream.name)
if self._preprocess_html_for_viewer: if self._preprocess_html_for_viewer:
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
prepro = HTMLPreProcessor(lambda x:x, False) prepro = HTMLPreProcessor(lambda x:x, False)
html = prepro(html.decode('utf-8')).encode('utf-8') html = prepro(html.decode('utf-8')).encode('utf-8')
with open('index.html', 'wb') as index: with open('index.html', 'wb') as index:
index.write(html) index.write(html)

View File

@ -1,12 +1,14 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
'2009, John Schember <john@nachtimwald.com>' '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import errno, os, sys, subprocess import errno
import os
import sys
import subprocess
from functools import partial from functools import partial
from calibre.ebooks import ConversionError, DRMError from calibre.ebooks import ConversionError, DRMError

View File

@ -8,7 +8,8 @@ __docformat__ = 'restructuredtext en'
Transform OEB content into PML markup Transform OEB content into PML markup
''' '''
import os, re import os
import re
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
@ -40,6 +41,31 @@ STYLES = [
('text-align', {'right' : 'r', 'center' : 'c'}), ('text-align', {'right' : 'r', 'center' : 'c'}),
] ]
BLOCK_TAGS = [
'p',
]
BLOCK_STYLES = [
'block',
]
LINK_TAGS = [
'a',
]
SEPARATE_TAGS = [
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'p',
'div',
'li',
'tr',
]
class PMLMLizer(object): class PMLMLizer(object):
def __init__(self, ignore_tables=False): def __init__(self, ignore_tables=False):
self.ignore_tables = ignore_tables self.ignore_tables = ignore_tables
@ -62,7 +88,7 @@ class PMLMLizer(object):
def add_page_anchor(self, href): def add_page_anchor(self, href):
href = os.path.splitext(os.path.basename(href))[0] href = os.path.splitext(os.path.basename(href))[0]
return '\\Q="%s"' % href return u'\\Q="%s"' % href
def clean_text(self, text): def clean_text(self, text):
# Remove excess spaces at beginning and end of lines # Remove excess spaces at beginning and end of lines
@ -84,7 +110,8 @@ class PMLMLizer(object):
text = text.replace('\\Q="%s"' % unused, '') text = text.replace('\\Q="%s"' % unused, '')
for entity in set(re.findall('&.+?;', text)): for entity in set(re.findall('&.+?;', text)):
text = text.replace(entity, entity_to_unicode(entity[1:-1])) mo = re.search('(%s)' % entity[1:-1], text)
text = text.replace(entity, entity_to_unicode(mo))
return text return text
@ -104,7 +131,7 @@ class PMLMLizer(object):
tag_count = 0 tag_count = 0
# Are we in a paragraph block? # Are we in a paragraph block?
if tag == 'p' or style['display'] in ('block'): if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
if 'block' not in tag_stack: if 'block' not in tag_stack:
tag_count += 1 tag_count += 1
tag_stack.append('block') tag_stack.append('block')
@ -136,7 +163,7 @@ class PMLMLizer(object):
# Special processing of tags that require an argument. # Special processing of tags that require an argument.
# Anchors links # Anchors links
if tag == 'a' and 'q' not in tag_stack: if tag in LINK_TAGS and 'q' not in tag_stack:
href = elem.get('href') href = elem.get('href')
if href and '://' not in href: if href and '://' not in href:
if '#' in href: if '#' in href:
@ -168,7 +195,7 @@ class PMLMLizer(object):
for i in range(0, tag_count): for i in range(0, tag_count):
close_tag_list.insert(0, tag_stack.pop()) close_tag_list.insert(0, tag_stack.pop())
text += self.close_tags(close_tag_list) text += self.close_tags(close_tag_list)
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'li', 'tr'): if tag in SEPARATE_TAGS:
text += os.linesep + os.linesep text += os.linesep + os.linesep
if 'block' not in tag_stack: if 'block' not in tag_stack:

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
# Magic bytes (14 of them) compared against the start of a file to recognise
# a RocketBook (.rb) container.
HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
class RocketBookError(Exception):
    '''Raised when a RocketBook (.rb) file cannot be read.'''
def unique_name(name, used_names):
    '''
    Return a file name shorter than 32 characters that is not in used_names.

    The directory part of `name` is dropped. If the remaining base name is
    already short enough and unused it is returned unchanged. Otherwise a
    name of the form "NNNN-<stem>.<ext>" is built, where NNNN is a zero
    padded counter, the stem is cut to 22 characters and the extension to
    3 characters, so the result is at most 31 characters long.

    used_names is only read, never modified. If all 9999 counters are
    taken, the last candidate is returned even though it collides.
    '''
    name = os.path.basename(name)
    if len(name) < 32 and name not in used_names:
        return name

    stem, ext = os.path.splitext(name)
    # splitext keeps the leading dot; drop it before truncating so the
    # result has exactly one dot before the extension.
    ext = ext[1:4]
    suffix = '.' + ext if ext else ''
    base_name = stem[:22]

    for i in range(0, 9999):
        name = '%s-%s%s' % (str(i).rjust(4, '0'), base_name, suffix)
        if name not in used_names:
            break
    return name

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rb.reader import Reader
from calibre.customize.conversion import InputFormatPlugin
class RBInput(InputFormatPlugin):
    '''Input plugin that hands files with the "rb" extension to the RB Reader.'''

    name        = 'RB Input'
    author      = 'John Schember'
    description = 'Convert RB files to HTML'
    file_types  = set(['rb'])

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Extract the book in `stream` into the current working directory
        and return whatever the reader's extract_content() returns.
        '''
        rb_reader = Reader(stream, log, options.input_encoding)
        return rb_reader.extract_content(os.getcwd())

View File

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ebooks.rb.writer import RBWriter
class RBOutput(OutputFormatPlugin):
    '''Output plugin that writes an OEB book through RBWriter.'''

    name = 'RB Output'
    author = 'John Schember'
    file_type = 'rb'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write oeb_book to output_path.

        output_path is either a file system path or an object with a
        write() method. For a path, missing parent directories are
        created and the file is opened and closed here. An object passed
        in by the caller is rewound and truncated, and left open.
        '''
        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            writer = RBWriter(opts, log)

            out_stream.seek(0)
            out_stream.truncate()

            writer.write_content(oeb_book, out_stream, oeb_book.metadata)
        finally:
            # Only close what was opened here, and do so even when the
            # writer fails, so the file handle is not leaked.
            if close:
                out_stream.close()

View File

@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into RB compatible markup.
'''
import os
import re
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
# XHTML tags that are copied to the output as upper-cased tags of the same
# name.
TAGS = [
    'b', 'big', 'blockquote', 'br',
    'center', 'code',
    'div',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
    'i',
    'li',
    'ol',
    'p', 'pre',
    'small', 'sub', 'sup',
    'ul',
]

# Tags whose href attribute is turned into an internal link.
LINK_TAGS = ['a']

# (CSS property, {property value: tag to emit}) pairs used to express style
# information as tags.
STYLES = [
    ('font-weight', {'bold': 'b', 'bolder': 'b'}),
    ('font-style', {'italic': 'i'}),
    ('text-align', {'center': 'center'}),
]
class RBMLizer(object):
def __init__(self, name_map={}, ignore_tables=False):
self.name_map = name_map
self.ignore_tables = ignore_tables
def extract_content(self, oeb_book, opts):
oeb_book.logger.info('Converting XHTML to RB markup...')
self.oeb_book = oeb_book
self.opts = opts
return self.mlize_spine()
def mlize_spine(self):
output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
for item in self.oeb_book.spine:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.add_page_anchor(item.href)
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += u'</BODY></HTML>'
output = self.clean_text(output)
return output
def add_page_anchor(self, href):
href = os.path.splitext(os.path.basename(href))[0]
return u'<A NAME="%s"></A>' % href
def clean_text(self, text):
# Remove anchors that do not have links
anchors = set(re.findall(r'(?<=<A NAME=").+?(?="></A>)', text))
links = set(re.findall(r'(?<=<A HREF="#).+?(?=">)', text))
for unused in anchors.difference(links):
text = text.replace('<A NAME="%s"></A>' % unused, '')
return text
def dump_text(self, elem, stylizer, tag_stack=[]):
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return u''
text = u''
style = stylizer.style(elem)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return u''
tag = barename(elem.tag)
tag_count = 0
# Process tags that need special processing and that do not have inner
# text. Usually these require an argument
if tag == 'img':
src = os.path.basename(elem.get('src'))
name = self.name_map.get(src, src)
text += '<IMG SRC="%s">' % name
rb_tag = tag.upper() if tag in TAGS else None
if rb_tag:
tag_count += 1
text += '<%s>' % rb_tag
tag_stack.append(rb_tag)
if tag in LINK_TAGS:
href = elem.get('href')
if href:
if '://' not in href:
if '#' in href:
href = href.partition('#')[2]
href = os.path.splitext(os.path.basename(href))[0]
tag_count += 1
text += '<A HREF="#%s">' % href
tag_stack.append('A')
# Anchor ids
id_name = elem.get('id')
if id_name:
text += '<A NAME="%s"></A>' % os.path.splitext(id_name)[0]
# Processes style information
for s in STYLES:
style_tag = s[1].get(style[s[0]], None)
if style_tag:
style_tag = style_tag.upper()
tag_count += 1
text += '<%s>' % style_tag
tag_stack.append(style_tag)
# Proccess tags that contain text.
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
text += elem.text
for item in elem:
text += self.dump_text(item, stylizer, tag_stack)
close_tag_list = []
for i in range(0, tag_count):
close_tag_list.insert(0, tag_stack.pop())
text += self.close_tags(close_tag_list)
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
text += elem.tail
return text
def close_tags(self, tags):
    """
    Build the closing markup for ``tags``.

    Tags are closed starting from the end of the list. The list is
    consumed in the process and is empty when this returns.
    """
    closing = u''
    while tags:
        closing += '</%s>' % tags.pop()
    return closing

View File

@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
import zlib
from urllib import unquote as urlunquote
from calibre import CurrentDir
from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import RocketBookError
from calibre.ebooks.metadata.rb import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
class RBToc(list):
    """List of the table-of-contents entries of an RB file."""

    class Item(object):
        """One table-of-contents entry: name, size, offset and flags."""

        def __init__(self, name='', size=0, offset=0, flags=0):
            self.name, self.size = name, size
            self.offset, self.flags = offset, flags
class Reader(object):
    """
    Extract the pages and images stored in an RB file.

    The stream is validated on construction, then its metadata and table
    of contents are loaded. ``extract_content`` writes the HTML pages and
    PNG images to a directory together with a ``metadata.opf``.
    """

    def __init__(self, stream, log, encoding=None):
        self.stream = stream
        self.log = log
        # Codec for page text; None means cp1252 is used.
        self.encoding = encoding

        self.verify_file()

        self.mi = get_metadata(self.stream)
        self.toc = self.get_toc()

    def read_i32(self):
        """Read a little-endian unsigned 32-bit integer from the stream."""
        (value,) = struct.unpack('<I', self.stream.read(4))
        return value

    def verify_file(self):
        """
        Check the header and the recorded file size.

        Raises RocketBookError when the first 14 bytes are not HEADER or
        when the size stored at offset 28 differs from the stream length.
        """
        stream = self.stream

        stream.seek(0)
        if stream.read(14) != HEADER:
            raise RocketBookError('Could not read file: %s. Does not contain a valid RocketBook Header.' % stream.name)

        stream.seek(28)
        recorded_size = self.read_i32()
        stream.seek(0, os.SEEK_END)
        if recorded_size != stream.tell():
            raise RocketBookError('File is corrupt. The file size recorded in the header does not match the actual file size.')

    def get_toc(self):
        """
        Read the table of contents.

        The TOC position is stored at offset 24. The TOC starts with the
        entry count; each entry is a 32-byte NUL-padded, URL-quoted name
        followed by size, offset and flags.
        """
        self.stream.seek(24)
        toc_offset = self.read_i32()
        self.stream.seek(toc_offset)

        entry_count = self.read_i32()
        entries = RBToc()
        for unused in range(entry_count):
            raw_name = self.stream.read(32).strip('\x00')
            size = self.read_i32()
            offset = self.read_i32()
            flags = self.read_i32()
            entries.append(RBToc.Item(name=urlunquote(raw_name), size=size,
                                      offset=offset, flags=flags))
        return entries

    def get_text(self, toc_item, output_dir):
        """
        Write the text of ``toc_item`` to ``output_dir`` as UTF-8.

        Entries flagged 1 or 2 are skipped. Entries flagged 8 hold a
        sequence of zlib-compressed chunks; anything else is read verbatim.
        """
        if toc_item.flags in (1, 2):
            return

        codec = self.encoding if self.encoding is not None else 'cp1252'
        self.stream.seek(toc_item.offset)

        if toc_item.flags == 8:
            chunk_count = self.read_i32()
            self.read_i32()  # Uncompressed size, not needed here.
            chunk_sizes = [self.read_i32() for unused in range(chunk_count)]
            pieces = []
            for chunk_size in chunk_sizes:
                compressed = self.stream.read(chunk_size)
                pieces.append(zlib.decompress(compressed).decode(codec))
        else:
            pieces = [self.stream.read(toc_item.size).decode(codec)]

        target = os.path.join(output_dir, toc_item.name)
        with open(target, 'wb') as html:
            html.write(u''.join(pieces).encode('utf-8'))

    def get_image(self, toc_item, output_dir):
        """Copy the raw bytes of ``toc_item`` to ``output_dir`` if its flags are 0."""
        if toc_item.flags == 0:
            self.stream.seek(toc_item.offset)
            payload = self.stream.read(toc_item.size)

            target = os.path.join(output_dir, toc_item.name)
            with open(target, 'wb') as img:
                img.write(payload)

    def extract_content(self, output_dir):
        """
        Extract every HTML page and PNG image listed in the TOC.

        Returns the path of the OPF file written to ``output_dir``.
        """
        html, images = [], []

        for item in self.toc:
            lowered = item.name.lower()
            if lowered.endswith('html'):
                html.append(item.name)
                self.get_text(item, output_dir)
            if lowered.endswith('png'):
                images.append(item.name)
                self.get_image(item, output_dir)

        return self.create_opf(output_dir, html, images)

    def create_opf(self, output_dir, pages, images):
        """
        Write ``metadata.opf`` into ``output_dir``.

        The manifest lists pages followed by images; the spine lists the
        pages. Returns the path of the file written.
        """
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            opf.create_manifest([(entry, None) for entry in pages + images])
            opf.create_spine(pages)

            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

View File

@ -0,0 +1,143 @@
import os.path
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
import zlib
import Image
import cStringIO
from calibre.ebooks.rb.rbml import RBMLizer
from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import unique_name
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.constants import __appname__, __version__
# Number of bytes of encoded text placed in each separately
# zlib-compressed record of the book text.
TEXT_RECORD_SIZE = 4096
class TocItem(object):
    """Name, size and flags of one entry written to the RB table of contents."""

    def __init__(self, name, size, flags):
        self.name, self.size, self.flags = name, size, flags
class RBWriter(object):
    """
    Write an OEB book to a stream in the RB container layout.

    The output holds an info section, the book text as zlib-compressed
    records, an index placeholder and the images converted to greyscale
    PNG.
    """

    def __init__(self, opts, log):
        self.opts, self.log = opts, log
        # Original image basename -> name used for it inside the output.
        self.name_map = {}

    def write_content(self, oeb_book, out_stream, metadata=None):
        """
        Write ``oeb_book`` to ``out_stream``.

        ``out_stream`` must support ``tell`` and ``seek``: the total file
        size is patched into the header (offset 0x1c) after everything
        else has been written.
        """
        # Images are gathered before the text so that name_map is filled
        # by the time the text is generated.
        info = [('info.info', self._info_section(metadata))]
        images = self._images(oeb_book.manifest)
        text_size, chuncks = self._text(oeb_book)
        chunck_sizes = [len(c) for c in chuncks]
        text = [('index.html', chuncks)]
        hidx = [('index.hidx', ' ')]

        toc_items = []
        for entry in info + text + hidx + images:
            name, data = entry
            if entry in text:
                flags = 8
                # Chunk count + uncompressed size (8 bytes), one 4-byte
                # size per chunk, then the compressed chunks themselves.
                size = sum(chunck_sizes) + 8 + (len(chunck_sizes) * 4)
            elif entry in info:
                flags = 2
                size = len(data)
            else:
                flags = 0
                size = len(data)
            toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags))
        page_count = len(toc_items)

        write = out_stream.write
        pack = struct.pack

        # Header. The table of contents starts at 0x128; the total size
        # field at 0x1c is zero for now and filled in at the end.
        write(HEADER)
        write(pack('<I', 0))
        write(pack('<IH', 0, 0))
        write(pack('<I', 0x128))
        write(pack('<I', 0))
        for unused in range(0x20, 0x128, 4):
            write(pack('<I', 0))

        # Table of contents: entry count, then 44 bytes per entry.
        write(pack('<I', page_count))
        offset = out_stream.tell() + (len(toc_items) * 44)
        for toc_entry in toc_items:
            write(toc_entry.name)
            write(pack('<I', toc_entry.size))
            write(pack('<I', offset))
            write(pack('<I', toc_entry.flags))
            offset += toc_entry.size

        write(info[0][1])

        # Compressed text with proper heading
        compressed = text[0][1]
        write(pack('<I', len(compressed)))
        write(pack('<I', text_size))
        for chunck_size in chunck_sizes:
            write(pack('<I', chunck_size))
        for chunck in compressed:
            write(chunck)

        for unused_name, payload in hidx + images:
            write(payload)

        total_size = out_stream.tell()
        out_stream.seek(0x1c)
        write(pack('<I', total_size))

    def _text(self, oeb_book):
        """
        Return ``(size, records)`` for the book text.

        ``size`` is the length of the cp1252-encoded text and ``records``
        the list of its TEXT_RECORD_SIZE-byte slices, each compressed
        with zlib.
        """
        rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables)
        raw = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')

        record_count = (len(raw) // TEXT_RECORD_SIZE) + 1
        records = []
        for index in range(record_count):
            start = index * TEXT_RECORD_SIZE
            records.append(zlib.compress(raw[start:start + TEXT_RECORD_SIZE], 9))

        return (len(raw), records)

    def _images(self, manifest):
        """
        Convert every image in ``manifest`` to a greyscale PNG.

        Returns a list of ``(name, png_data)`` tuples and records the
        mapping from each original basename to its new name in
        ``self.name_map``.
        """
        images = []
        used_names = []

        for item in manifest:
            if item.media_type not in OEB_IMAGES:
                continue

            im = Image.open(cStringIO.StringIO(item.data)).convert('L')
            buf = cStringIO.StringIO()
            im.save(buf, 'PNG')
            png_data = buf.getvalue()

            original = os.path.basename(item.href)
            name = unique_name('%s.png' % os.path.splitext(original)[0], used_names)
            used_names.append(name)
            self.name_map[original] = name

            images.append((name, png_data))

        return images

    def _info_section(self, metadata):
        """Build the text of the info section, adding title and authors when given."""
        lines = ['TYPE=2\n']

        if metadata:
            if len(metadata.title) > 0:
                lines.append('TITLE=%s\n' % metadata.title[0].value)
            if len(metadata.creator) > 0:
                from calibre.ebooks.metadata import authors_to_string
                lines.append('AUTHOR=%s\n' % authors_to_string([x.value for x in metadata.creator]))

        lines.append('GENERATOR=%s - %s\n' % (__appname__, __version__))
        lines.append('PARSE=1\n')
        lines.append('OUTPUT=1\n')
        lines.append('BODY=index.html\n')

        return ''.join(lines)

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -1,15 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Write content to TXT.
'''
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, re, sys '''
Write content to TXT.
'''
import os
import re
from calibre import entity_to_unicode
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
@ -83,6 +85,11 @@ class TxtWriter(object):
for symbol in HTML_SYMBOLS: for symbol in HTML_SYMBOLS:
for code in HTML_SYMBOLS[symbol]: for code in HTML_SYMBOLS[symbol]:
content = content.replace(code, symbol) content = content.replace(code, symbol)
for entity in set(re.findall('&.+?;', content)):
mo = re.search('(%s)' % entity[1:-1], content)
content = content.replace(entity, entity_to_unicode(mo))
return content return content
def cleanup_text(self, text): def cleanup_text(self, text):

View File

@ -640,15 +640,15 @@ class DeviceGUI(object):
', '.join(sent_mails), 3000) ', '.join(sent_mails), 3000)
def sync_news(self, send_ids=None, do_auto=True): def sync_news(self, send_ids=None, do_auto_convert=True):
if self.device_connected: if self.device_connected:
ids = list(dynamic.get('news_to_be_synced', set([]))) if send_ids is None else send_ids ids = list(dynamic.get('news_to_be_synced', set([]))) if send_ids is None else send_ids
ids = [id for id in ids if self.library_view.model().db.has_id(id)] ids = [id for id in ids if self.library_view.model().db.has_id(id)]
files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids( files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
ids, self.device_manager.device_class.settings().format_map, ids, self.device_manager.device_class.settings().format_map,
exclude_auto=do_auto) exclude_auto=do_auto_convert)
auto = [] auto = []
if _auto_ids: if do_auto_convert and _auto_ids:
for id in _auto_ids: for id in _auto_ids:
formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')] formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')]
formats = formats if formats != None else [] formats = formats if formats != None else []

View File

@ -133,7 +133,7 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
self._map = dict(self.category_map) self._map = dict(self.category_map)
def scheduled_recipes(self): def scheduled_recipes(self):
for recipe in self.category_map[_('Scheduled')]: for recipe in self.category_map.get(_('Scheduled'), []):
yield recipe yield recipe
def sort_categories(self, x, y): def sort_categories(self, x, y):