mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Pull from driver-dev
This commit is contained in:
commit
c90c086117
@ -13,6 +13,7 @@ src/calibre/manual/cli/
|
|||||||
build
|
build
|
||||||
dist
|
dist
|
||||||
docs
|
docs
|
||||||
|
nbproject/
|
||||||
src/calibre/gui2/pictureflow/Makefile.Debug
|
src/calibre/gui2/pictureflow/Makefile.Debug
|
||||||
src/calibre/gui2/pictureflow/Makefile.Release
|
src/calibre/gui2/pictureflow/Makefile.Release
|
||||||
src/calibre/gui2/pictureflow/debug/
|
src/calibre/gui2/pictureflow/debug/
|
||||||
|
2
setup.py
2
setup.py
@ -89,7 +89,7 @@ if __name__ == '__main__':
|
|||||||
include_dirs=['src/calibre/utils/msdes']),
|
include_dirs=['src/calibre/utils/msdes']),
|
||||||
|
|
||||||
Extension('calibre.plugins.cPalmdoc',
|
Extension('calibre.plugins.cPalmdoc',
|
||||||
sources=['src/calibre/ebooks/mobi/palmdoc.c']),
|
sources=['src/calibre/ebooks/compression/palmdoc.c']),
|
||||||
|
|
||||||
PyQtExtension('calibre.plugins.pictureflow',
|
PyQtExtension('calibre.plugins.pictureflow',
|
||||||
['src/calibre/gui2/pictureflow/pictureflow.cpp',
|
['src/calibre/gui2/pictureflow/pictureflow.cpp',
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import textwrap, os, glob
|
import textwrap
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
|
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
|
||||||
from calibre.constants import __version__
|
from calibre.constants import __version__
|
||||||
|
|
||||||
@ -39,172 +40,6 @@ every time you add an HTML file to the library.\
|
|||||||
|
|
||||||
return of.name
|
return of.name
|
||||||
|
|
||||||
class OPFMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read OPF metadata'
|
|
||||||
file_types = set(['opf'])
|
|
||||||
description = _('Read metadata from %s files')%'OPF'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
|
||||||
return MetaInformation(OPF(stream, os.getcwd()))
|
|
||||||
|
|
||||||
class RTFMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read RTF metadata'
|
|
||||||
file_types = set(['rtf'])
|
|
||||||
description = _('Read metadata from %s files')%'RTF'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.rtf import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class FB2MetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read FB2 metadata'
|
|
||||||
file_types = set(['fb2'])
|
|
||||||
description = _('Read metadata from %s files')%'FB2'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.fb2 import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
|
|
||||||
class LRFMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read LRF metadata'
|
|
||||||
file_types = set(['lrf'])
|
|
||||||
description = _('Read metadata from %s files')%'LRF'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.lrf.meta import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class PDFMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read PDF metadata'
|
|
||||||
file_types = set(['pdf'])
|
|
||||||
description = _('Read metadata from %s files')%'PDF'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.pdf import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class LITMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read LIT metadata'
|
|
||||||
file_types = set(['lit'])
|
|
||||||
description = _('Read metadata from %s files')%'LIT'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.lit import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class IMPMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read IMP metadata'
|
|
||||||
file_types = set(['imp'])
|
|
||||||
description = _('Read metadata from %s files')%'IMP'
|
|
||||||
author = 'Ashish Kulkarni'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.imp import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class RBMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read RB metadata'
|
|
||||||
file_types = set(['rb'])
|
|
||||||
description = _('Read metadata from %s files')%'RB'
|
|
||||||
author = 'Ashish Kulkarni'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.rb import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class EPUBMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read EPUB metadata'
|
|
||||||
file_types = set(['epub'])
|
|
||||||
description = _('Read metadata from %s files')%'EPUB'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.epub import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class HTMLMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read HTML metadata'
|
|
||||||
file_types = set(['html'])
|
|
||||||
description = _('Read metadata from %s files')%'HTML'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.html import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class MOBIMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read MOBI metadata'
|
|
||||||
file_types = set(['mobi', 'prc', 'azw'])
|
|
||||||
description = _('Read metadata from %s files')%'MOBI'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.mobi.reader import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
|
|
||||||
class TOPAZMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read Topaz metadata'
|
|
||||||
file_types = set(['tpz', 'azw1'])
|
|
||||||
description = _('Read metadata from %s files')%'MOBI'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.topaz import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class ODTMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read ODT metadata'
|
|
||||||
file_types = set(['odt'])
|
|
||||||
description = _('Read metadata from %s files')%'ODT'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.odt import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class TXTMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read TXT metadata'
|
|
||||||
file_types = set(['txt'])
|
|
||||||
description = _('Read metadata from %s files') % 'TXT'
|
|
||||||
author = 'John Schember'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.txt import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class PDBMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read PDB metadata'
|
|
||||||
file_types = set(['pdb'])
|
|
||||||
description = _('Read metadata from %s files') % 'PDB'
|
|
||||||
author = 'John Schember'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.pdb import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class LRXMetadataReader(MetadataReaderPlugin):
|
|
||||||
|
|
||||||
name = 'Read LRX metadata'
|
|
||||||
file_types = set(['lrx'])
|
|
||||||
description = _('Read metadata from %s files')%'LRX'
|
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
|
||||||
from calibre.ebooks.metadata.lrx import get_metadata
|
|
||||||
return get_metadata(stream)
|
|
||||||
|
|
||||||
class ComicMetadataReader(MetadataReaderPlugin):
|
class ComicMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
@ -227,14 +62,127 @@ class ComicMetadataReader(MetadataReaderPlugin):
|
|||||||
mi.cover_data = (ext.lower(), data)
|
mi.cover_data = (ext.lower(), data)
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
class ZipMetadataReader(MetadataReaderPlugin):
|
class EPUBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read ZIP metadata'
|
name = 'Read EPUB metadata'
|
||||||
file_types = set(['zip', 'oebzip'])
|
file_types = set(['epub'])
|
||||||
description = _('Read metadata from ebooks in ZIP archives')
|
description = _('Read metadata from %s files')%'EPUB'
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
def get_metadata(self, stream, ftype):
|
||||||
from calibre.ebooks.metadata.zip import get_metadata
|
from calibre.ebooks.metadata.epub import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class FB2MetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read FB2 metadata'
|
||||||
|
file_types = set(['fb2'])
|
||||||
|
description = _('Read metadata from %s files')%'FB2'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.fb2 import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class HTMLMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read HTML metadata'
|
||||||
|
file_types = set(['html'])
|
||||||
|
description = _('Read metadata from %s files')%'HTML'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.html import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class IMPMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read IMP metadata'
|
||||||
|
file_types = set(['imp'])
|
||||||
|
description = _('Read metadata from %s files')%'IMP'
|
||||||
|
author = 'Ashish Kulkarni'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.imp import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class LITMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read LIT metadata'
|
||||||
|
file_types = set(['lit'])
|
||||||
|
description = _('Read metadata from %s files')%'LIT'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.lit import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class LRFMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read LRF metadata'
|
||||||
|
file_types = set(['lrf'])
|
||||||
|
description = _('Read metadata from %s files')%'LRF'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.lrf.meta import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class LRXMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read LRX metadata'
|
||||||
|
file_types = set(['lrx'])
|
||||||
|
description = _('Read metadata from %s files')%'LRX'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.lrx import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class MOBIMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read MOBI metadata'
|
||||||
|
file_types = set(['mobi', 'prc', 'azw'])
|
||||||
|
description = _('Read metadata from %s files')%'MOBI'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.mobi.reader import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class ODTMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read ODT metadata'
|
||||||
|
file_types = set(['odt'])
|
||||||
|
description = _('Read metadata from %s files')%'ODT'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.odt import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class OPFMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read OPF metadata'
|
||||||
|
file_types = set(['opf'])
|
||||||
|
description = _('Read metadata from %s files')%'OPF'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
return MetaInformation(OPF(stream, os.getcwd()))
|
||||||
|
|
||||||
|
class PDBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read PDB metadata'
|
||||||
|
file_types = set(['pdb'])
|
||||||
|
description = _('Read metadata from %s files') % 'PDB'
|
||||||
|
author = 'John Schember'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.pdb import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class PDFMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read PDF metadata'
|
||||||
|
file_types = set(['pdf'])
|
||||||
|
description = _('Read metadata from %s files')%'PDF'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.pdf import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
class RARMetadataReader(MetadataReaderPlugin):
|
class RARMetadataReader(MetadataReaderPlugin):
|
||||||
@ -247,6 +195,58 @@ class RARMetadataReader(MetadataReaderPlugin):
|
|||||||
from calibre.ebooks.metadata.rar import get_metadata
|
from calibre.ebooks.metadata.rar import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class RBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read RB metadata'
|
||||||
|
file_types = set(['rb'])
|
||||||
|
description = _('Read metadata from %s files')%'RB'
|
||||||
|
author = 'Ashish Kulkarni'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.rb import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class RTFMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read RTF metadata'
|
||||||
|
file_types = set(['rtf'])
|
||||||
|
description = _('Read metadata from %s files')%'RTF'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.rtf import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class TOPAZMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read Topaz metadata'
|
||||||
|
file_types = set(['tpz', 'azw1'])
|
||||||
|
description = _('Read metadata from %s files')%'MOBI'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.topaz import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class TXTMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read TXT metadata'
|
||||||
|
file_types = set(['txt'])
|
||||||
|
description = _('Read metadata from %s files') % 'TXT'
|
||||||
|
author = 'John Schember'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.txt import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class ZipMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read ZIP metadata'
|
||||||
|
file_types = set(['zip', 'oebzip'])
|
||||||
|
description = _('Read metadata from ebooks in ZIP archives')
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.zip import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
|
||||||
class EPUBMetadataWriter(MetadataWriterPlugin):
|
class EPUBMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
@ -268,16 +268,6 @@ class LRFMetadataWriter(MetadataWriterPlugin):
|
|||||||
from calibre.ebooks.lrf.meta import set_metadata
|
from calibre.ebooks.lrf.meta import set_metadata
|
||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
class RTFMetadataWriter(MetadataWriterPlugin):
|
|
||||||
|
|
||||||
name = 'Set RTF metadata'
|
|
||||||
file_types = set(['rtf'])
|
|
||||||
description = _('Set metadata in %s files')%'RTF'
|
|
||||||
|
|
||||||
def set_metadata(self, stream, mi, type):
|
|
||||||
from calibre.ebooks.metadata.rtf import set_metadata
|
|
||||||
set_metadata(stream, mi)
|
|
||||||
|
|
||||||
class MOBIMetadataWriter(MetadataWriterPlugin):
|
class MOBIMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
name = 'Set MOBI metadata'
|
name = 'Set MOBI metadata'
|
||||||
@ -289,17 +279,6 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
|||||||
from calibre.ebooks.metadata.mobi import set_metadata
|
from calibre.ebooks.metadata.mobi import set_metadata
|
||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
class PDFMetadataWriter(MetadataWriterPlugin):
|
|
||||||
|
|
||||||
name = 'Set PDF metadata'
|
|
||||||
file_types = set(['pdf'])
|
|
||||||
description = _('Set metadata in %s files') % 'PDF'
|
|
||||||
author = 'Kovid Goyal'
|
|
||||||
|
|
||||||
def set_metadata(self, stream, mi, type):
|
|
||||||
from calibre.ebooks.metadata.pdf import set_metadata
|
|
||||||
set_metadata(stream, mi)
|
|
||||||
|
|
||||||
class PDBMetadataWriter(MetadataWriterPlugin):
|
class PDBMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
name = 'Set PDB metadata'
|
name = 'Set PDB metadata'
|
||||||
@ -311,49 +290,113 @@ class PDBMetadataWriter(MetadataWriterPlugin):
|
|||||||
from calibre.ebooks.metadata.pdb import set_metadata
|
from calibre.ebooks.metadata.pdb import set_metadata
|
||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
|
class PDFMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
|
name = 'Set PDF metadata'
|
||||||
|
file_types = set(['pdf'])
|
||||||
|
description = _('Set metadata in %s files') % 'PDF'
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
|
||||||
|
def set_metadata(self, stream, mi, type):
|
||||||
|
from calibre.ebooks.metadata.pdf import set_metadata
|
||||||
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
|
class RTFMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
|
name = 'Set RTF metadata'
|
||||||
|
file_types = set(['rtf'])
|
||||||
|
description = _('Set metadata in %s files')%'RTF'
|
||||||
|
|
||||||
|
def set_metadata(self, stream, mi, type):
|
||||||
|
from calibre.ebooks.metadata.rtf import set_metadata
|
||||||
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.ebooks.comic.input import ComicInput
|
||||||
from calibre.ebooks.epub.input import EPUBInput
|
from calibre.ebooks.epub.input import EPUBInput
|
||||||
|
from calibre.ebooks.fb2.input import FB2Input
|
||||||
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
|
from calibre.ebooks.lit.input import LITInput
|
||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
|
from calibre.ebooks.odt.input import ODTInput
|
||||||
from calibre.ebooks.pdb.input import PDBInput
|
from calibre.ebooks.pdb.input import PDBInput
|
||||||
from calibre.ebooks.pdf.input import PDFInput
|
from calibre.ebooks.pdf.input import PDFInput
|
||||||
from calibre.ebooks.txt.input import TXTInput
|
|
||||||
from calibre.ebooks.lit.input import LITInput
|
|
||||||
from calibre.ebooks.fb2.input import FB2Input
|
|
||||||
from calibre.ebooks.fb2.output import FB2Output
|
|
||||||
from calibre.ebooks.odt.input import ODTInput
|
|
||||||
from calibre.ebooks.rtf.input import RTFInput
|
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
|
||||||
from calibre.ebooks.comic.input import ComicInput
|
|
||||||
from calibre.web.feeds.input import RecipeInput
|
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
|
||||||
from calibre.ebooks.mobi.output import MOBIOutput
|
|
||||||
from calibre.ebooks.pdb.output import PDBOutput
|
|
||||||
from calibre.ebooks.lrf.output import LRFOutput
|
|
||||||
from calibre.ebooks.lit.output import LITOutput
|
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
|
||||||
from calibre.ebooks.pdf.output import PDFOutput
|
|
||||||
from calibre.ebooks.pml.input import PMLInput
|
from calibre.ebooks.pml.input import PMLInput
|
||||||
|
from calibre.ebooks.rb.input import RBInput
|
||||||
|
from calibre.web.feeds.input import RecipeInput
|
||||||
|
from calibre.ebooks.rtf.input import RTFInput
|
||||||
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
|
|
||||||
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
|
from calibre.ebooks.fb2.output import FB2Output
|
||||||
|
from calibre.ebooks.lit.output import LITOutput
|
||||||
|
from calibre.ebooks.lrf.output import LRFOutput
|
||||||
|
from calibre.ebooks.mobi.output import MOBIOutput
|
||||||
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
|
from calibre.ebooks.pdb.output import PDBOutput
|
||||||
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
from calibre.ebooks.pml.output import PMLOutput
|
from calibre.ebooks.pml.output import PMLOutput
|
||||||
|
from calibre.ebooks.rb.output import RBOutput
|
||||||
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
|
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
|
||||||
|
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||||
|
from calibre.devices.cybookg3.driver import CYBOOKG3
|
||||||
|
from calibre.devices.eb600.driver import EB600
|
||||||
|
from calibre.devices.jetbook.driver import JETBOOK
|
||||||
|
from calibre.devices.kindle.driver import KINDLE
|
||||||
|
from calibre.devices.kindle.driver import KINDLE2
|
||||||
from calibre.devices.prs500.driver import PRS500
|
from calibre.devices.prs500.driver import PRS500
|
||||||
from calibre.devices.prs505.driver import PRS505
|
from calibre.devices.prs505.driver import PRS505
|
||||||
from calibre.devices.prs700.driver import PRS700
|
from calibre.devices.prs700.driver import PRS700
|
||||||
from calibre.devices.cybookg3.driver import CYBOOKG3
|
|
||||||
from calibre.devices.kindle.driver import KINDLE
|
|
||||||
from calibre.devices.kindle.driver import KINDLE2
|
|
||||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
|
||||||
from calibre.devices.eb600.driver import EB600
|
|
||||||
from calibre.devices.jetbook.driver import JETBOOK
|
|
||||||
from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
|
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
plugins = []
|
||||||
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
plugins += [
|
||||||
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
|
ComicInput,
|
||||||
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
|
EPUBInput,
|
||||||
EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
|
FB2Input,
|
||||||
|
HTMLInput,
|
||||||
|
LITInput,
|
||||||
|
MOBIInput,
|
||||||
|
ODTInput,
|
||||||
|
PDBInput,
|
||||||
|
PDFInput,
|
||||||
|
PMLInput,
|
||||||
|
RBInput,
|
||||||
|
RecipeInput,
|
||||||
|
RTFInput,
|
||||||
|
TXTInput,
|
||||||
|
]
|
||||||
|
plugins += [
|
||||||
|
EPUBOutput,
|
||||||
|
FB2Output,
|
||||||
|
LITOutput,
|
||||||
|
LRFOutput,
|
||||||
|
MOBIOutput,
|
||||||
|
OEBOutput,
|
||||||
|
PDBOutput,
|
||||||
|
PDFOutput,
|
||||||
|
PMLOutput,
|
||||||
|
RBOutput,
|
||||||
|
TXTOutput,
|
||||||
|
]
|
||||||
|
plugins += [
|
||||||
|
BEBOOK,
|
||||||
|
BEBOOK_MINI,
|
||||||
|
BLACKBERRY,
|
||||||
|
CYBOOKG3,
|
||||||
|
EB600,
|
||||||
|
JETBOOK,
|
||||||
|
KINDLE,
|
||||||
|
KINDLE2,
|
||||||
|
PRS500,
|
||||||
|
PRS505,
|
||||||
|
PRS700,
|
||||||
|
]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
5
src/calibre/ebooks/compression/__init__.py
Normal file
5
src/calibre/ebooks/compression/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
@ -9,8 +9,10 @@ Transform OEB content into FB2 markup
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
|
||||||
|
from calibre import entity_to_unicode
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||||
@ -25,15 +27,9 @@ TAG_MAP = {
|
|||||||
'div' : 'p',
|
'div' : 'p',
|
||||||
}
|
}
|
||||||
|
|
||||||
STYLE_MAP = {
|
|
||||||
'bold' : 'strong',
|
|
||||||
'bolder' : 'strong',
|
|
||||||
'italic' : 'emphasis',
|
|
||||||
}
|
|
||||||
|
|
||||||
STYLES = [
|
STYLES = [
|
||||||
'font-weight',
|
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
|
||||||
'font-style',
|
('font-style', {'italic' : 'emphasis'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
class FB2MLizer(object):
|
class FB2MLizer(object):
|
||||||
@ -81,7 +77,13 @@ class FB2MLizer(object):
|
|||||||
return images
|
return images
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
return text.replace('&', '')
|
for entity in set(re.findall('&.+?;', text)):
|
||||||
|
mo = re.search('(%s)' % entity[1:-1], text)
|
||||||
|
text = text.replace(entity, entity_to_unicode(mo))
|
||||||
|
|
||||||
|
text = text.replace('&', '')
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer, tag_stack=[]):
|
def dump_text(self, elem, stylizer, tag_stack=[]):
|
||||||
if not isinstance(elem.tag, basestring) \
|
if not isinstance(elem.tag, basestring) \
|
||||||
@ -107,8 +109,9 @@ class FB2MLizer(object):
|
|||||||
fb2_text += '<%s>' % fb2_tag
|
fb2_text += '<%s>' % fb2_tag
|
||||||
tag_stack.append(fb2_tag)
|
tag_stack.append(fb2_tag)
|
||||||
|
|
||||||
|
# Processes style information
|
||||||
for s in STYLES:
|
for s in STYLES:
|
||||||
style_tag = STYLE_MAP.get(style[s], None)
|
style_tag = s[1].get(style[s[0]], None)
|
||||||
if style_tag:
|
if style_tag:
|
||||||
tag_count += 1
|
tag_count += 1
|
||||||
fb2_text += '<%s>' % style_tag
|
fb2_text += '<%s>' % style_tag
|
||||||
|
@ -8,11 +8,13 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import struct
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, authors_to_string
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.pdb.header import PdbHeaderReader, PdbHeaderBuilder
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
from calibre.ebooks.pdb.ereader.reader import HeaderRecord
|
from calibre.ebooks.pdb.ereader.reader132 import HeaderRecord
|
||||||
|
from calibre.ebooks.pdb.header import PdbHeaderBuilder
|
||||||
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
|
|
||||||
def get_metadata(stream, extract_cover=True):
|
def get_metadata(stream, extract_cover=True):
|
||||||
"""
|
"""
|
||||||
@ -22,19 +24,22 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
|
||||||
pheader = PdbHeaderReader(stream)
|
pheader = PdbHeaderReader(stream)
|
||||||
hr = HeaderRecord(pheader.section_data(0))
|
|
||||||
|
|
||||||
if hr.version in (2, 10) and hr.has_metadata == 1:
|
# Only Dropbook produced 132 byte record0 files are supported
|
||||||
try:
|
if len(pheader.section_data(0)) == 132:
|
||||||
mdata = pheader.section_data(hr.metadata_offset)
|
hr = HeaderRecord(pheader.section_data(0))
|
||||||
|
|
||||||
mdata = mdata.split('\x00')
|
if hr.version in (2, 10) and hr.has_metadata == 1:
|
||||||
mi.title = mdata[0]
|
try:
|
||||||
mi.authors = [mdata[1]]
|
mdata = pheader.section_data(hr.metadata_offset)
|
||||||
mi.publisher = mdata[3]
|
|
||||||
mi.isbn = mdata[4]
|
mdata = mdata.split('\x00')
|
||||||
except:
|
mi.title = mdata[0]
|
||||||
pass
|
mi.authors = [mdata[1]]
|
||||||
|
mi.publisher = mdata[3]
|
||||||
|
mi.isbn = mdata[4]
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
if not mi.title:
|
if not mi.title:
|
||||||
mi.title = pheader.title if pheader.title else _('Unknown')
|
mi.title = pheader.title if pheader.title else _('Unknown')
|
||||||
@ -43,6 +48,11 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
|
|
||||||
def set_metadata(stream, mi):
|
def set_metadata(stream, mi):
|
||||||
pheader = PdbHeaderReader(stream)
|
pheader = PdbHeaderReader(stream)
|
||||||
|
|
||||||
|
# Only Dropbook produced 132 byte record0 files are supported
|
||||||
|
if pheader.section_data(0) != 132:
|
||||||
|
return
|
||||||
|
|
||||||
sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
|
sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
|
||||||
hr = HeaderRecord(sections[0])
|
hr = HeaderRecord(sections[0])
|
||||||
|
|
||||||
@ -55,9 +65,9 @@ def set_metadata(stream, mi):
|
|||||||
last_data = len(sections) - 1
|
last_data = len(sections) - 1
|
||||||
|
|
||||||
for i in range(0, 132, 2):
|
for i in range(0, 132, 2):
|
||||||
val, = struct.unpack('>H', sections[0][i:i+2])
|
val, = struct.unpack('>H', sections[0][i:i + 2])
|
||||||
if val >= hr.last_data_offset:
|
if val >= hr.last_data_offset:
|
||||||
sections[0][i:i+2] = struct.pack('>H', last_data)
|
sections[0][i:i + 2] = struct.pack('>H', last_data)
|
||||||
|
|
||||||
sections[0][24:26] = struct.pack('>H', 1) # Set has metadata
|
sections[0][24:26] = struct.pack('>H', 1) # Set has metadata
|
||||||
sections[0][44:46] = struct.pack('>H', last_data - 1) # Set location of metadata
|
sections[0][44:46] = struct.pack('>H', last_data - 1) # Set location of metadata
|
||||||
@ -79,4 +89,3 @@ def set_metadata(stream, mi):
|
|||||||
# Write the data back to the file
|
# Write the data back to the file
|
||||||
for item in sections:
|
for item in sections:
|
||||||
stream.write(item)
|
stream.write(item)
|
||||||
|
|
||||||
|
@ -38,7 +38,6 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
if MetadataReader is None:
|
if MetadataReader is None:
|
||||||
return MetaInformation(pheader.title, [_('Unknown')])
|
return MetaInformation(pheader.title, [_('Unknown')])
|
||||||
|
|
||||||
|
|
||||||
return MetadataReader(stream, extract_cover)
|
return MetadataReader(stream, extract_cover)
|
||||||
|
|
||||||
def set_metadata(stream, mi):
|
def set_metadata(stream, mi):
|
||||||
|
@ -1,11 +1,17 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''
|
'''
|
||||||
Read data from .mobi files
|
Read data from .mobi files
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import struct, os, cStringIO, re, functools, datetime, textwrap
|
import datetime
|
||||||
|
import functools
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import struct
|
||||||
|
import textwrap
|
||||||
|
|
||||||
|
import cStringIO
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image as PILImage
|
from PIL import Image as PILImage
|
||||||
@ -21,8 +27,8 @@ from calibre.ebooks import DRMError
|
|||||||
from calibre.ebooks.chardet import ENCODING_PATS
|
from calibre.ebooks.chardet import ENCODING_PATS
|
||||||
from calibre.ebooks.mobi import MobiError
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
|
||||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
|
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
@ -40,8 +46,8 @@ class EXTHHeader(object):
|
|||||||
|
|
||||||
while left > 0:
|
while left > 0:
|
||||||
left -= 1
|
left -= 1
|
||||||
id, size = struct.unpack('>LL', raw[pos:pos+8])
|
id, size = struct.unpack('>LL', raw[pos:pos + 8])
|
||||||
content = raw[pos+8:pos+size]
|
content = raw[pos + 8:pos + size]
|
||||||
pos += size
|
pos += size
|
||||||
if id >= 100 and id < 200:
|
if id >= 100 and id < 200:
|
||||||
self.process_metadata(id, content, codec)
|
self.process_metadata(id, content, codec)
|
||||||
@ -87,7 +93,7 @@ class EXTHHeader(object):
|
|||||||
elif id == 106:
|
elif id == 106:
|
||||||
try:
|
try:
|
||||||
self.mi.publish_date = datetime.datetime.strptime(
|
self.mi.publish_date = datetime.datetime.strptime(
|
||||||
content, '%Y-%m-%d',).date()
|
content, '%Y-%m-%d', ).date()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
elif id == 108:
|
elif id == 108:
|
||||||
@ -123,13 +129,13 @@ class BookHeader(object):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
self.codec = {
|
self.codec = {
|
||||||
1252 : 'cp1252',
|
1252: 'cp1252',
|
||||||
65001 : 'utf-8',
|
65001: 'utf-8',
|
||||||
}[self.codepage]
|
}[self.codepage]
|
||||||
except (IndexError, KeyError):
|
except (IndexError, KeyError):
|
||||||
self.codec = 'cp1252' if user_encoding is None else user_encoding
|
self.codec = 'cp1252' if user_encoding is None else user_encoding
|
||||||
log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
|
log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
|
||||||
self.codec))
|
self.codec))
|
||||||
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
|
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
|
||||||
self.extra_flags = 0
|
self.extra_flags = 0
|
||||||
else:
|
else:
|
||||||
@ -147,14 +153,14 @@ class BookHeader(object):
|
|||||||
self.language = main_language.get(langid, 'ENGLISH')
|
self.language = main_language.get(langid, 'ENGLISH')
|
||||||
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
|
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
|
||||||
self.mobi_version = struct.unpack('>I', raw[0x68:0x6c])[0]
|
self.mobi_version = struct.unpack('>I', raw[0x68:0x6c])[0]
|
||||||
self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c+4])[0]
|
self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c + 4])[0]
|
||||||
|
|
||||||
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
||||||
self.exth = None
|
self.exth = None
|
||||||
if not isinstance(self.title, unicode):
|
if not isinstance(self.title, unicode):
|
||||||
self.title = self.title.decode(self.codec, 'replace')
|
self.title = self.title.decode(self.codec, 'replace')
|
||||||
if self.exth_flag & 0x40:
|
if self.exth_flag & 0x40:
|
||||||
self.exth = EXTHHeader(raw[16+self.length:], self.codec, self.title)
|
self.exth = EXTHHeader(raw[16 + self.length:], self.codec, self.title)
|
||||||
self.exth.mi.uid = self.unique_id
|
self.exth.mi.uid = self.unique_id
|
||||||
self.exth.mi.language = self.language
|
self.exth.mi.language = self.language
|
||||||
|
|
||||||
@ -182,7 +188,7 @@ class MetadataHeader(BookHeader):
|
|||||||
return struct.unpack('>H', self.stream.read(2))[0]
|
return struct.unpack('>H', self.stream.read(2))[0]
|
||||||
|
|
||||||
def section_offset(self, number):
|
def section_offset(self, number):
|
||||||
self.stream.seek(78+number*8)
|
self.stream.seek(78 + number * 8)
|
||||||
return struct.unpack('>LBBBB', self.stream.read(8))[0]
|
return struct.unpack('>LBBBB', self.stream.read(8))[0]
|
||||||
|
|
||||||
def header(self):
|
def header(self):
|
||||||
@ -242,15 +248,15 @@ class MobiReader(object):
|
|||||||
self.name = self.header[:32].replace('\x00', '')
|
self.name = self.header[:32].replace('\x00', '')
|
||||||
self.num_sections, = struct.unpack('>H', raw[76:78])
|
self.num_sections, = struct.unpack('>H', raw[76:78])
|
||||||
|
|
||||||
self.ident = self.header[0x3C:0x3C+8].upper()
|
self.ident = self.header[0x3C:0x3C + 8].upper()
|
||||||
if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
|
if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
|
||||||
raise MobiError('Unknown book type: %s'%self.ident)
|
raise MobiError('Unknown book type: %s' % self.ident)
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
self.section_headers = []
|
self.section_headers = []
|
||||||
for i in range(self.num_sections):
|
for i in range(self.num_sections):
|
||||||
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78+i*8:78+i*8+8])
|
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78 + i * 8:78 + i * 8 + 8])
|
||||||
flags, val = a1, a2<<16 | a3<<8 | a4
|
flags, val = a1, a2 << 16 | a3 << 8 | a4
|
||||||
self.section_headers.append((offset, flags, val))
|
self.section_headers.append((offset, flags, val))
|
||||||
|
|
||||||
def section(section_number):
|
def section(section_number):
|
||||||
@ -266,7 +272,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
|
|
||||||
self.book_header = BookHeader(self.sections[0][0], self.ident,
|
self.book_header = BookHeader(self.sections[0][0], self.ident,
|
||||||
user_encoding, self.log)
|
user_encoding, self.log)
|
||||||
self.name = self.name.decode(self.book_header.codec, 'replace')
|
self.name = self.name.decode(self.book_header.codec, 'replace')
|
||||||
|
|
||||||
def extract_content(self, output_dir, parse_cache):
|
def extract_content(self, output_dir, parse_cache):
|
||||||
@ -279,13 +285,13 @@ class MobiReader(object):
|
|||||||
parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
|
parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
|
||||||
self.add_anchors()
|
self.add_anchors()
|
||||||
self.processed_html = self.processed_html.decode(self.book_header.codec,
|
self.processed_html = self.processed_html.decode(self.book_header.codec,
|
||||||
'ignore')
|
'ignore')
|
||||||
for pat in ENCODING_PATS:
|
for pat in ENCODING_PATS:
|
||||||
self.processed_html = pat.sub('', self.processed_html)
|
self.processed_html = pat.sub('', self.processed_html)
|
||||||
e2u = functools.partial(entity_to_unicode,
|
e2u = functools.partial(entity_to_unicode,
|
||||||
exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
|
exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
|
||||||
self.processed_html = re.sub(r'&(\S+?);', e2u,
|
self.processed_html = re.sub(r'&(\S+?);', e2u,
|
||||||
self.processed_html)
|
self.processed_html)
|
||||||
self.extract_images(processed_records, output_dir)
|
self.extract_images(processed_records, output_dir)
|
||||||
self.replace_page_breaks()
|
self.replace_page_breaks()
|
||||||
self.cleanup_html()
|
self.cleanup_html()
|
||||||
@ -295,7 +301,7 @@ class MobiReader(object):
|
|||||||
if root.xpath('descendant::p/descendant::p'):
|
if root.xpath('descendant::p/descendant::p'):
|
||||||
from lxml.html import soupparser
|
from lxml.html import soupparser
|
||||||
self.log.warning('Markup contains unclosed <p> tags, parsing using',
|
self.log.warning('Markup contains unclosed <p> tags, parsing using',
|
||||||
'BeatifulSoup')
|
'BeatifulSoup')
|
||||||
root = soupparser.fromstring(self.processed_html)
|
root = soupparser.fromstring(self.processed_html)
|
||||||
if root.tag != 'html':
|
if root.tag != 'html':
|
||||||
self.log.warn('File does not have opening <html> tag')
|
self.log.warn('File does not have opening <html> tag')
|
||||||
@ -346,45 +352,45 @@ class MobiReader(object):
|
|||||||
fname = self.name.encode('ascii', 'replace')
|
fname = self.name.encode('ascii', 'replace')
|
||||||
fname = re.sub(r'[\x08\x15\0]+', '', fname)
|
fname = re.sub(r'[\x08\x15\0]+', '', fname)
|
||||||
htmlfile = os.path.join(output_dir,
|
htmlfile = os.path.join(output_dir,
|
||||||
sanitize_file_name(fname)+'.html')
|
sanitize_file_name(fname) + '.html')
|
||||||
try:
|
try:
|
||||||
for ref in guide.xpath('descendant::reference'):
|
for ref in guide.xpath('descendant::reference'):
|
||||||
if ref.attrib.has_key('href'):
|
if ref.attrib.has_key('href'):
|
||||||
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
|
ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
parse_cache[htmlfile] = root
|
parse_cache[htmlfile] = root
|
||||||
self.htmlfile = htmlfile
|
self.htmlfile = htmlfile
|
||||||
ncx = cStringIO.StringIO()
|
ncx = cStringIO.StringIO()
|
||||||
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
||||||
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
|
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
|
||||||
opf.render(open(self.created_opf_path, 'wb'), ncx,
|
opf.render(open(self.created_opf_path, 'wb'), ncx,
|
||||||
ncx_manifest_entry=ncx_manifest_entry)
|
ncx_manifest_entry=ncx_manifest_entry)
|
||||||
ncx = ncx.getvalue()
|
ncx = ncx.getvalue()
|
||||||
if ncx:
|
if ncx:
|
||||||
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
|
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
|
||||||
open(ncx_path, 'wb').write(ncx)
|
open(ncx_path, 'wb').write(ncx)
|
||||||
|
|
||||||
with open('styles.css', 'wb') as s:
|
with open('styles.css', 'wb') as s:
|
||||||
s.write(self.base_css_rules+'\n\n')
|
s.write(self.base_css_rules + '\n\n')
|
||||||
for cls, rule in self.tag_css_rules.items():
|
for cls, rule in self.tag_css_rules.items():
|
||||||
if isinstance(rule, unicode):
|
if isinstance(rule, unicode):
|
||||||
rule = rule.encode('utf-8')
|
rule = rule.encode('utf-8')
|
||||||
s.write('.%s { %s }\n\n'%(cls, rule))
|
s.write('.%s { %s }\n\n' % (cls, rule))
|
||||||
|
|
||||||
|
|
||||||
if self.book_header.exth is not None or self.embedded_mi is not None:
|
if self.book_header.exth is not None or self.embedded_mi is not None:
|
||||||
self.log.debug('Creating OPF...')
|
self.log.debug('Creating OPF...')
|
||||||
ncx = cStringIO.StringIO()
|
ncx = cStringIO.StringIO()
|
||||||
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
||||||
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx,
|
opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
|
||||||
ncx_manifest_entry )
|
ncx_manifest_entry)
|
||||||
ncx = ncx.getvalue()
|
ncx = ncx.getvalue()
|
||||||
if ncx:
|
if ncx:
|
||||||
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
|
open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx)
|
||||||
|
|
||||||
def read_embedded_metadata(self, root, elem, guide):
|
def read_embedded_metadata(self, root, elem, guide):
|
||||||
raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
|
raw = '<package>' + html.tostring(elem, encoding='utf-8') + '</package>'
|
||||||
stream = cStringIO.StringIO(raw)
|
stream = cStringIO.StringIO(raw)
|
||||||
opf = OPF(stream)
|
opf = OPF(stream)
|
||||||
self.embedded_mi = MetaInformation(opf)
|
self.embedded_mi = MetaInformation(opf)
|
||||||
@ -394,7 +400,7 @@ class MobiReader(object):
|
|||||||
href = ref.get('href', '')
|
href = ref.get('href', '')
|
||||||
if href.startswith('#'):
|
if href.startswith('#'):
|
||||||
href = href[1:]
|
href = href[1:]
|
||||||
anchors = root.xpath('//*[@id="%s"]'%href)
|
anchors = root.xpath('//*[@id="%s"]' % href)
|
||||||
if anchors:
|
if anchors:
|
||||||
cpos = anchors[0]
|
cpos = anchors[0]
|
||||||
reached = False
|
reached = False
|
||||||
@ -412,26 +418,26 @@ class MobiReader(object):
|
|||||||
self.log.debug('Cleaning up HTML...')
|
self.log.debug('Cleaning up HTML...')
|
||||||
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
|
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
|
||||||
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
||||||
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
|
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
|
||||||
self.processed_html = self.processed_html.replace('\r\n', '\n')
|
self.processed_html = self.processed_html.replace('\r\n', '\n')
|
||||||
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
||||||
|
|
||||||
def upshift_markup(self, root):
|
def upshift_markup(self, root):
|
||||||
self.log.debug('Converting style information to CSS...')
|
self.log.debug('Converting style information to CSS...')
|
||||||
size_map = {
|
size_map = {
|
||||||
'xx-small' : '0.5',
|
'xx-small': '0.5',
|
||||||
'x-small' : '1',
|
'x-small': '1',
|
||||||
'small' : '2',
|
'small': '2',
|
||||||
'medium' : '3',
|
'medium': '3',
|
||||||
'large' : '4',
|
'large': '4',
|
||||||
'x-large' : '5',
|
'x-large': '5',
|
||||||
'xx-large' : '6',
|
'xx-large': '6',
|
||||||
}
|
}
|
||||||
mobi_version = self.book_header.mobi_version
|
mobi_version = self.book_header.mobi_version
|
||||||
for i, tag in enumerate(root.iter(etree.Element)):
|
for i, tag in enumerate(root.iter(etree.Element)):
|
||||||
tag.attrib.pop('xmlns', '')
|
tag.attrib.pop('xmlns', '')
|
||||||
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
|
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
|
||||||
'state', 'city', 'street', 'address', 'content'):
|
'state', 'city', 'street', 'address', 'content'):
|
||||||
tag.tag = 'div' if tag.tag == 'content' else 'span'
|
tag.tag = 'div' if tag.tag == 'content' else 'span'
|
||||||
for key in tag.attrib.keys():
|
for key in tag.attrib.keys():
|
||||||
tag.attrib.pop(key)
|
tag.attrib.pop(key)
|
||||||
@ -450,7 +456,7 @@ class MobiReader(object):
|
|||||||
if width:
|
if width:
|
||||||
styles.append('text-indent: %s' % width)
|
styles.append('text-indent: %s' % width)
|
||||||
if width.startswith('-'):
|
if width.startswith('-'):
|
||||||
styles.append('margin-left: %s'%(width[1:]))
|
styles.append('margin-left: %s' % (width[1:]))
|
||||||
if attrib.has_key('align'):
|
if attrib.has_key('align'):
|
||||||
align = attrib.pop('align').strip()
|
align = attrib.pop('align').strip()
|
||||||
if align:
|
if align:
|
||||||
@ -502,7 +508,7 @@ class MobiReader(object):
|
|||||||
cls = sel
|
cls = sel
|
||||||
break
|
break
|
||||||
if cls is None:
|
if cls is None:
|
||||||
ncls = 'calibre_%d'%i
|
ncls = 'calibre_%d' % i
|
||||||
self.tag_css_rules[ncls] = rule
|
self.tag_css_rules[ncls] = rule
|
||||||
cls = attrib.get('class', '')
|
cls = attrib.get('class', '')
|
||||||
cls = cls + (' ' if cls else '') + ncls
|
cls = cls + (' ' if cls else '') + ncls
|
||||||
@ -514,17 +520,17 @@ class MobiReader(object):
|
|||||||
mi = MetaInformation(self.book_header.title, [_('Unknown')])
|
mi = MetaInformation(self.book_header.title, [_('Unknown')])
|
||||||
opf = OPFCreator(os.path.dirname(htmlfile), mi)
|
opf = OPFCreator(os.path.dirname(htmlfile), mi)
|
||||||
if hasattr(self.book_header.exth, 'cover_offset'):
|
if hasattr(self.book_header.exth, 'cover_offset'):
|
||||||
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
|
opf.cover = 'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
|
||||||
elif mi.cover is not None:
|
elif mi.cover is not None:
|
||||||
opf.cover = mi.cover
|
opf.cover = mi.cover
|
||||||
else:
|
else:
|
||||||
opf.cover = 'images/%05d.jpg'%1
|
opf.cover = 'images/%05d.jpg' % 1
|
||||||
if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
|
if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
|
||||||
*opf.cover.split('/'))):
|
* opf.cover.split('/'))):
|
||||||
opf.cover = None
|
opf.cover = None
|
||||||
|
|
||||||
manifest = [(htmlfile, 'text/x-oeb1-document'),
|
manifest = [(htmlfile, 'text/x-oeb1-document'),
|
||||||
(os.path.abspath('styles.css'), 'text/css')]
|
(os.path.abspath('styles.css'), 'text/css')]
|
||||||
bp = os.path.dirname(htmlfile)
|
bp = os.path.dirname(htmlfile)
|
||||||
for i in getattr(self, 'image_names', []):
|
for i in getattr(self, 'image_names', []):
|
||||||
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
|
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
|
||||||
@ -541,7 +547,7 @@ class MobiReader(object):
|
|||||||
ncx_manifest_entry = None
|
ncx_manifest_entry = None
|
||||||
if toc:
|
if toc:
|
||||||
ncx_manifest_entry = 'toc.ncx'
|
ncx_manifest_entry = 'toc.ncx'
|
||||||
elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
|
elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
|
||||||
tocobj = None
|
tocobj = None
|
||||||
ent_pat = re.compile(r'&(\S+?);')
|
ent_pat = re.compile(r'&(\S+?);')
|
||||||
if elems:
|
if elems:
|
||||||
@ -556,12 +562,12 @@ class MobiReader(object):
|
|||||||
if href and re.match('\w+://', href) is None:
|
if href and re.match('\w+://', href) is None:
|
||||||
try:
|
try:
|
||||||
text = u' '.join([t.strip() for t in \
|
text = u' '.join([t.strip() for t in \
|
||||||
x.xpath('descendant::text()')])
|
x.xpath('descendant::text()')])
|
||||||
except:
|
except:
|
||||||
text = ''
|
text = ''
|
||||||
text = ent_pat.sub(entity_to_unicode, text)
|
text = ent_pat.sub(entity_to_unicode, text)
|
||||||
tocobj.add_item(toc.partition('#')[0], href[1:],
|
tocobj.add_item(toc.partition('#')[0], href[1:],
|
||||||
text)
|
text)
|
||||||
if reached and x.get('class', None) == 'mbp_pagebreak':
|
if reached and x.get('class', None) == 'mbp_pagebreak':
|
||||||
break
|
break
|
||||||
if tocobj is not None:
|
if tocobj is not None:
|
||||||
@ -599,17 +605,17 @@ class MobiReader(object):
|
|||||||
|
|
||||||
def extract_text(self):
|
def extract_text(self):
|
||||||
self.log.debug('Extracting text...')
|
self.log.debug('Extracting text...')
|
||||||
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
|
text_sections = [self.text_section(i) for i in range(1, self.book_header.records + 1)]
|
||||||
processed_records = list(range(0, self.book_header.records+1))
|
processed_records = list(range(0, self.book_header.records + 1))
|
||||||
|
|
||||||
self.mobi_html = ''
|
self.mobi_html = ''
|
||||||
|
|
||||||
if self.book_header.compression_type == 'DH':
|
if self.book_header.compression_type == 'DH':
|
||||||
huffs = [self.sections[i][0] for i in
|
huffs = [self.sections[i][0] for i in
|
||||||
range(self.book_header.huff_offset,
|
range(self.book_header.huff_offset,
|
||||||
self.book_header.huff_offset+self.book_header.huff_number)]
|
self.book_header.huff_offset + self.book_header.huff_number)]
|
||||||
processed_records += list(range(self.book_header.huff_offset,
|
processed_records += list(range(self.book_header.huff_offset,
|
||||||
self.book_header.huff_offset+self.book_header.huff_number))
|
self.book_header.huff_offset + self.book_header.huff_number))
|
||||||
huff = HuffReader(huffs)
|
huff = HuffReader(huffs)
|
||||||
self.mobi_html = huff.decompress(text_sections)
|
self.mobi_html = huff.decompress(text_sections)
|
||||||
|
|
||||||
@ -620,7 +626,7 @@ class MobiReader(object):
|
|||||||
elif self.book_header.compression_type == '\x00\x01':
|
elif self.book_header.compression_type == '\x00\x01':
|
||||||
self.mobi_html = ''.join(text_sections)
|
self.mobi_html = ''.join(text_sections)
|
||||||
else:
|
else:
|
||||||
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
|
raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type))
|
||||||
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
||||||
self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
|
self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
|
||||||
self.mobi_html = self.mobi_html.replace('\0', '')
|
self.mobi_html = self.mobi_html.replace('\0', '')
|
||||||
@ -636,7 +642,7 @@ class MobiReader(object):
|
|||||||
self.log.debug('Adding anchors...')
|
self.log.debug('Adding anchors...')
|
||||||
positions = set([])
|
positions = set([])
|
||||||
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
|
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
for match in link_pattern.finditer(self.mobi_html):
|
for match in link_pattern.finditer(self.mobi_html):
|
||||||
positions.add(int(match.group(1)))
|
positions.add(int(match.group(1)))
|
||||||
pos = 0
|
pos = 0
|
||||||
@ -652,10 +658,10 @@ class MobiReader(object):
|
|||||||
if r > -1 and (r < l or l == end or l == -1):
|
if r > -1 and (r < l or l == end or l == -1):
|
||||||
p = self.mobi_html.rfind('<', 0, end + 1)
|
p = self.mobi_html.rfind('<', 0, end + 1)
|
||||||
if pos < end and p > -1 and \
|
if pos < end and p > -1 and \
|
||||||
not end_tag_re.match(self.mobi_html[p:r]) and \
|
not end_tag_re.match(self.mobi_html[p:r]) and \
|
||||||
not self.mobi_html[p:r+1].endswith('/>'):
|
not self.mobi_html[p:r + 1].endswith('/>'):
|
||||||
anchor = ' filepos-id="filepos%d"'
|
anchor = ' filepos-id="filepos%d"'
|
||||||
end = r
|
end = r
|
||||||
else:
|
else:
|
||||||
end = r + 1
|
end = r + 1
|
||||||
self.processed_html += self.mobi_html[pos:end] + (anchor % oend)
|
self.processed_html += self.mobi_html[pos:end] + (anchor % oend)
|
||||||
@ -673,7 +679,7 @@ class MobiReader(object):
|
|||||||
start = getattr(self.book_header, 'first_image_index', -1)
|
start = getattr(self.book_header, 'first_image_index', -1)
|
||||||
if start > self.num_sections or start < 0:
|
if start > self.num_sections or start < 0:
|
||||||
# BAEN PRC files have bad headers
|
# BAEN PRC files have bad headers
|
||||||
start=0
|
start = 0
|
||||||
for i in range(start, self.num_sections):
|
for i in range(start, self.num_sections):
|
||||||
if i in processed_records:
|
if i in processed_records:
|
||||||
continue
|
continue
|
||||||
@ -687,7 +693,7 @@ class MobiReader(object):
|
|||||||
except IOError:
|
except IOError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
path = os.path.join(output_dir, '%05d.jpg'%image_index)
|
path = os.path.join(output_dir, '%05d.jpg' % image_index)
|
||||||
self.image_names.append(os.path.basename(path))
|
self.image_names.append(os.path.basename(path))
|
||||||
im.save(open(path, 'wb'), format='JPEG')
|
im.save(open(path, 'wb'), format='JPEG')
|
||||||
|
|
||||||
|
@ -1,27 +1,32 @@
|
|||||||
'''
|
'''
|
||||||
Write content to Mobipocket books.
|
Write content to Mobipocket books.
|
||||||
'''
|
'''
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from itertools import count
|
||||||
|
from itertools import izip
|
||||||
|
import random
|
||||||
|
import re
|
||||||
from struct import pack
|
from struct import pack
|
||||||
import time
|
import time
|
||||||
import random
|
|
||||||
from cStringIO import StringIO
|
|
||||||
import re
|
|
||||||
from itertools import izip, count
|
|
||||||
from collections import defaultdict
|
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
from cStringIO import StringIO
|
||||||
OEB_RASTER_IMAGES
|
|
||||||
from calibre.ebooks.oeb.base import namespace, prefixname
|
|
||||||
from calibre.ebooks.oeb.base import urlnormalize
|
|
||||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
from calibre.ebooks.mobi.mobiml import MBP_NS
|
from calibre.ebooks.mobi.mobiml import MBP_NS
|
||||||
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
|
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||||
|
from calibre.ebooks.oeb.base import XHTML
|
||||||
|
from calibre.ebooks.oeb.base import XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.base import XML_NS
|
||||||
|
from calibre.ebooks.oeb.base import namespace
|
||||||
|
from calibre.ebooks.oeb.base import prefixname
|
||||||
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
|
from calibre.ebooks.compression.palmdoc import compress_doc
|
||||||
|
|
||||||
# TODO:
|
# TODO:
|
||||||
# - Allow override CSS (?)
|
# - Allow override CSS (?)
|
||||||
@ -174,7 +179,7 @@ class Serializer(object):
|
|||||||
item = hrefs[path] if path else None
|
item = hrefs[path] if path else None
|
||||||
if item and item.spine_position is None:
|
if item and item.spine_position is None:
|
||||||
return False
|
return False
|
||||||
path = item.href if item else base.href
|
path = item.href if item else base.href
|
||||||
href = '#'.join((path, frag)) if frag else path
|
href = '#'.join((path, frag)) if frag else path
|
||||||
buffer.write('filepos=')
|
buffer.write('filepos=')
|
||||||
self.href_offsets[href].append(buffer.tell())
|
self.href_offsets[href].append(buffer.tell())
|
||||||
@ -211,8 +216,8 @@ class Serializer(object):
|
|||||||
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
||||||
buffer = self.buffer
|
buffer = self.buffer
|
||||||
if not isinstance(elem.tag, basestring) \
|
if not isinstance(elem.tag, basestring) \
|
||||||
or namespace(elem.tag) not in nsrmap:
|
or namespace(elem.tag) not in nsrmap:
|
||||||
return
|
return
|
||||||
tag = prefixname(elem.tag, nsrmap)
|
tag = prefixname(elem.tag, nsrmap)
|
||||||
# Previous layers take care of @name
|
# Previous layers take care of @name
|
||||||
id = elem.attrib.pop('id', None)
|
id = elem.attrib.pop('id', None)
|
||||||
@ -221,9 +226,9 @@ class Serializer(object):
|
|||||||
offset = self.anchor_offset or buffer.tell()
|
offset = self.anchor_offset or buffer.tell()
|
||||||
self.id_offsets[href] = offset
|
self.id_offsets[href] = offset
|
||||||
if self.anchor_offset is not None and \
|
if self.anchor_offset is not None and \
|
||||||
tag == 'a' and not elem.attrib and \
|
tag == 'a' and not elem.attrib and \
|
||||||
not len(elem) and not elem.text:
|
not len(elem) and not elem.text:
|
||||||
return
|
return
|
||||||
self.anchor_offset = buffer.tell()
|
self.anchor_offset = buffer.tell()
|
||||||
buffer.write('<')
|
buffer.write('<')
|
||||||
buffer.write(tag)
|
buffer.write(tag)
|
||||||
@ -286,7 +291,7 @@ class MobiWriter(object):
|
|||||||
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
def __init__(self, compression=PALMDOC, imagemax=None,
|
def __init__(self, compression=PALMDOC, imagemax=None,
|
||||||
prefer_author_sort=False):
|
prefer_author_sort=False):
|
||||||
self._compression = compression or UNCOMPRESSED
|
self._compression = compression or UNCOMPRESSED
|
||||||
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
|
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
|
||||||
self._prefer_author_sort = prefer_author_sort
|
self._prefer_author_sort = prefer_author_sort
|
||||||
@ -297,7 +302,7 @@ class MobiWriter(object):
|
|||||||
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
||||||
prefer_author_sort = opts.prefer_author_sort
|
prefer_author_sort = opts.prefer_author_sort
|
||||||
return cls(compression=PALMDOC, imagemax=imagemax,
|
return cls(compression=PALMDOC, imagemax=imagemax,
|
||||||
prefer_author_sort=prefer_author_sort)
|
prefer_author_sort=prefer_author_sort)
|
||||||
|
|
||||||
def __call__(self, oeb, path):
|
def __call__(self, oeb, path):
|
||||||
if hasattr(path, 'write'):
|
if hasattr(path, 'write'):
|
||||||
@ -305,7 +310,7 @@ class MobiWriter(object):
|
|||||||
with open(path, 'w+b') as stream:
|
with open(path, 'w+b') as stream:
|
||||||
return self._dump_stream(oeb, stream)
|
return self._dump_stream(oeb, stream)
|
||||||
|
|
||||||
def _write(self, *data):
|
def _write(self, * data):
|
||||||
for datum in data:
|
for datum in data:
|
||||||
self._stream.write(datum)
|
self._stream.write(datum)
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -10,14 +9,14 @@ class PDBError(Exception):
|
|||||||
|
|
||||||
|
|
||||||
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
|
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
|
||||||
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
|
|
||||||
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
|
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
|
||||||
|
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
|
||||||
|
|
||||||
FORMAT_READERS = {
|
FORMAT_READERS = {
|
||||||
'PNPdPPrs' : ereader_reader,
|
'PNPdPPrs': ereader_reader,
|
||||||
'PNRdPPrs' : ereader_reader,
|
'PNRdPPrs': ereader_reader,
|
||||||
'zTXTGPlm' : ztxt_reader,
|
'zTXTGPlm': ztxt_reader,
|
||||||
'TEXtREAd' : palmdoc_reader,
|
'TEXtREAd': palmdoc_reader,
|
||||||
}
|
}
|
||||||
|
|
||||||
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
|
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
|
||||||
@ -25,41 +24,41 @@ from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
|
|||||||
from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
|
from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
|
||||||
|
|
||||||
FORMAT_WRITERS = {
|
FORMAT_WRITERS = {
|
||||||
'doc' : palmdoc_writer,
|
'doc': palmdoc_writer,
|
||||||
'ztxt' : ztxt_writer,
|
'ztxt': ztxt_writer,
|
||||||
'ereader' : ereader_writer,
|
'ereader': ereader_writer,
|
||||||
}
|
}
|
||||||
|
|
||||||
IDENTITY_TO_NAME = {
|
IDENTITY_TO_NAME = {
|
||||||
'PNPdPPrs' : 'eReader',
|
'PNPdPPrs': 'eReader',
|
||||||
'PNRdPPrs' : 'eReader',
|
'PNRdPPrs': 'eReader',
|
||||||
'zTXTGPlm' : 'zTXT',
|
'zTXTGPlm': 'zTXT',
|
||||||
'TEXtREAd' : 'PalmDOC',
|
'TEXtREAd': 'PalmDOC',
|
||||||
|
|
||||||
'.pdfADBE' : 'Adobe Reader',
|
'.pdfADBE': 'Adobe Reader',
|
||||||
'BVokBDIC' : 'BDicty',
|
'BVokBDIC': 'BDicty',
|
||||||
'DB99DBOS' : 'DB (Database program)',
|
'DB99DBOS': 'DB (Database program)',
|
||||||
'vIMGView' : 'FireViewer (ImageViewer)',
|
'vIMGView': 'FireViewer (ImageViewer)',
|
||||||
'PmDBPmDB' : 'HanDBase',
|
'PmDBPmDB': 'HanDBase',
|
||||||
'InfoINDB' : 'InfoView',
|
'InfoINDB': 'InfoView',
|
||||||
'ToGoToGo' : 'iSilo',
|
'ToGoToGo': 'iSilo',
|
||||||
'SDocSilX' : 'iSilo 3',
|
'SDocSilX': 'iSilo 3',
|
||||||
'JbDbJBas' : 'JFile',
|
'JbDbJBas': 'JFile',
|
||||||
'JfDbJFil' : 'JFile Pro',
|
'JfDbJFil': 'JFile Pro',
|
||||||
'DATALSdb' : 'LIST',
|
'DATALSdb': 'LIST',
|
||||||
'Mdb1Mdb1' : 'MobileDB',
|
'Mdb1Mdb1': 'MobileDB',
|
||||||
'BOOKMOBI' : 'MobiPocket',
|
'BOOKMOBI': 'MobiPocket',
|
||||||
'DataPlkr' : 'Plucker',
|
'DataPlkr': 'Plucker',
|
||||||
'DataSprd' : 'QuickSheet',
|
'DataSprd': 'QuickSheet',
|
||||||
'SM01SMem' : 'SuperMemo',
|
'SM01SMem': 'SuperMemo',
|
||||||
'TEXtTlDc' : 'TealDoc',
|
'TEXtTlDc': 'TealDoc',
|
||||||
'InfoTlIf' : 'TealInfo',
|
'InfoTlIf': 'TealInfo',
|
||||||
'DataTlMl' : 'TealMeal',
|
'DataTlMl': 'TealMeal',
|
||||||
'DataTlPt' : 'TealPaint',
|
'DataTlPt': 'TealPaint',
|
||||||
'dataTDBP' : 'ThinkDB',
|
'dataTDBP': 'ThinkDB',
|
||||||
'TdatTide' : 'Tides',
|
'TdatTide': 'Tides',
|
||||||
'ToRaTRPW' : 'TomeRaider',
|
'ToRaTRPW': 'TomeRaider',
|
||||||
'BDOCWrdS' : 'WordSmith',
|
'BDOCWrdS': 'WordSmith',
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_reader(identity):
|
def get_reader(identity):
|
||||||
|
@ -7,10 +7,27 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, sys
|
import struct
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.ereader import EreaderError
|
||||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
from calibre.ebooks.pdb.ereader.reader import HeaderRecord
|
|
||||||
|
def ereader_header_info(header):
|
||||||
|
h0 = header.section_data(0)
|
||||||
|
|
||||||
|
print 'Header Size: %s' % len(h0)
|
||||||
|
|
||||||
|
if len(h0) == 132:
|
||||||
|
print 'Header Type: Dropbook compatible'
|
||||||
|
print ''
|
||||||
|
ereader_header_info132(h0)
|
||||||
|
elif len(h0) == 202:
|
||||||
|
print 'Header Type: Makebook compatible'
|
||||||
|
print ''
|
||||||
|
ereader_header_info202(h0)
|
||||||
|
else:
|
||||||
|
raise EreaderError('Size mismatch. eReader header record size %i KB is not supported.' % len(h0))
|
||||||
|
|
||||||
def pdb_header_info(header):
|
def pdb_header_info(header):
|
||||||
print 'PDB Header Info:'
|
print 'PDB Header Info:'
|
||||||
@ -20,44 +37,75 @@ def pdb_header_info(header):
|
|||||||
print 'Title: %s' % header.title
|
print 'Title: %s' % header.title
|
||||||
print ''
|
print ''
|
||||||
|
|
||||||
def ereader_header_info(header):
|
def ereader_header_info132(h0):
|
||||||
h0 = header.section_data(0)
|
|
||||||
|
|
||||||
print 'Ereader Record 0 (Header) Info:'
|
print 'Ereader Record 0 (Header) Info:'
|
||||||
print ''
|
print ''
|
||||||
print '0-2 Version: %i' % struct.unpack('>H', h0[0:2])[0]
|
print '0-2 Version: %i' % struct.unpack('>H', h0[0:2])[0]
|
||||||
print '2-4: %i' % struct.unpack('>H', h0[2:4])[0]
|
print '2-4: %i' % struct.unpack('>H', h0[2:4])[0]
|
||||||
print '4-6: %i' % struct.unpack('>H', h0[4:6])[0]
|
print '4-6: %i' % struct.unpack('>H', h0[4:6])[0]
|
||||||
print '6-8: %i' % struct.unpack('>H', h0[6:8])[0]
|
print '6-8 Codepage: %i' % struct.unpack('>H', h0[6:8])[0]
|
||||||
print '8-10: %i' % struct.unpack('>H', h0[8:10])[0]
|
print '8-10: %i' % struct.unpack('>H', h0[8:10])[0]
|
||||||
print '10-12: %i' % struct.unpack('>H', h0[10:12])[0]
|
print '10-12: %i' % struct.unpack('>H', h0[10:12])[0]
|
||||||
print '12-14 Non-Text: %i' % struct.unpack('>H', h0[12:14])[0]
|
print '12-14 Non-Text offset: %i' % struct.unpack('>H', h0[12:14])[0]
|
||||||
print '14-16: %i' % struct.unpack('>H', h0[14:16])[0]
|
print '14-16: %i' % struct.unpack('>H', h0[14:16])[0]
|
||||||
print '16-18: %i' % struct.unpack('>H', h0[16:18])[0]
|
print '16-18: %i' % struct.unpack('>H', h0[16:18])[0]
|
||||||
print '18-20: %i' % struct.unpack('>H', h0[18:20])[0]
|
print '18-20: %i' % struct.unpack('>H', h0[18:20])[0]
|
||||||
print '20-22: %i' % struct.unpack('>H', h0[20:22])[0]
|
print '20-22 Image Count: %i' % struct.unpack('>H', h0[20:22])[0]
|
||||||
print '22-24: %i' % struct.unpack('>H', h0[22:24])[0]
|
print '22-24: %i' % struct.unpack('>H', h0[22:24])[0]
|
||||||
print '24-26: %i' % struct.unpack('>H', h0[24:26])[0]
|
print '24-26 Has Metadata?: %i' % struct.unpack('>H', h0[24:26])[0]
|
||||||
print '26-28: %i' % struct.unpack('>H', h0[26:28])[0]
|
print '26-28: %i' % struct.unpack('>H', h0[26:28])[0]
|
||||||
print '28-30 footnote_rec: %i' % struct.unpack('>H', h0[28:30])[0]
|
print '28-30 Footnote Count: %i' % struct.unpack('>H', h0[28:30])[0]
|
||||||
print '30-32 sidebar_rec: %i' % struct.unpack('>H', h0[30:32])[0]
|
print '30-32 Sidebar Count: %i' % struct.unpack('>H', h0[30:32])[0]
|
||||||
print '32-34 bookmark_offset: %i' % struct.unpack('>H', h0[32:34])[0]
|
print '32-34 Bookmark Offset: %i' % struct.unpack('>H', h0[32:34])[0]
|
||||||
print '34-36: %i' % struct.unpack('>H', h0[34:36])[0]
|
print '34-36 MAGIC: %i' % struct.unpack('>H', h0[34:36])[0]
|
||||||
print '36-38: %i' % struct.unpack('>H', h0[36:38])[0]
|
print '36-38: %i' % struct.unpack('>H', h0[36:38])[0]
|
||||||
print '38-40: %i' % struct.unpack('>H', h0[38:40])[0]
|
print '38-40: %i' % struct.unpack('>H', h0[38:40])[0]
|
||||||
print '40-42 image_data_offset: %i' % struct.unpack('>H', h0[40:42])[0]
|
print '40-42 Image Data Offset: %i' % struct.unpack('>H', h0[40:42])[0]
|
||||||
print '42-44: %i' % struct.unpack('>H', h0[42:44])[0]
|
print '42-44: %i' % struct.unpack('>H', h0[42:44])[0]
|
||||||
print '44-46 metadata_offset: %i' % struct.unpack('>H', h0[44:46])[0]
|
print '44-46 Metadata Offset: %i' % struct.unpack('>H', h0[44:46])[0]
|
||||||
print '46-48: %i' % struct.unpack('>H', h0[46:48])[0]
|
print '46-48: %i' % struct.unpack('>H', h0[46:48])[0]
|
||||||
print '48-50 footnote_offset: %i' % struct.unpack('>H', h0[48:50])[0]
|
print '48-50 Footnote Offset: %i' % struct.unpack('>H', h0[48:50])[0]
|
||||||
print '50-52 sidebar_offset: %i' % struct.unpack('>H', h0[50:52])[0]
|
print '50-52 Sidebar Offset: %i' % struct.unpack('>H', h0[50:52])[0]
|
||||||
print '52-54 last_data_offset: %i' % struct.unpack('>H', h0[52:54])[0]
|
print '52-54 Last Data Offset: %i' % struct.unpack('>H', h0[52:54])[0]
|
||||||
|
|
||||||
for i in range(54, 131, 2):
|
for i in range(54, 131, 2):
|
||||||
print '%i-%i: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0])
|
print '%i-%i: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0])
|
||||||
|
|
||||||
print ''
|
print ''
|
||||||
|
|
||||||
|
def ereader_header_info202(h0):
|
||||||
|
print 'Ereader Record 0 (Header) Info:'
|
||||||
|
print ''
|
||||||
|
print '0-2 Version: %i' % struct.unpack('>H', h0[0:2])[0]
|
||||||
|
print '2-4 Garbage: %i' % struct.unpack('>H', h0[2:4])[0]
|
||||||
|
print '4-6 Garbage: %i' % struct.unpack('>H', h0[4:6])[0]
|
||||||
|
print '6-8 Garbage: %i' % struct.unpack('>H', h0[6:8])[0]
|
||||||
|
print '8-10 Non-Text Offset: %i' % struct.unpack('>H', h0[8:10])[0]
|
||||||
|
print '10-12: %i' % struct.unpack('>H', h0[10:12])[0]
|
||||||
|
print '12-14: %i' % struct.unpack('>H', h0[12:14])[0]
|
||||||
|
print '14-16 Garbage: %i' % struct.unpack('>H', h0[14:16])[0]
|
||||||
|
print '16-18 Garbage: %i' % struct.unpack('>H', h0[16:18])[0]
|
||||||
|
print '18-20 Garbage: %i' % struct.unpack('>H', h0[18:20])[0]
|
||||||
|
print '20-22 Garbage: %i' % struct.unpack('>H', h0[20:22])[0]
|
||||||
|
print '22-24 Garbage: %i' % struct.unpack('>H', h0[22:24])[0]
|
||||||
|
print '24-26: %i' % struct.unpack('>H', h0[24:26])[0]
|
||||||
|
print '26-28: %i' % struct.unpack('>H', h0[26:28])[0]
|
||||||
|
for i in range(28, 98, 2):
|
||||||
|
print '%i-%i Garbage: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0])
|
||||||
|
print '98-100: %i' % struct.unpack('>H', h0[98:100])[0]
|
||||||
|
for i in range(100, 110, 2):
|
||||||
|
print '%i-%i Garbage: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0])
|
||||||
|
print '110-112: %i' % struct.unpack('>H', h0[110:112])[0]
|
||||||
|
print '112-114: %i' % struct.unpack('>H', h0[112:114])[0]
|
||||||
|
print '114-116 Garbage: %i' % struct.unpack('>H', h0[114:116])[0]
|
||||||
|
for i in range(116, 202, 2):
|
||||||
|
print '%i-%i: %i' % (i, i+2, struct.unpack('>H', h0[i:i+2])[0])
|
||||||
|
|
||||||
|
print ''
|
||||||
|
print '* Garbage: Random values.'
|
||||||
|
print ''
|
||||||
|
|
||||||
|
|
||||||
def section_lengths(header):
|
def section_lengths(header):
|
||||||
print 'Section Sizes'
|
print 'Section Sizes'
|
||||||
print ''
|
print ''
|
||||||
|
@ -8,183 +8,28 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, re, struct, zlib
|
|
||||||
|
|
||||||
from calibre import CurrentDir
|
|
||||||
from calibre.ebooks import DRMError
|
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
|
||||||
from calibre.ebooks.pdb.ereader import EreaderError
|
from calibre.ebooks.pdb.ereader import EreaderError
|
||||||
from calibre.ebooks.pml.pmlconverter import pml_to_html, \
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
footnote_sidebar_to_html
|
from calibre.ebooks.pdb.ereader.reader132 import Reader132
|
||||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
from calibre.ebooks.pdb.ereader.reader202 import Reader202
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
|
||||||
|
|
||||||
class HeaderRecord(object):
|
|
||||||
'''
|
|
||||||
The first record in the file is always the header record. It holds
|
|
||||||
information related to the location of text, images, and so on
|
|
||||||
in the file. This is used in conjunction with the sections
|
|
||||||
defined in the file header.
|
|
||||||
'''
|
|
||||||
|
|
||||||
def __init__(self, raw):
|
|
||||||
self.version, = struct.unpack('>H', raw[0:2])
|
|
||||||
self.non_text_offset, = struct.unpack('>H', raw[12:14])
|
|
||||||
self.has_metadata, = struct.unpack('>H', raw[24:26])
|
|
||||||
self.footnote_rec, = struct.unpack('>H', raw[28:30])
|
|
||||||
self.sidebar_rec, = struct.unpack('>H', raw[30:32])
|
|
||||||
self.bookmark_offset, = struct.unpack('>H', raw[32:34])
|
|
||||||
self.image_data_offset, = struct.unpack('>H', raw[40:42])
|
|
||||||
self.metadata_offset, = struct.unpack('>H', raw[44:46])
|
|
||||||
self.footnote_offset, = struct.unpack('>H', raw[48:50])
|
|
||||||
self.sidebar_offset, = struct.unpack('>H', raw[50:52])
|
|
||||||
self.last_data_offset, = struct.unpack('>H', raw[52:54])
|
|
||||||
|
|
||||||
self.num_text_pages = self.non_text_offset - 1
|
|
||||||
self.num_image_pages = self.metadata_offset - self.image_data_offset
|
|
||||||
|
|
||||||
|
|
||||||
class Reader(FormatReader):
|
class Reader(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, encoding=None):
|
def __init__(self, header, stream, log, encoding=None):
|
||||||
self.log = log
|
record0_size = len(header.section_data(0))
|
||||||
self.encoding = encoding
|
|
||||||
|
|
||||||
self.sections = []
|
if record0_size == 132:
|
||||||
for i in range(header.num_sections):
|
self.reader = Reader132(header, stream, log, encoding)
|
||||||
self.sections.append(header.section_data(i))
|
elif record0_size == 202:
|
||||||
|
self.reader = Reader202(header, stream, log, encoding)
|
||||||
self.header_record = HeaderRecord(self.section_data(0))
|
else:
|
||||||
|
raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
|
||||||
if self.header_record.version not in (2, 10):
|
|
||||||
if self.header_record.version in (260, 272):
|
|
||||||
raise DRMError('eReader DRM is not supported.')
|
|
||||||
else:
|
|
||||||
raise EreaderError('Unknown book version %i.' % self.header_record.version)
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.pdb import get_metadata
|
|
||||||
self.mi = get_metadata(stream, False)
|
|
||||||
|
|
||||||
def section_data(self, number):
|
|
||||||
return self.sections[number]
|
|
||||||
|
|
||||||
def decompress_text(self, number):
|
|
||||||
if self.header_record.version == 2:
|
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
|
||||||
if self.header_record.version == 10:
|
|
||||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
|
||||||
|
|
||||||
|
|
||||||
def get_image(self, number):
|
|
||||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
|
||||||
return 'empty', ''
|
|
||||||
data = self.section_data(number)
|
|
||||||
name = data[4:4+32].strip('\x00')
|
|
||||||
img = data[62:]
|
|
||||||
return name, img
|
|
||||||
|
|
||||||
def get_text_page(self, number):
|
|
||||||
'''
|
|
||||||
Only palmdoc and zlib compressed are supported. The text is
|
|
||||||
assumed to be encoded as Windows-1252. The encoding is part of
|
|
||||||
the eReader file spec and should always be this encoding.
|
|
||||||
'''
|
|
||||||
if number not in range(1, self.header_record.num_text_pages + 1):
|
|
||||||
return ''
|
|
||||||
|
|
||||||
return self.decompress_text(number)
|
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
output_dir = os.path.abspath(output_dir)
|
return self.reader.extract_content(output_dir)
|
||||||
|
|
||||||
if not os.path.exists(output_dir):
|
|
||||||
os.makedirs(output_dir)
|
|
||||||
|
|
||||||
html = u'<html><head><title></title></head><body>'
|
|
||||||
|
|
||||||
for i in range(1, self.header_record.num_text_pages + 1):
|
|
||||||
self.log.debug('Extracting text page %i' % i)
|
|
||||||
html += pml_to_html(self.get_text_page(i))
|
|
||||||
|
|
||||||
if self.header_record.footnote_rec > 0:
|
|
||||||
html += '<br /><h1>%s</h1>' % _('Footnotes')
|
|
||||||
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
|
||||||
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)):
|
|
||||||
self.log.debug('Extracting footnote page %i' % i)
|
|
||||||
html += '<dl>'
|
|
||||||
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
|
|
||||||
html += '</dl>'
|
|
||||||
|
|
||||||
if self.header_record.sidebar_rec > 0:
|
|
||||||
html += '<br /><h1>%s</h1>' % _('Sidebar')
|
|
||||||
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
|
||||||
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)):
|
|
||||||
self.log.debug('Extracting sidebar page %i' % i)
|
|
||||||
html += '<dl>'
|
|
||||||
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
|
|
||||||
html += '</dl>'
|
|
||||||
|
|
||||||
html += '</body></html>'
|
|
||||||
|
|
||||||
with CurrentDir(output_dir):
|
|
||||||
with open('index.html', 'wb') as index:
|
|
||||||
self.log.debug('Writing text to index.html')
|
|
||||||
index.write(html.encode('utf-8'))
|
|
||||||
|
|
||||||
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
|
||||||
os.makedirs(os.path.join(output_dir, 'images/'))
|
|
||||||
images = []
|
|
||||||
with CurrentDir(os.path.join(output_dir, 'images/')):
|
|
||||||
for i in range(0, self.header_record.num_image_pages):
|
|
||||||
name, img = self.get_image(self.header_record.image_data_offset + i)
|
|
||||||
images.append(name)
|
|
||||||
with open(name, 'wb') as imgf:
|
|
||||||
self.log.debug('Writing image %s to images/' % name)
|
|
||||||
imgf.write(img)
|
|
||||||
|
|
||||||
opf_path = self.create_opf(output_dir, images)
|
|
||||||
|
|
||||||
return opf_path
|
|
||||||
|
|
||||||
def create_opf(self, output_dir, images):
|
|
||||||
with CurrentDir(output_dir):
|
|
||||||
opf = OPFCreator(output_dir, self.mi)
|
|
||||||
|
|
||||||
manifest = [('index.html', None)]
|
|
||||||
|
|
||||||
for i in images:
|
|
||||||
manifest.append((os.path.join('images/', i), None))
|
|
||||||
|
|
||||||
opf.create_manifest(manifest)
|
|
||||||
opf.create_spine(['index.html'])
|
|
||||||
with open('metadata.opf', 'wb') as opffile:
|
|
||||||
opf.render(opffile)
|
|
||||||
|
|
||||||
return os.path.join(output_dir, 'metadata.opf')
|
|
||||||
|
|
||||||
def dump_pml(self):
|
def dump_pml(self):
|
||||||
'''
|
return self.reader.dump_pml()
|
||||||
This is primarily used for debugging and 3rd party tools to
|
|
||||||
get the plm markup that comprises the text in the file.
|
|
||||||
'''
|
|
||||||
pml = ''
|
|
||||||
|
|
||||||
for i in range(1, self.header_record.num_text_pages + 1):
|
|
||||||
pml += self.get_text_page(i)
|
|
||||||
|
|
||||||
return pml
|
|
||||||
|
|
||||||
def dump_images(self, output_dir):
|
|
||||||
'''
|
|
||||||
This is primarily used for debugging and 3rd party tools to
|
|
||||||
get the images in the file.
|
|
||||||
'''
|
|
||||||
if not os.path.exists(output_dir):
|
|
||||||
os.makedirs(output_dir)
|
|
||||||
|
|
||||||
with CurrentDir(output_dir):
|
|
||||||
for i in range(0, self.header_record.num_image_pages):
|
|
||||||
name, img = self.get_image(self.header_record.image_data_offset + i)
|
|
||||||
with open(name, 'wb') as imgf:
|
|
||||||
imgf.write(img)
|
|
||||||
|
|
||||||
|
def dump_images(self):
|
||||||
|
return self.reader.dump_images()
|
||||||
|
192
src/calibre/ebooks/pdb/ereader/reader132.py
Normal file
192
src/calibre/ebooks/pdb/ereader/reader132.py
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from ereader pdb file with a 132 byte header created by Dropbook.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import struct
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
|
from calibre.ebooks import DRMError
|
||||||
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.pdb.ereader import EreaderError
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.pml.pmlconverter import footnote_sidebar_to_html
|
||||||
|
from calibre.ebooks.pml.pmlconverter import pml_to_html
|
||||||
|
|
||||||
|
class HeaderRecord(object):
|
||||||
|
'''
|
||||||
|
The first record in the file is always the header record. It holds
|
||||||
|
information related to the location of text, images, and so on
|
||||||
|
in the file. This is used in conjunction with the sections
|
||||||
|
defined in the file header.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, raw):
|
||||||
|
self.version, = struct.unpack('>H', raw[0:2])
|
||||||
|
self.non_text_offset, = struct.unpack('>H', raw[12:14])
|
||||||
|
self.has_metadata, = struct.unpack('>H', raw[24:26])
|
||||||
|
self.footnote_rec, = struct.unpack('>H', raw[28:30])
|
||||||
|
self.sidebar_rec, = struct.unpack('>H', raw[30:32])
|
||||||
|
self.bookmark_offset, = struct.unpack('>H', raw[32:34])
|
||||||
|
self.image_data_offset, = struct.unpack('>H', raw[40:42])
|
||||||
|
self.metadata_offset, = struct.unpack('>H', raw[44:46])
|
||||||
|
self.footnote_offset, = struct.unpack('>H', raw[48:50])
|
||||||
|
self.sidebar_offset, = struct.unpack('>H', raw[50:52])
|
||||||
|
self.last_data_offset, = struct.unpack('>H', raw[52:54])
|
||||||
|
|
||||||
|
self.num_text_pages = self.non_text_offset - 1
|
||||||
|
self.num_image_pages = self.metadata_offset - self.image_data_offset
|
||||||
|
|
||||||
|
|
||||||
|
class Reader132(FormatReader):
|
||||||
|
|
||||||
|
def __init__(self, header, stream, log, encoding=None):
|
||||||
|
self.log = log
|
||||||
|
self.encoding = encoding
|
||||||
|
|
||||||
|
self.sections = []
|
||||||
|
for i in range(header.num_sections):
|
||||||
|
self.sections.append(header.section_data(i))
|
||||||
|
|
||||||
|
self.header_record = HeaderRecord(self.section_data(0))
|
||||||
|
|
||||||
|
if self.header_record.version not in (2, 10):
|
||||||
|
if self.header_record.version in (260, 272):
|
||||||
|
raise DRMError('eReader DRM is not supported.')
|
||||||
|
else:
|
||||||
|
raise EreaderError('Unknown book version %i.' % self.header_record.version)
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata.pdb import get_metadata
|
||||||
|
self.mi = get_metadata(stream, False)
|
||||||
|
|
||||||
|
def section_data(self, number):
|
||||||
|
return self.sections[number]
|
||||||
|
|
||||||
|
def decompress_text(self, number):
|
||||||
|
if self.header_record.version == 2:
|
||||||
|
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||||
|
if self.header_record.version == 10:
|
||||||
|
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||||
|
|
||||||
|
def get_image(self, number):
|
||||||
|
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||||
|
return 'empty', ''
|
||||||
|
data = self.section_data(number)
|
||||||
|
name = data[4:4 + 32].strip('\x00')
|
||||||
|
img = data[62:]
|
||||||
|
return name, img
|
||||||
|
|
||||||
|
def get_text_page(self, number):
|
||||||
|
'''
|
||||||
|
Only palmdoc and zlib compressed are supported. The text is
|
||||||
|
assumed to be encoded as Windows-1252. The encoding is part of
|
||||||
|
the eReader file spec and should always be this encoding.
|
||||||
|
'''
|
||||||
|
if number not in range(1, self.header_record.num_text_pages + 1):
|
||||||
|
return ''
|
||||||
|
|
||||||
|
return self.decompress_text(number)
|
||||||
|
|
||||||
|
def extract_content(self, output_dir):
|
||||||
|
output_dir = os.path.abspath(output_dir)
|
||||||
|
|
||||||
|
if not os.path.exists(output_dir):
|
||||||
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
|
html = u'<html><head><title></title></head><body>'
|
||||||
|
|
||||||
|
for i in range(1, self.header_record.num_text_pages + 1):
|
||||||
|
self.log.debug('Extracting text page %i' % i)
|
||||||
|
html += pml_to_html(self.get_text_page(i))
|
||||||
|
|
||||||
|
if self.header_record.footnote_rec > 0:
|
||||||
|
html += '<br /><h1>%s</h1>' % _('Footnotes')
|
||||||
|
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
||||||
|
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)):
|
||||||
|
self.log.debug('Extracting footnote page %i' % i)
|
||||||
|
html += '<dl>'
|
||||||
|
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
|
||||||
|
html += '</dl>'
|
||||||
|
|
||||||
|
if self.header_record.sidebar_rec > 0:
|
||||||
|
html += '<br /><h1>%s</h1>' % _('Sidebar')
|
||||||
|
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
||||||
|
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)):
|
||||||
|
self.log.debug('Extracting sidebar page %i' % i)
|
||||||
|
html += '<dl>'
|
||||||
|
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
|
||||||
|
html += '</dl>'
|
||||||
|
|
||||||
|
html += '</body></html>'
|
||||||
|
|
||||||
|
with CurrentDir(output_dir):
|
||||||
|
with open('index.html', 'wb') as index:
|
||||||
|
self.log.debug('Writing text to index.html')
|
||||||
|
index.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
||||||
|
os.makedirs(os.path.join(output_dir, 'images/'))
|
||||||
|
images = []
|
||||||
|
with CurrentDir(os.path.join(output_dir, 'images/')):
|
||||||
|
for i in range(0, self.header_record.num_image_pages):
|
||||||
|
name, img = self.get_image(self.header_record.image_data_offset + i)
|
||||||
|
images.append(name)
|
||||||
|
with open(name, 'wb') as imgf:
|
||||||
|
self.log.debug('Writing image %s to images/' % name)
|
||||||
|
imgf.write(img)
|
||||||
|
|
||||||
|
opf_path = self.create_opf(output_dir, images)
|
||||||
|
|
||||||
|
return opf_path
|
||||||
|
|
||||||
|
def create_opf(self, output_dir, images):
|
||||||
|
with CurrentDir(output_dir):
|
||||||
|
opf = OPFCreator(output_dir, self.mi)
|
||||||
|
|
||||||
|
manifest = [('index.html', None)]
|
||||||
|
|
||||||
|
for i in images:
|
||||||
|
manifest.append((os.path.join('images/', i), None))
|
||||||
|
|
||||||
|
opf.create_manifest(manifest)
|
||||||
|
opf.create_spine(['index.html'])
|
||||||
|
with open('metadata.opf', 'wb') as opffile:
|
||||||
|
opf.render(opffile)
|
||||||
|
|
||||||
|
return os.path.join(output_dir, 'metadata.opf')
|
||||||
|
|
||||||
|
def dump_pml(self):
|
||||||
|
'''
|
||||||
|
This is primarily used for debugging and 3rd party tools to
|
||||||
|
get the plm markup that comprises the text in the file.
|
||||||
|
'''
|
||||||
|
pml = ''
|
||||||
|
|
||||||
|
for i in range(1, self.header_record.num_text_pages + 1):
|
||||||
|
pml += self.get_text_page(i)
|
||||||
|
|
||||||
|
return pml
|
||||||
|
|
||||||
|
def dump_images(self, output_dir):
|
||||||
|
'''
|
||||||
|
This is primarily used for debugging and 3rd party tools to
|
||||||
|
get the images in the file.
|
||||||
|
'''
|
||||||
|
if not os.path.exists(output_dir):
|
||||||
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
|
with CurrentDir(output_dir):
|
||||||
|
for i in range(0, self.header_record.num_image_pages):
|
||||||
|
name, img = self.get_image(self.header_record.image_data_offset + i)
|
||||||
|
with open(name, 'wb') as imgf:
|
||||||
|
imgf.write(img)
|
||||||
|
|
157
src/calibre/ebooks/pdb/ereader/reader202.py
Normal file
157
src/calibre/ebooks/pdb/ereader/reader202.py
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from ereader pdb file with a 202 byte header created by Makebook.
|
||||||
|
'''
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.pml.pmlconverter import pml_to_html
|
||||||
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.pdb.ereader import EreaderError
|
||||||
|
|
||||||
|
class HeaderRecord(object):
|
||||||
|
'''
|
||||||
|
The first record in the file is always the header record. It holds
|
||||||
|
information related to the location of text, images, and so on
|
||||||
|
in the file. This is used in conjunction with the sections
|
||||||
|
defined in the file header.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, raw):
|
||||||
|
self.version, = struct.unpack('>H', raw[0:2])
|
||||||
|
self.non_text_offset, = struct.unpack('>H', raw[8:10])
|
||||||
|
|
||||||
|
self.num_text_pages = self.non_text_offset - 1
|
||||||
|
|
||||||
|
|
||||||
|
class Reader202(FormatReader):
|
||||||
|
|
||||||
|
def __init__(self, header, stream, log, encoding=None):
|
||||||
|
self.log = log
|
||||||
|
self.encoding = encoding
|
||||||
|
|
||||||
|
self.sections = []
|
||||||
|
for i in range(header.num_sections):
|
||||||
|
self.sections.append(header.section_data(i))
|
||||||
|
|
||||||
|
self.header_record = HeaderRecord(self.section_data(0))
|
||||||
|
|
||||||
|
if self.header_record.version != 4:
|
||||||
|
raise EreaderError('Unknown book version %i.' % self.header_record.version)
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata.pdb import get_metadata
|
||||||
|
self.mi = get_metadata(stream, False)
|
||||||
|
|
||||||
|
def section_data(self, number):
|
||||||
|
return self.sections[number]
|
||||||
|
|
||||||
|
def decompress_text(self, number):
|
||||||
|
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding)
|
||||||
|
|
||||||
|
def get_image(self, number):
|
||||||
|
name = None
|
||||||
|
img = None
|
||||||
|
|
||||||
|
data = self.section_data(number)
|
||||||
|
if data.startswith('PNG'):
|
||||||
|
name = data[4:4 + 32].strip('\x00')
|
||||||
|
img = data[62:]
|
||||||
|
|
||||||
|
return name, img
|
||||||
|
|
||||||
|
def get_text_page(self, number):
|
||||||
|
'''
|
||||||
|
Only palmdoc compression is supported. The text is xored with 0xA5 and
|
||||||
|
assumed to be encoded as Windows-1252. The encoding is part of
|
||||||
|
the eReader file spec and should always be this encoding.
|
||||||
|
'''
|
||||||
|
if number not in range(1, self.header_record.num_text_pages + 1):
|
||||||
|
return ''
|
||||||
|
|
||||||
|
return self.decompress_text(number)
|
||||||
|
|
||||||
|
def extract_content(self, output_dir):
|
||||||
|
output_dir = os.path.abspath(output_dir)
|
||||||
|
|
||||||
|
if not os.path.exists(output_dir):
|
||||||
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
|
html = u'<html><head><title></title></head><body>'
|
||||||
|
|
||||||
|
for i in range(1, self.header_record.num_text_pages + 1):
|
||||||
|
self.log.debug('Extracting text page %i' % i)
|
||||||
|
html += pml_to_html(self.get_text_page(i))
|
||||||
|
|
||||||
|
|
||||||
|
html += '</body></html>'
|
||||||
|
|
||||||
|
with CurrentDir(output_dir):
|
||||||
|
with open('index.html', 'wb') as index:
|
||||||
|
self.log.debug('Writing text to index.html')
|
||||||
|
index.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
||||||
|
os.makedirs(os.path.join(output_dir, 'images/'))
|
||||||
|
images = []
|
||||||
|
with CurrentDir(os.path.join(output_dir, 'images/')):
|
||||||
|
for i in range(self.header_record.non_text_offset, len(self.sections)):
|
||||||
|
name, img = self.get_image(i)
|
||||||
|
if name:
|
||||||
|
images.append(name)
|
||||||
|
with open(name, 'wb') as imgf:
|
||||||
|
self.log.debug('Writing image %s to images/' % name)
|
||||||
|
imgf.write(img)
|
||||||
|
|
||||||
|
opf_path = self.create_opf(output_dir, images)
|
||||||
|
|
||||||
|
return opf_path
|
||||||
|
|
||||||
|
def create_opf(self, output_dir, images):
    '''
    Write metadata.opf into ``output_dir`` and return its path.

    The manifest lists index.html followed by every name in ``images``
    (prefixed with images/); the spine contains only index.html.
    '''
    with CurrentDir(output_dir):
        opf = OPFCreator(output_dir, self.mi)

        manifest = [('index.html', None)]
        manifest.extend((os.path.join('images/', image), None) for image in images)

        opf.create_manifest(manifest)
        opf.create_spine(['index.html'])
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

    return os.path.join(output_dir, 'metadata.opf')
|
||||||
|
|
||||||
|
def dump_pml(self):
    '''
    This is primarily used for debugging and 3rd party tools to
    get the PML markup that comprises the text in the file.

    Returns the text of all pages, in page order, as one string.
    '''
    page_numbers = range(1, self.header_record.num_text_pages + 1)
    return ''.join([self.get_text_page(number) for number in page_numbers])
|
||||||
|
|
||||||
|
def dump_images(self, output_dir):
    '''
    This is primarily used for debugging and 3rd party tools to
    get the images in the file.

    Every image section (``header_record.num_image_pages`` of them, starting
    at ``header_record.image_data_offset``) is written to ``output_dir``
    under the name stored with the image. ``output_dir`` is created when it
    does not exist.
    '''
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with CurrentDir(output_dir):
        for i in range(0, self.header_record.num_image_pages):
            name, img = self.get_image(self.header_record.image_data_offset + i)
            if not name:
                # Same guard as extract_content: a section without an image
                # name cannot be written to a file, so skip it instead of
                # failing the whole dump.
                continue
            with open(name, 'wb') as imgf:
                imgf.write(img)
|
@ -8,9 +8,11 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, zlib
|
import struct
|
||||||
|
import zlib
|
||||||
|
|
||||||
import Image, cStringIO
|
import Image
|
||||||
|
import cStringIO
|
||||||
|
|
||||||
from calibre.ebooks.pdb.formatwriter import FormatWriter
|
from calibre.ebooks.pdb.formatwriter import FormatWriter
|
||||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||||
@ -97,7 +99,7 @@ class Writer(FormatWriter):
|
|||||||
publisher = ''
|
publisher = ''
|
||||||
isbn = ''
|
isbn = ''
|
||||||
|
|
||||||
if metadata != None:
|
if metadata:
|
||||||
if len(metadata.title) >= 1:
|
if len(metadata.title) >= 1:
|
||||||
title = metadata.title[0].value
|
title = metadata.title[0].value
|
||||||
if len(metadata.creator) >= 1:
|
if len(metadata.creator) >= 1:
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
'''
|
'''
|
||||||
Read the header data from a pdb file.
|
Read the header data from a pdb file.
|
||||||
'''
|
'''
|
||||||
@ -8,7 +7,9 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, struct, time
|
import re
|
||||||
|
import struct
|
||||||
|
import time
|
||||||
|
|
||||||
class PdbHeaderReader(object):
|
class PdbHeaderReader(object):
|
||||||
|
|
||||||
@ -35,16 +36,16 @@ class PdbHeaderReader(object):
|
|||||||
if number not in range(0, self.num_sections):
|
if number not in range(0, self.num_sections):
|
||||||
raise ValueError('Not a valid section number %i' % number)
|
raise ValueError('Not a valid section number %i' % number)
|
||||||
|
|
||||||
self.stream.seek(78+number*8)
|
self.stream.seek(78 + number * 8)
|
||||||
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', self.stream.read(8))[0]
|
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', self.stream.read(8))[0]
|
||||||
flags, val = a1, a2<<16 | a3<<8 | a4
|
flags, val = a1, a2 << 16 | a3 << 8 | a4
|
||||||
return (offset, flags, val)
|
return (offset, flags, val)
|
||||||
|
|
||||||
def section_offset(self, number):
|
def section_offset(self, number):
|
||||||
if number not in range(0, self.num_sections):
|
if number not in range(0, self.num_sections):
|
||||||
raise ValueError('Not a valid section number %i' % number)
|
raise ValueError('Not a valid section number %i' % number)
|
||||||
|
|
||||||
self.stream.seek(78+number*8)
|
self.stream.seek(78 + number * 8)
|
||||||
return struct.unpack('>LBBBB', self.stream.read(8))[0]
|
return struct.unpack('>LBBBB', self.stream.read(8))[0]
|
||||||
|
|
||||||
def section_data(self, number):
|
def section_data(self, number):
|
||||||
|
@ -8,11 +8,13 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, struct, zlib
|
import os
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
from calibre.ebooks.txt.processor import opf_writer
|
||||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
from calibre.ebooks.txt.processor import txt_to_markdown
|
||||||
|
|
||||||
class HeaderRecord(object):
|
class HeaderRecord(object):
|
||||||
'''
|
'''
|
||||||
|
@ -10,10 +10,11 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
|
from calibre.ebooks.compression.palmdoc import compress_doc
|
||||||
from calibre.ebooks.pdb.formatwriter import FormatWriter
|
from calibre.ebooks.pdb.formatwriter import FormatWriter
|
||||||
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
|
|
||||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
|
||||||
from calibre.ebooks.pdb.header import PdbHeaderBuilder
|
from calibre.ebooks.pdb.header import PdbHeaderBuilder
|
||||||
|
from calibre.ebooks.txt.writer import TxtNewlines
|
||||||
|
from calibre.ebooks.txt.writer import TxtWriter
|
||||||
|
|
||||||
MAX_RECORD_SIZE = 4096
|
MAX_RECORD_SIZE = 4096
|
||||||
|
|
||||||
@ -40,7 +41,7 @@ class Writer(FormatWriter):
|
|||||||
hb = PdbHeaderBuilder('TEXtREAd', title)
|
hb = PdbHeaderBuilder('TEXtREAd', title)
|
||||||
hb.build_header(section_lengths, out_stream)
|
hb.build_header(section_lengths, out_stream)
|
||||||
|
|
||||||
for record in [header_record]+txt_records:
|
for record in [header_record] + txt_records:
|
||||||
out_stream.write(record)
|
out_stream.write(record)
|
||||||
|
|
||||||
def _generate_text(self, spine):
|
def _generate_text(self, spine):
|
||||||
@ -51,7 +52,7 @@ class Writer(FormatWriter):
|
|||||||
|
|
||||||
txt_records = []
|
txt_records = []
|
||||||
for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
|
for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
|
||||||
txt_records.append(txt[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
|
txt_records.append(txt[i * MAX_RECORD_SIZE: (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
|
||||||
|
|
||||||
return txt_records, txt_length
|
return txt_records, txt_length
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -23,11 +22,13 @@ class PDFInput(InputFormatPlugin):
|
|||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
html = pdftohtml(stream.name)
|
html = pdftohtml(stream.name)
|
||||||
|
|
||||||
if self._preprocess_html_for_viewer:
|
if self._preprocess_html_for_viewer:
|
||||||
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
||||||
prepro = HTMLPreProcessor(lambda x:x, False)
|
prepro = HTMLPreProcessor(lambda x:x, False)
|
||||||
html = prepro(html.decode('utf-8')).encode('utf-8')
|
html = prepro(html.decode('utf-8')).encode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
with open('index.html', 'wb') as index:
|
with open('index.html', 'wb') as index:
|
||||||
index.write(html)
|
index.write(html)
|
||||||
|
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
|
||||||
'2009, John Schember <john@nachtimwald.com>'
|
'2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import errno, os, sys, subprocess
|
import errno
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from calibre.ebooks import ConversionError, DRMError
|
from calibre.ebooks import ConversionError, DRMError
|
||||||
|
@ -8,7 +8,8 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Transform OEB content into PML markup
|
Transform OEB content into PML markup
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, re
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
@ -40,6 +41,31 @@ STYLES = [
|
|||||||
('text-align', {'right' : 'r', 'center' : 'c'}),
|
('text-align', {'right' : 'r', 'center' : 'c'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
BLOCK_TAGS = [
|
||||||
|
'p',
|
||||||
|
]
|
||||||
|
|
||||||
|
BLOCK_STYLES = [
|
||||||
|
'block',
|
||||||
|
]
|
||||||
|
|
||||||
|
LINK_TAGS = [
|
||||||
|
'a',
|
||||||
|
]
|
||||||
|
|
||||||
|
SEPARATE_TAGS = [
|
||||||
|
'h1',
|
||||||
|
'h2',
|
||||||
|
'h3',
|
||||||
|
'h4',
|
||||||
|
'h5',
|
||||||
|
'h6',
|
||||||
|
'p',
|
||||||
|
'div',
|
||||||
|
'li',
|
||||||
|
'tr',
|
||||||
|
]
|
||||||
|
|
||||||
class PMLMLizer(object):
|
class PMLMLizer(object):
|
||||||
def __init__(self, ignore_tables=False):
|
def __init__(self, ignore_tables=False):
|
||||||
self.ignore_tables = ignore_tables
|
self.ignore_tables = ignore_tables
|
||||||
@ -62,7 +88,7 @@ class PMLMLizer(object):
|
|||||||
|
|
||||||
def add_page_anchor(self, href):
|
def add_page_anchor(self, href):
|
||||||
href = os.path.splitext(os.path.basename(href))[0]
|
href = os.path.splitext(os.path.basename(href))[0]
|
||||||
return '\\Q="%s"' % href
|
return u'\\Q="%s"' % href
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
# Remove excess spaces at beginning and end of lines
|
# Remove excess spaces at beginning and end of lines
|
||||||
@ -84,7 +110,8 @@ class PMLMLizer(object):
|
|||||||
text = text.replace('\\Q="%s"' % unused, '')
|
text = text.replace('\\Q="%s"' % unused, '')
|
||||||
|
|
||||||
for entity in set(re.findall('&.+?;', text)):
|
for entity in set(re.findall('&.+?;', text)):
|
||||||
text = text.replace(entity, entity_to_unicode(entity[1:-1]))
|
mo = re.search('(%s)' % entity[1:-1], text)
|
||||||
|
text = text.replace(entity, entity_to_unicode(mo))
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
@ -104,7 +131,7 @@ class PMLMLizer(object):
|
|||||||
tag_count = 0
|
tag_count = 0
|
||||||
|
|
||||||
# Are we in a paragraph block?
|
# Are we in a paragraph block?
|
||||||
if tag == 'p' or style['display'] in ('block'):
|
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
|
||||||
if 'block' not in tag_stack:
|
if 'block' not in tag_stack:
|
||||||
tag_count += 1
|
tag_count += 1
|
||||||
tag_stack.append('block')
|
tag_stack.append('block')
|
||||||
@ -136,7 +163,7 @@ class PMLMLizer(object):
|
|||||||
|
|
||||||
# Special processing of tags that require an argument.
|
# Special processing of tags that require an argument.
|
||||||
# Anchors links
|
# Anchors links
|
||||||
if tag == 'a' and 'q' not in tag_stack:
|
if tag in LINK_TAGS and 'q' not in tag_stack:
|
||||||
href = elem.get('href')
|
href = elem.get('href')
|
||||||
if href and '://' not in href:
|
if href and '://' not in href:
|
||||||
if '#' in href:
|
if '#' in href:
|
||||||
@ -168,7 +195,7 @@ class PMLMLizer(object):
|
|||||||
for i in range(0, tag_count):
|
for i in range(0, tag_count):
|
||||||
close_tag_list.insert(0, tag_stack.pop())
|
close_tag_list.insert(0, tag_stack.pop())
|
||||||
text += self.close_tags(close_tag_list)
|
text += self.close_tags(close_tag_list)
|
||||||
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'li', 'tr'):
|
if tag in SEPARATE_TAGS:
|
||||||
text += os.linesep + os.linesep
|
text += os.linesep + os.linesep
|
||||||
|
|
||||||
if 'block' not in tag_stack:
|
if 'block' not in tag_stack:
|
||||||
|
26
src/calibre/ebooks/rb/__init__.py
Normal file
26
src/calibre/ebooks/rb/__init__.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
|
||||||
|
|
||||||
|
class RocketBookError(Exception):
    '''Error raised for RocketBook files that are invalid or corrupt.'''
|
||||||
|
|
||||||
|
|
||||||
|
def unique_name(name, used_names):
    '''
    Return a file name shorter than 32 characters that is not in
    ``used_names``.

    Only the base name of ``name`` is considered. It is returned unchanged
    when it is short enough and unused. Otherwise a name of the form
    ``NNNN-<base>.<ext>`` is generated, where NNNN is a zero padded counter,
    <base> is at most 22 characters of the name without its extension and
    <ext> is at most 3 characters of the extension.

    :param name: Original file name or path.
    :param used_names: Container of names that are already taken. It is not
        modified; the caller records the returned name.
    :return: The chosen name. If every counter value is taken the last
        candidate is returned even though it is not unique.
    '''
    name = os.path.basename(name)
    if len(name) < 32 and name not in used_names:
        return name

    stem, ext = os.path.splitext(name)
    # splitext keeps the leading dot; drop it and keep at most 3 characters.
    ext = ext[1:4]
    base_name = stem[:22]
    # Worst case length is 4 + 1 + 22 + 1 + 3 = 31 characters.
    template = '%04d-%s.%s' if ext else '%04d-%s%s'

    for i in range(0, 9999):
        name = template % (i, base_name, ext)
        if name not in used_names:
            break
    return name
|
24
src/calibre/ebooks/rb/input.py
Normal file
24
src/calibre/ebooks/rb/input.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.ebooks.rb.reader import Reader
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class RBInput(InputFormatPlugin):
    '''Input plugin that extracts the content of RB files.'''

    name        = 'RB Input'
    author      = 'John Schember'
    description = 'Convert RB files to HTML'
    file_types  = set(['rb'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Extract the content of ``stream`` into the current working directory
        and return the path of the OPF file the reader creates.
        '''
        rb_reader = Reader(stream, log, options.input_encoding)
        return rb_reader.extract_content(os.getcwd())
|
36
src/calibre/ebooks/rb/output.py
Normal file
36
src/calibre/ebooks/rb/output.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
|
from calibre.ebooks.rb.writer import RBWriter
|
||||||
|
|
||||||
|
class RBOutput(OutputFormatPlugin):
    '''Output plugin that writes an OEB book as an RB file.'''

    name = 'RB Output'
    author = 'John Schember'
    file_type = 'rb'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path``.

        ``output_path`` is either a file system path or an object with a
        ``write`` method. For a path, missing parent directories are created
        and the file is opened and closed here. A stream passed in by the
        caller is rewound and truncated but left open.
        '''
        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            writer = RBWriter(opts, log)

            out_stream.seek(0)
            out_stream.truncate()

            writer.write_content(oeb_book, out_stream, oeb_book.metadata)
        finally:
            # Only close what was opened here, and do so even when writing
            # fails so the file handle is not leaked.
            if close:
                out_stream.close()
|
166
src/calibre/ebooks/rb/rbml.py
Normal file
166
src/calibre/ebooks/rb/rbml.py
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Transform OEB content into RB compatible markup.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
# XHTML tags that RBMLizer.dump_text copies to the output (upper-cased).
TAGS = [
    'b', 'big', 'blockquote', 'br', 'center', 'code', 'div',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'hr', 'i', 'li', 'ol', 'p', 'pre', 'small', 'sub', 'sup', 'ul',
]

# Tags whose href attribute is turned into an <A HREF="#..."> link.
LINK_TAGS = ['a']

# (CSS property, {property value: tag to emit}) pairs used to express
# styling as tags.
STYLES = [
    ('font-weight', {'bold': 'b', 'bolder': 'b'}),
    ('font-style', {'italic': 'i'}),
    ('text-align', {'center': 'center'}),
]
|
||||||
|
|
||||||
|
class RBMLizer(object):
    '''
    Convert the XHTML documents in the spine of an OEB book into a single
    HTML document using the tags listed in TAGS, LINK_TAGS and STYLES.
    '''

    def __init__(self, name_map=None, ignore_tables=False):
        '''
        :param name_map: Mapping from the base name of an image ``src`` to
            the name to emit in the output. A mapping passed in is kept by
            reference; when omitted a new empty dict is used.
        :param ignore_tables: Stored on the instance.
        '''
        # A None default avoids sharing one dict between all instances.
        self.name_map = {} if name_map is None else name_map
        self.ignore_tables = ignore_tables

    def extract_content(self, oeb_book, opts):
        '''
        Convert ``oeb_book`` and return the resulting markup as one string.
        '''
        oeb_book.logger.info('Converting XHTML to RB markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.mlize_spine()

    def mlize_spine(self):
        '''
        Convert every spine item, each preceded by a page anchor, wrap the
        result in an HTML skeleton and strip anchors nothing links to.
        '''
        output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
        for item in self.oeb_book.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
            output += self.add_page_anchor(item.href)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        output += u'</BODY></HTML>'
        output = self.clean_text(output)
        return output

    def add_page_anchor(self, href):
        '''
        Return an anchor named after the base name of ``href`` without its
        extension.
        '''
        href = os.path.splitext(os.path.basename(href))[0]
        return u'<A NAME="%s"></A>' % href

    def clean_text(self, text):
        '''
        Remove every ``<A NAME="..."></A>`` anchor that no
        ``<A HREF="#...">`` link in ``text`` refers to.
        '''
        # Remove anchors that do not have links
        anchors = set(re.findall(r'(?<=<A NAME=").+?(?="></A>)', text))
        links = set(re.findall(r'(?<=<A HREF="#).+?(?=">)', text))
        for unused in anchors.difference(links):
            text = text.replace('<A NAME="%s"></A>' % unused, '')

        return text

    def dump_text(self, elem, stylizer, tag_stack=None):
        '''
        Recursively convert ``elem`` and its children to markup.

        :param elem: Element to convert. Non XHTML elements and elements
            whose style hides them produce an empty string.
        :param stylizer: Object whose ``style(elem)`` gives the computed
            style of an element.
        :param tag_stack: Stack of currently open tags shared by the
            recursive calls. Callers normally omit it.
        :return: The markup for ``elem``, including its tail text.
        '''
        if tag_stack is None:
            # A None default avoids one list being shared by every call,
            # which could leave stale entries behind after an exception.
            tag_stack = []

        if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
            return u''

        text = u''
        style = stylizer.style(elem)

        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return u''

        tag = barename(elem.tag)
        # Number of tags opened for this element; that many are closed below.
        tag_count = 0

        # Process tags that need special processing and that do not have inner
        # text. Usually these require an argument
        if tag == 'img':
            src = elem.get('src')
            # An <img> without a src has nothing to reference; skip it
            # instead of failing in os.path.basename(None).
            if src:
                src = os.path.basename(src)
                name = self.name_map.get(src, src)
                text += '<IMG SRC="%s">' % name

        rb_tag = tag.upper() if tag in TAGS else None
        if rb_tag:
            tag_count += 1
            text += '<%s>' % rb_tag
            tag_stack.append(rb_tag)

        # Anchor links
        if tag in LINK_TAGS:
            href = elem.get('href')
            if href:
                if '://' not in href:
                    # Local link: reduce it to the fragment, or to the base
                    # name of the target file, without extension.
                    if '#' in href:
                        href = href.partition('#')[2]
                    href = os.path.splitext(os.path.basename(href))[0]
                # NOTE(review): links containing '://' are also emitted with
                # a leading '#', as in the original code; confirm intended.
                tag_count += 1
                text += '<A HREF="#%s">' % href
                tag_stack.append('A')

        # Anchor ids
        id_name = elem.get('id')
        if id_name:
            text += '<A NAME="%s"></A>' % os.path.splitext(id_name)[0]

        # Processes style information
        for s in STYLES:
            style_tag = s[1].get(style[s[0]], None)
            if style_tag:
                style_tag = style_tag.upper()
                tag_count += 1
                text += '<%s>' % style_tag
                tag_stack.append(style_tag)

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
            text += elem.text

        for item in elem:
            text += self.dump_text(item, stylizer, tag_stack)

        # Take the tags opened by this element off the stack, keeping them
        # in the order they were opened.
        close_tag_list = []
        for i in range(0, tag_count):
            close_tag_list.insert(0, tag_stack.pop())

        text += self.close_tags(close_tag_list)

        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
            text += elem.tail

        return text

    def close_tags(self, tags):
        '''
        Return closing tags for ``tags``, last opened first. ``tags`` is
        emptied in the process.
        '''
        text = u''
        while tags:
            text += '</%s>' % tags.pop()

        return text
|
133
src/calibre/ebooks/rb/reader.py
Normal file
133
src/calibre/ebooks/rb/reader.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
import zlib
|
||||||
|
from urllib import unquote as urlunquote
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
|
from calibre.ebooks.rb import HEADER
|
||||||
|
from calibre.ebooks.rb import RocketBookError
|
||||||
|
from calibre.ebooks.metadata.rb import get_metadata
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
|
class RBToc(list):
    '''List of the table of contents entries of an RB file.'''

    class Item(object):
        '''One table of contents entry: name, size, offset and flags.'''

        def __init__(self, name='', size=0, offset=0, flags=0):
            self.name, self.size = name, size
            self.offset, self.flags = offset, flags
|
||||||
|
|
||||||
|
|
||||||
|
class Reader(object):
    '''
    Read an RB file from a stream and extract its pages and images.
    '''

    def __init__(self, stream, log, encoding=None):
        '''
        :param stream: Seekable binary stream containing the RB file.
        :param log: Logger, stored on the instance.
        :param encoding: Encoding of the text pages. ``None`` means cp1252.
        :raises RocketBookError: If the header is invalid or the size
            recorded in the header differs from the stream size.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        self.verify_file()

        self.mi = get_metadata(self.stream)
        self.toc = self.get_toc()

    def read_i32(self):
        '''Read a little-endian unsigned 32 bit integer from the stream.'''
        return struct.unpack('<I', self.stream.read(4))[0]

    def verify_file(self):
        '''
        Check the 14 byte header at the start of the stream and compare the
        file size stored at offset 28 with the real size of the stream.
        '''
        self.stream.seek(0)
        if self.stream.read(14) != HEADER:
            # Not every stream has a name (e.g. in-memory streams); do not
            # let a missing attribute hide the real error.
            stream_name = getattr(self.stream, 'name', '<stream>')
            raise RocketBookError('Could not read file: %s. Does not contain a valid RocketBook Header.' % stream_name)

        self.stream.seek(28)
        size = self.read_i32()
        self.stream.seek(0, os.SEEK_END)
        real_size = self.stream.tell()
        if size != real_size:
            raise RocketBookError('File is corrupt. The file size recorded in the header does not match the actual file size.')

    def get_toc(self):
        '''
        Read the table of contents, whose offset is stored at offset 24.

        The table starts with the number of entries; every entry is a 32
        byte NUL padded, URL quoted name followed by size, offset and flags.
        '''
        self.stream.seek(24)
        toc_offset = self.read_i32()

        self.stream.seek(toc_offset)
        pages = self.read_i32()

        toc = RBToc()
        for i in range(pages):
            name = urlunquote(self.stream.read(32).strip('\x00'))
            size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
            toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))

        return toc

    def get_text(self, toc_item, output_dir):
        '''
        Write the text of ``toc_item`` to ``output_dir`` as UTF-8, under the
        name of the item. Items with flags 1 or 2 are skipped. Items with
        flags 8 are stored as a sequence of zlib compressed chunks; all
        other items are read as ``toc_item.size`` plain bytes.
        '''
        if toc_item.flags in (1, 2):
            return

        encoding = 'cp1252' if self.encoding is None else self.encoding
        self.stream.seek(toc_item.offset)

        if toc_item.flags == 8:
            count = self.read_i32()
            self.read_i32() # Uncompressed size.
            chunk_sizes = [self.read_i32() for i in range(count)]
            # Decompress everything before decoding: with a multi-byte
            # encoding a character may be split across two chunks, so the
            # chunks cannot be decoded one by one.
            raw = b''.join([zlib.decompress(self.stream.read(size)) for size in chunk_sizes])
        else:
            raw = self.stream.read(toc_item.size)

        output = raw.decode(encoding)

        # NOTE(review): toc_item.name comes from the file being read and is
        # joined to output_dir unchecked; confirm names cannot leave it.
        with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
            html.write(output.encode('utf-8'))

    def get_image(self, toc_item, output_dir):
        '''
        Copy the bytes of ``toc_item`` to ``output_dir`` under the name of
        the item. Only items with flags 0 are written.
        '''
        if toc_item.flags != 0:
            return

        self.stream.seek(toc_item.offset)
        data = self.stream.read(toc_item.size)

        with open(os.path.join(output_dir, toc_item.name), 'wb') as img:
            img.write(data)

    def extract_content(self, output_dir):
        '''
        Extract every html and png entry of the table of contents into
        ``output_dir`` and create an OPF for them. Returns the OPF path.
        '''
        html = []
        images = []

        for item in self.toc:
            lowered = item.name.lower()
            if lowered.endswith('html'):
                html.append(item.name)
                self.get_text(item, output_dir)
            if lowered.endswith('png'):
                images.append(item.name)
                self.get_image(item, output_dir)

        opf_path = self.create_opf(output_dir, html, images)

        return opf_path

    def create_opf(self, output_dir, pages, images):
        '''
        Write metadata.opf into ``output_dir`` with ``pages`` and ``images``
        in the manifest and ``pages`` as the spine. Returns the OPF path.
        '''
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [(entry, None) for entry in pages + images]

            opf.create_manifest(manifest)
            opf.create_spine(pages)
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
|
143
src/calibre/ebooks/rb/writer.py
Normal file
143
src/calibre/ebooks/rb/writer.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
import os.path
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
import Image
|
||||||
|
import cStringIO
|
||||||
|
|
||||||
|
from calibre.ebooks.rb.rbml import RBMLizer
|
||||||
|
from calibre.ebooks.rb import HEADER
|
||||||
|
from calibre.ebooks.rb import unique_name
|
||||||
|
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
|
||||||
|
TEXT_RECORD_SIZE = 4096
|
||||||
|
|
||||||
|
class TocItem(object):
    '''Name, size and flags of one table of contents entry to be written.'''

    def __init__(self, name, size, flags):
        self.name, self.size, self.flags = name, size, flags
|
||||||
|
|
||||||
|
|
||||||
|
class RBWriter(object):
    '''
    Write an OEB book to a stream as an RB file consisting of an info page,
    the compressed text (index.html), an index.hidx page and the images.
    '''

    def __init__(self, opts, log):
        self.opts = opts
        self.log = log
        # Maps the base name of an image href to its name in the RB file.
        self.name_map = {}

    def write_content(self, oeb_book, out_stream, metadata=None):
        '''
        Write ``oeb_book`` to ``out_stream``.

        :param oeb_book: Book to convert; its manifest supplies the images.
        :param out_stream: Seekable binary stream to write to.
        :param metadata: Optional metadata used for the info page.
        '''
        info = [('info.info', self._info_section(metadata))]
        images = self._images(oeb_book.manifest)
        text_size, chuncks = self._text(oeb_book)
        chunck_sizes = [len(x) for x in chuncks]
        text = [('index.html', chuncks)]
        hidx = [('index.hidx', ' ')]

        toc_items = []
        page_count = 0
        for name, data in info+text+hidx+images:
            page_count += 1
            size = len(data)
            if (name, data) in text:
                flags = 8
                # The compressed chunks, plus 8 bytes for the chunk count
                # and uncompressed size, plus 4 bytes per chunk size.
                size = sum(chunck_sizes) + 8 + (len(chunck_sizes) * 4)
            elif (name, data) in info:
                flags = 2
            else:
                flags = 0
            toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags))

        # Fixed header. The table of contents starts at 0x128.
        out_stream.write(HEADER)
        out_stream.write(struct.pack('<I', 0))
        out_stream.write(struct.pack('<IH', 0, 0))
        out_stream.write(struct.pack('<I', 0x128))
        # Placeholder at 0x1c for the total size, filled in at the end.
        out_stream.write(struct.pack('<I', 0))
        for i in range(0x20, 0x128, 4):
            out_stream.write(struct.pack('<I', 0))

        # Table of contents: page count, then 44 bytes per entry (32 byte
        # name, size, offset, flags).
        out_stream.write(struct.pack('<I', page_count))
        offset = out_stream.tell() + (len(toc_items) * 44)
        for item in toc_items:
            out_stream.write(item.name)
            out_stream.write(struct.pack('<I', item.size))
            out_stream.write(struct.pack('<I', offset))
            out_stream.write(struct.pack('<I', item.flags))
            offset += item.size

        out_stream.write(info[0][1])

        # Compressed text with proper heading
        out_stream.write(struct.pack('<I', len(text[0][1])))
        out_stream.write(struct.pack('<I', text_size))
        for size in chunck_sizes:
            out_stream.write(struct.pack('<I', size))
        for chunck in text[0][1]:
            out_stream.write(chunck)

        for item in hidx+images:
            out_stream.write(item[1])

        total_size = out_stream.tell()
        out_stream.seek(0x1c)
        out_stream.write(struct.pack('<I', total_size))

    @staticmethod
    def _compress_records(data, record_size=None):
        '''
        Split ``data`` into pieces of at most ``record_size`` bytes
        (TEXT_RECORD_SIZE when omitted) and zlib compress each piece.
        Always returns at least one record.
        '''
        if record_size is None:
            record_size = TEXT_RECORD_SIZE
        # Stepping by the record size avoids the empty trailing record the
        # "length / size + 1" form produces for exact multiples, and does
        # not depend on "/" being integer division.
        starts = range(0, len(data), record_size) or [0]
        return [zlib.compress(data[start:start + record_size], 9) for start in starts]

    def _text(self, oeb_book):
        '''
        Convert the book to cp1252 encoded markup and return a tuple of the
        uncompressed size and the list of compressed records.
        '''
        rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables)
        text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')

        return (len(text), self._compress_records(text))

    def _images(self, manifest):
        '''
        Convert every image in ``manifest`` to a greyscale PNG and return a
        list of (name, data) tuples. The chosen names are recorded in
        ``self.name_map`` under the base name of the original href.
        '''
        images = []
        used_names = []

        for item in manifest:
            if item.media_type in OEB_IMAGES:
                im = Image.open(cStringIO.StringIO(item.data)).convert('L')
                buf = cStringIO.StringIO()
                im.save(buf, 'PNG')
                data = buf.getvalue()

                name = '%s.png' % os.path.splitext(os.path.basename(item.href))[0]
                name = unique_name(name, used_names)
                used_names.append(name)
                self.name_map[os.path.basename(item.href)] = name

                images.append((name, data))

        return images

    def _info_section(self, metadata):
        '''
        Build the info page as a byte string of KEY=value lines.
        '''
        text = 'TYPE=2\n'
        if metadata:
            if len(metadata.title) >= 1:
                text += 'TITLE=%s\n' % metadata.title[0].value
            if len(metadata.creator) >= 1:
                from calibre.ebooks.metadata import authors_to_string
                text += 'AUTHOR=%s\n' % authors_to_string([x.value for x in metadata.creator])
        text += 'GENERATOR=%s - %s\n' % (__appname__, __version__)
        text += 'PARSE=1\n'
        text += 'OUTPUT=1\n'
        text += 'BODY=index.html\n'

        # Metadata values may be unicode. Encode explicitly, with the same
        # codec as the text, so that writing to the binary stream cannot
        # fail on non-ASCII titles or authors.
        if not isinstance(text, bytes):
            text = text.encode('cp1252', 'replace')

        return text
|
||||||
|
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
'''
|
|
||||||
Write content to TXT.
|
|
||||||
'''
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, re, sys
|
'''
|
||||||
|
Write content to TXT.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre import entity_to_unicode
|
||||||
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
@ -83,6 +85,11 @@ class TxtWriter(object):
|
|||||||
for symbol in HTML_SYMBOLS:
|
for symbol in HTML_SYMBOLS:
|
||||||
for code in HTML_SYMBOLS[symbol]:
|
for code in HTML_SYMBOLS[symbol]:
|
||||||
content = content.replace(code, symbol)
|
content = content.replace(code, symbol)
|
||||||
|
|
||||||
|
for entity in set(re.findall('&.+?;', content)):
|
||||||
|
mo = re.search('(%s)' % entity[1:-1], content)
|
||||||
|
content = content.replace(entity, entity_to_unicode(mo))
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def cleanup_text(self, text):
|
def cleanup_text(self, text):
|
||||||
|
@ -640,15 +640,15 @@ class DeviceGUI(object):
|
|||||||
', '.join(sent_mails), 3000)
|
', '.join(sent_mails), 3000)
|
||||||
|
|
||||||
|
|
||||||
def sync_news(self, send_ids=None, do_auto=True):
|
def sync_news(self, send_ids=None, do_auto_convert=True):
|
||||||
if self.device_connected:
|
if self.device_connected:
|
||||||
ids = list(dynamic.get('news_to_be_synced', set([]))) if send_ids is None else send_ids
|
ids = list(dynamic.get('news_to_be_synced', set([]))) if send_ids is None else send_ids
|
||||||
ids = [id for id in ids if self.library_view.model().db.has_id(id)]
|
ids = [id for id in ids if self.library_view.model().db.has_id(id)]
|
||||||
files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
|
files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
|
||||||
ids, self.device_manager.device_class.settings().format_map,
|
ids, self.device_manager.device_class.settings().format_map,
|
||||||
exclude_auto=do_auto)
|
exclude_auto=do_auto_convert)
|
||||||
auto = []
|
auto = []
|
||||||
if _auto_ids:
|
if do_auto_convert and _auto_ids:
|
||||||
for id in _auto_ids:
|
for id in _auto_ids:
|
||||||
formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')]
|
formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')]
|
||||||
formats = formats if formats != None else []
|
formats = formats if formats != None else []
|
||||||
|
@ -133,7 +133,7 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
|
|||||||
self._map = dict(self.category_map)
|
self._map = dict(self.category_map)
|
||||||
|
|
||||||
def scheduled_recipes(self):
|
def scheduled_recipes(self):
|
||||||
for recipe in self.category_map[_('Scheduled')]:
|
for recipe in self.category_map.get(_('Scheduled'), []):
|
||||||
yield recipe
|
yield recipe
|
||||||
|
|
||||||
def sort_categories(self, x, y):
|
def sort_categories(self, x, y):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user