Pull from trunk

This commit is contained in:
Kovid Goyal 2009-03-01 10:08:53 -08:00
commit 549e2b9efb
27 changed files with 355 additions and 790 deletions

View File

@ -1,50 +0,0 @@
[epydoc] # Epydoc section marker (required by ConfigParser)
# Information about the project.
name: calibre
url: http://calibre.kovidgoyal.net
# The list of modules to document. Modules can be named using
# dotted names, module filenames, or package directory names.
# This option may be repeated.
modules: calibre.devices, calibre.ebooks.lrf.web.profiles
output: pdf
target: docs/pdf
frames: no
# graph
# The list of graph types that should be automatically included
# in the output. Graphs are generated using the Graphviz "dot"
# executable. Graph types include: "classtree", "callgraph",
# "umlclass". Use "all" to include all graph types
graph: classtree
# css
# The CSS stylesheet for HTML output. Can be the name of a builtin
# stylesheet, or the name of a file.
css: white
# link
# HTML code for the project link in the navigation bar. If left
# unspecified, the project link will be generated based on the
# project's name and URL.
#link: <a href="somewhere">My Cool Project</a>
# top
# The "top" page for the documentation. Can be a URL, the name
# of a module or class, or one of the special names "trees.html",
# "indices.html", or "help.html"
# top: calibre
# verbosity
# An integer indicating how verbose epydoc should be. The default
# value is 0; negative values will suppress warnings and errors;
# positive values will give more verbose output.
#verbosity: 0
# separate-classes
# Whether each class should be listed in its own section when
# generating LaTeX or PDF output.
#separate-classes: no

View File

@ -1,51 +0,0 @@
[epydoc] # Epydoc section marker (required by ConfigParser)
# Information about the project.
name: calibre - API documentation
url: http://calibre.kovidgoyal.net
# The list of modules to document. Modules can be named using
# dotted names, module filenames, or package directory names.
# This option may be repeated.
modules: calibre.devices, calibre.ebooks.lrf.web.profiles
# Write html output to the directory "docs/html"
output: html
target: docs/html
frames: no
# graph
# The list of graph types that should be automatically included
# in the output. Graphs are generated using the Graphviz "dot"
# executable. Graph types include: "classtree", "callgraph",
# "umlclass". Use "all" to include all graph types
graph: classtree
# css
# The CSS stylesheet for HTML output. Can be the name of a builtin
# stylesheet, or the name of a file.
css: white
# link
# HTML code for the project link in the navigation bar. If left
# unspecified, the project link will be generated based on the
# project's name and URL.
link: <a href="http://calibre.kovidgoyal.net">calibre</a>
# top
# The "top" page for the documentation. Can be a URL, the name
# of a module or class, or one of the special names "trees.html",
# "indices.html", or "help.html"
#top:
# verbosity
# An integer indicating how verbose epydoc should be. The default
# value is 0; negative values will suppress warnings and errors;
# positive values will give more verbose output.
#verbosity: 0
# separate-classes
# Whether each class should be listed in its own section when
# generating LaTeX or PDF output.
#separate-classes: no

View File

@ -1,602 +0,0 @@
# lint Python modules using external checkers.
#
# This is the main checker controlling the other ones and the reports
# generation. It is itself both a raw checker and an astng checker in order
# to:
# * handle message activation / deactivation at the module level
# * handle some basic but necessary stats data (number of classes, methods...)
#
[MASTER]
# Specify a configuration file.
#rcfile=
# Profiled execution.
profile=no
# Add <file or directory> to the black list. It should be a base name, not a
# path. You may set this option multiple times.
ignore=CVS
# Pickle collected data for later comparisons.
persistent=yes
# Set the cache size for astng objects.
cache-size=500
# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
load-plugins=
[MESSAGES CONTROL]
# Enable only checker(s) with the given id(s). This option conflicts with the
# disable-checker option
#enable-checker=
# Enable all checker(s) except those with the given id(s). This option conflicts
# with the enable-checker option
#disable-checker=
# Enable all messages in the listed categories.
#enable-msg-cat=
# Disable all messages in the listed categories.
#disable-msg-cat=
# Enable the message(s) with the given id(s).
#enable-msg=
# Disable the message(s) with the given id(s).
#disable-msg=
[REPORTS]
# set the output format. Available formats are text, parseable, colorized and
# html
output-format=colorized
# Include message's id in output
include-ids=no
# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file name "pylint_global.[txt|html]".
files-output=no
# Tells whether to display a full report or only the messages
reports=yes
# Python expression which should return a score of at most 10 (10 is the highest
# score). You have access to the variables error, warning, refactor, convention
# and statement, which respectively contain the number of messages in each
# category and the total number of statements analyzed. This is used by the
# global evaluation report (R0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Add a comment according to your evaluation note. This is used by the global
# evaluation report (R0004).
comment=no
# Enable the report(s) with the given id(s).
#enable-report=
# Disable the report(s) with the given id(s).
#disable-report=
# checks for
# * unused variables / imports
# * undefined variables
# * redefinition of variable from builtins or from an outer scope
# * use of variable before assignment
#
[VARIABLES]
# Tells whether we should check for unused import in __init__ files.
init-import=no
# A regular expression matching names used for dummy variables (i.e. not used).
dummy-variables-rgx=_|dummy
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# try to find bugs in the code using type inference
#
[TYPECHECK]
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# When zope mode is activated, consider the acquired-members option to ignore
# access to some undefined attributes.
zope=no
# List of members which are usually obtained through Zope's acquisition mechanism
# and so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
acquired-members=REQUEST,acl_users,aq_parent
# checks for :
# * doc strings
# * modules / classes / functions / methods / arguments / variables name
# * number of arguments, local variables, branches, returns and statements in
# functions, methods
# * required module attributes
# * dangerous default values as arguments
# * redefinition of function / method / class
# * uses of the global statement
#
[BASIC]
# Required attributes for module, separated by a comma
required-attributes=
# Regular expression which should only match function or class names which do
# not require a docstring
no-docstring-rgx=__.*__
# Regular expression which should only match correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Regular expression which should only match correct module level names.
# The character class must include 0 as well as 1-9, otherwise constants such
# as HTTP_10 are reported as invalid names.
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
# Regular expression which should only match correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$
# Regular expression which should only match correct function names
function-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct method names
method-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct instance attribute names
attr-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct argument names
argument-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct variable names
variable-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct list comprehension /
# generator expression variable names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
# Good variable names which should always be accepted, separated by a comma
good-names=i,j,k,ex,Run,_
# Bad variable names which should always be refused, separated by a comma
bad-names=foo,bar,baz,toto,tutu,tata
# List of builtins function names that should not be used, separated by a comma
bad-functions=map,filter,apply,input
# checks for sign of poor/misdesign:
# * number of methods, attributes, local variables...
# * size, complexity of functions, methods
#
[DESIGN]
# Maximum number of arguments for function / method
max-args=5
# Maximum number of locals for function / method body
max-locals=15
# Maximum number of return / yield for function / method body
max-returns=6
# Maximum number of branches for function / method body
max-branchs=12
# Maximum number of statements in function / method body
max-statements=50
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# checks for
# * external modules dependencies
# * relative / wildcard imports
# * cyclic imports
# * uses of deprecated modules
#
[IMPORTS]
# Deprecated modules which should not be used, separated by a comma
deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report R0402 must not be disabled)
import-graph=
# Create a graph of external dependencies in the given file (report R0402 must
# not be disabled)
ext-import-graph=
# Create a graph of internal dependencies in the given file (report R0402 must
# not be disabled)
int-import-graph=
# checks for :
# * methods without self as first argument
# * overridden methods signature
# * access only to existing members via self
# * attributes not defined in the __init__ method
# * supported interfaces implementation
# * unreachable code
#
[CLASSES]
# List of interface methods to ignore, separated by a comma. This is used for
# instance to not check methods defined in Zope's Interface base class.
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp
# checks for similarities and duplicated code. This computation may be
# memory / CPU intensive, so you should disable it if you experience
# problems.
#
[SIMILARITIES]
# Minimum lines number of a similarity.
min-similarity-lines=4
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
# checks for:
# * warning notes in the code like FIXME, XXX
# * PEP 263: source code with non ascii character but no encoding declaration
#
[MISCELLANEOUS]
# List of note tags to take into consideration, separated by a comma.
notes=FIXME,XXX,TODO
# checks for :
# * unauthorized constructions
# * strict indentation
# * line length
# * use of <> instead of !=
#
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=80
# Maximum number of lines in a module
max-module-lines=1000
# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string='    '

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.4.140' __version__ = '0.4.141'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.

View File

@ -12,7 +12,8 @@ def devices():
from calibre.devices.cybookg3.driver import CYBOOKG3 from calibre.devices.cybookg3.driver import CYBOOKG3
from calibre.devices.kindle.driver import KINDLE from calibre.devices.kindle.driver import KINDLE
from calibre.devices.kindle.driver import KINDLE2 from calibre.devices.kindle.driver import KINDLE2
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2) from calibre.devices.blackberry.driver import BLACKBERRY
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY)
import time import time

View File

@ -0,0 +1,6 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,30 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.devices.usbms.driver import USBMS
class BLACKBERRY(USBMS):
    '''
    Driver settings for Blackberry devices, expressed as class-level
    overrides of the :class:`USBMS` base class.
    '''

    # USB identification values for the device.
    VENDOR_ID = [0x0fca]
    PRODUCT_ID = [0x8004]
    BCD = [0x0200]
    VENDOR_NAME = 'RIM'

    # Ordered list of supported formats
    FORMATS = ['mobi', 'prc']

    # Main memory settings.
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
    MAIN_MEMORY_VOLUME_LABEL = 'Blackberry Main Memory'
    EBOOK_DIR_MAIN = 'ebooks'
    SUPPORTS_SUB_DIRS = True

    # Settings that are not configured for this device. The disabled values
    # still carry Kindle names.
    # NOTE(review): presumably copied from the Kindle driver -- confirm the
    # right values before enabling any of them.
    #WINDOWS_CARD_MEM = 'CARD_STORAGE'
    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
    #OSX_CARD_MEM = 'Kindle Card Storage Media'
    #STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
    #EBOOK_DIR_CARD = "documents"

View File

@ -1,3 +1,4 @@
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john at nachtimwald.com>' __copyright__ = '2009, John Schember <john at nachtimwald.com>'
''' '''
@ -165,8 +166,8 @@ class USBMS(Device):
def get_file(self, path, outfile, end_session=True): def get_file(self, path, outfile, end_session=True):
path = self.munge_path(path) path = self.munge_path(path)
src = open(path, 'rb') with open(path, 'rb') as src:
shutil.copyfileobj(src, outfile, 10*1024*1024) shutil.copyfileobj(src, outfile, 10*1024*1024)
def put_file(self, infile, path, replace_file=False, end_session=True): def put_file(self, infile, path, replace_file=False, end_session=True):
path = self.munge_path(path) path = self.munge_path(path)

View File

@ -30,12 +30,50 @@ def detect(aBuf):
# Added by Kovid # Added by Kovid
ENCODING_PATS = [ ENCODING_PATS = [
re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE), re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>',
re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>', re.IGNORECASE) re.IGNORECASE),
re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>',
re.IGNORECASE)
] ]
ENTITY_PATTERN = re.compile(r'&(\S+?);') ENTITY_PATTERN = re.compile(r'&(\S+?);')
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entities=False): def strip_encoding_declarations(raw):
for pat in ENCODING_PATS:
raw = pat.sub('', raw)
return raw
def substitute_entites(raw):
    '''
    Run every match of ``ENTITY_PATTERN`` in `raw` through
    ``calibre.entity_to_unicode`` and return the resulting string.

    The names ``amp``, ``apos``, ``quot``, ``lt`` and ``gt`` are handed to
    ``entity_to_unicode`` as its ``exceptions`` argument (presumably so that
    these entities are left unconverted -- confirm against
    ``entity_to_unicode``).
    '''
    from functools import partial
    from calibre import entity_to_unicode
    excluded = ['amp', 'apos', 'quot', 'lt', 'gt']
    replacer = partial(entity_to_unicode, exceptions=excluded)
    return ENTITY_PATTERN.sub(replacer, raw)
_CHARSET_ALIASES = { "macintosh" : "mac-roman",
"x-sjis" : "shift-jis" }
def force_encoding(raw, verbose):
    '''
    Choose an encoding name for `raw` based on the result of :func:`detect`.

    If detection raises, or reports no encoding, ``preferred_encoding`` from
    ``calibre.constants`` is used instead. The chosen name is lower-cased,
    translated through ``_CHARSET_ALIASES`` and ``'ascii'`` is replaced by
    ``'utf-8'``.

    :param raw: The data passed to :func:`detect`.
    :param verbose: If true, print a warning whenever the reported detection
                    confidence is below 1.
    :return: The encoding name to use.
    '''
    from calibre.constants import preferred_encoding
    try:
        chardet = detect(raw)
    except Exception:
        # Detection is best effort, so any ordinary failure falls back to the
        # preferred encoding. KeyboardInterrupt and SystemExit are not
        # swallowed.
        chardet = {'encoding':preferred_encoding, 'confidence':0}
    encoding = chardet['encoding']
    if chardet['confidence'] < 1 and verbose:
        # Parenthesised single-argument form prints the same text under both
        # the print statement and the print function.
        print('WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100))
    if not encoding:
        encoding = preferred_encoding
    encoding = encoding.lower()
    # Map known alias names; unknown names pass through unchanged.
    encoding = _CHARSET_ALIASES.get(encoding, encoding)
    if encoding == 'ascii':
        encoding = 'utf-8'
    return encoding
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
resolve_entities=False):
''' '''
Force conversion of byte string to unicode. Tries to look for XML/HTML Force conversion of byte string to unicode. Tries to look for XML/HTML
encoding declaration first, if not found uses the chardet library and encoding declaration first, if not found uses the chardet library and
@ -45,44 +83,27 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entiti
encoding = None encoding = None
if not raw: if not raw:
return u'', encoding return u'', encoding
if isinstance(raw, unicode): if not isinstance(raw, unicode):
return raw, encoding if raw.startswith('\xff\xfe'):
for pat in ENCODING_PATS: raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
match = pat.search(raw) elif raw.startswith('\xfe\xff'):
if match: raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
encoding = match.group(1) if not isinstance(raw, unicode):
break
if strip_encoding_pats:
for pat in ENCODING_PATS: for pat in ENCODING_PATS:
raw = pat.sub('', raw) match = pat.search(raw)
if encoding is None: if match:
encoding = match.group(1)
break
if encoding is None:
encoding = force_encoding(raw, verbose)
try: try:
chardet = detect(raw) raw = raw.decode(encoding, 'replace')
except: except LookupError:
chardet = {'encoding':'utf-8', 'confidence':0} raw = raw.decode('utf-8', 'replace')
encoding = chardet['encoding']
if chardet['confidence'] < 1 and verbose:
print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
CHARSET_ALIASES = { "macintosh" : "mac-roman",
"x-sjis" : "shift-jis" }
if not encoding:
from calibre import preferred_encoding
encoding = preferred_encoding
if encoding:
encoding = encoding.lower()
if CHARSET_ALIASES.has_key(encoding):
encoding = CHARSET_ALIASES[encoding]
if encoding == 'ascii':
encoding = 'utf-8'
try: if strip_encoding_pats:
raw = raw.decode(encoding, 'replace') raw = strip_encoding_declarations(raw)
except LookupError:
raw = raw.decode('utf-8', 'replace')
if resolve_entities: if resolve_entities:
from calibre import entity_to_unicode raw = substitute_entites(raw)
from functools import partial
f = partial(entity_to_unicode, exceptions=['amp', 'apos', 'quot', 'lt', 'gt'])
raw = ENTITY_PATTERN.sub(f, raw)
return raw, encoding return raw, encoding

View File

@ -273,7 +273,12 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
hf.links.remove(link) hf.links.remove(link)
next_level = list(nl) next_level = list(nl)
return flat, list(depth_first(flat[0], flat)) orec = sys.getrecursionlimit()
sys.setrecursionlimit(500000)
try:
return flat, list(depth_first(flat[0], flat))
finally:
sys.setrecursionlimit(orec)
def opf_traverse(opf_reader, verbose=0, encoding=None): def opf_traverse(opf_reader, verbose=0, encoding=None):

View File

@ -73,7 +73,9 @@ class LrsParser(object):
return CharButton(self.parsed_objects[tag.get('refobj')], None) return CharButton(self.parsed_objects[tag.get('refobj')], None)
if tag.name == 'plot': if tag.name == 'plot':
return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj'])) return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj']))
return map[tag.name](**self.attrs_to_dict(tag)) settings = self.attrs_to_dict(tag)
settings.pop('spanstyle', '')
return map[tag.name](**settings)
def process_text_element(self, tag, elem): def process_text_element(self, tag, elem):
for item in tag.contents: for item in tag.contents:
@ -121,7 +123,8 @@ class LrsParser(object):
for tag in self.soup.findAll('page'): for tag in self.soup.findAll('page'):
page = self.parsed_objects[tag.get('objid')] page = self.parsed_objects[tag.get('objid')]
self.book.append(page) self.book.append(page)
for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline']): for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
'ruledline', 'simpletextblock']):
if block_tag.name == 'ruledline': if block_tag.name == 'ruledline':
page.append(RuledLine(**self.attrs_to_dict(block_tag))) page.append(RuledLine(**self.attrs_to_dict(block_tag)))
else: else:
@ -134,7 +137,7 @@ class LrsParser(object):
self.book.append(jb) self.book.append(jb)
self.parsed_objects[tag.get('objid')] = jb self.parsed_objects[tag.get('objid')] = jb
for tag in self.soup.findAll('textblock'): for tag in self.soup.findAll(['textblock', 'simpletextblock']):
self.process_text_block(tag) self.process_text_block(tag)
toc = self.soup.find('toc') toc = self.soup.find('toc')
if toc: if toc:
@ -145,8 +148,10 @@ class LrsParser(object):
def third_pass(self): def third_pass(self):
map = { map = {
'page' : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']), 'page' : (Page, ['pagestyle', 'evenfooterid',
'oddfooterid', 'evenheaderid', 'oddheaderid']),
'textblock' : (TextBlock, ['textstyle', 'blockstyle']), 'textblock' : (TextBlock, ['textstyle', 'blockstyle']),
'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']),
'imageblock' : (ImageBlock, ['blockstyle', 'refstream']), 'imageblock' : (ImageBlock, ['blockstyle', 'refstream']),
'image' : (Image, ['refstream']), 'image' : (Image, ['refstream']),
'canvas' : (Canvas, ['canvaswidth', 'canvasheight']), 'canvas' : (Canvas, ['canvaswidth', 'canvasheight']),
@ -160,8 +165,12 @@ class LrsParser(object):
if tag.name in map.keys(): if tag.name in map.keys():
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel']) settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
for a in ('pagestyle', 'blockstyle', 'textstyle'): for a in ('pagestyle', 'blockstyle', 'textstyle'):
if tag.has_key(a): label = tag.get(a, False)
settings[attrmap[a]] = self.parsed_objects[tag.get(a)] if label:
_obj = self.parsed_objects[label] if \
self.parsed_objects.has_key(label) else \
self._style_labels[label]
settings[attrmap[a]] = _obj
for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'): for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'):
if tag.has_key(a): if tag.has_key(a):
settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)] settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
@ -182,6 +191,7 @@ class LrsParser(object):
'imagestream': (ImageStream, ['imagestreamlabel']), 'imagestream': (ImageStream, ['imagestreamlabel']),
'registfont' : (Font, []) 'registfont' : (Font, [])
} }
self._style_labels = {}
for id, tag in self.objects.items(): for id, tag in self.objects.items():
if tag.name in map.keys(): if tag.name in map.keys():
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid']) settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid'])
@ -189,7 +199,11 @@ class LrsParser(object):
for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'): for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'):
if tag.has_key(a): if tag.has_key(a):
settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)] settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
settings.pop('autoindex', '')
self.parsed_objects[id] = map[tag.name][0](**settings) self.parsed_objects[id] = map[tag.name][0](**settings)
x = tag.get('stylelabel', False)
if x:
self._style_labels[x] = self.parsed_objects[id]
if tag.name == 'registfont': if tag.name == 'registfont':
self.book.append(self.parsed_objects[id]) self.book.append(self.parsed_objects[id])
@ -220,6 +234,8 @@ class LrsParser(object):
def me(base, tagname): def me(base, tagname):
tag = base.find(tagname.lower()) tag = base.find(tagname.lower())
if tag is None:
return ('', '', '')
tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '') tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
return tag return tag

View File

@ -80,6 +80,7 @@ class MetadataUpdater(object):
type = self.type = data[60:68] type = self.type = data[60:68]
self.nrecs, = unpack('>H', data[76:78]) self.nrecs, = unpack('>H', data[76:78])
record0 = self.record0 = self.record(0) record0 = self.record0 = self.record(0)
self.encryption_type, = unpack('>H', record0[12:14])
codepage, = unpack('>I', record0[28:32]) codepage, = unpack('>I', record0[28:32])
self.codec = 'utf-8' if codepage == 65001 else 'cp1252' self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
image_base, = unpack('>I', record0[108:112]) image_base, = unpack('>I', record0[108:112])
@ -133,6 +134,8 @@ class MetadataUpdater(object):
if self.thumbnail_record is not None: if self.thumbnail_record is not None:
recs.append((202, pack('>I', self.thumbnail_rindex))) recs.append((202, pack('>I', self.thumbnail_rindex)))
exth = StringIO() exth = StringIO()
if getattr(self, 'encryption_type', -1) != 0:
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
for code, data in recs: for code, data in recs:
exth.write(pack('>II', code, len(data) + 8)) exth.write(pack('>II', code, len(data) + 8))
exth.write(data) exth.write(data)

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files Read data from .mobi files
''' '''
import sys, struct, os, cStringIO, re import sys, struct, os, cStringIO, re, functools
try: try:
from PIL import Image as PILImage from PIL import Image as PILImage
@ -186,7 +186,9 @@ class MobiReader(object):
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
for pat in ENCODING_PATS: for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html) self.processed_html = pat.sub('', self.processed_html)
self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode, e2u = functools.partial(entity_to_unicode,
exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
self.processed_html = re.sub(r'&(\S+?);', e2u,
self.processed_html) self.processed_html)
self.extract_images(processed_records, output_dir) self.extract_images(processed_records, output_dir)
self.replace_page_breaks() self.replace_page_breaks()
@ -235,7 +237,7 @@ class MobiReader(object):
if self.verbose: if self.verbose:
print 'Creating OPF...' print 'Creating OPF...'
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide) opf = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
@ -328,7 +330,7 @@ class MobiReader(object):
except ValueError: except ValueError:
pass pass
def create_opf(self, htmlfile, guide=None): def create_opf(self, htmlfile, guide=None, root=None):
mi = self.book_header.exth.mi mi = self.book_header.exth.mi
opf = OPFCreator(os.path.dirname(htmlfile), mi) opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'): if hasattr(self.book_header.exth, 'cover_offset'):
@ -347,21 +349,27 @@ class MobiReader(object):
if ref.type.lower() == 'toc': if ref.type.lower() == 'toc':
toc = ref.href() toc = ref.href()
if toc: if toc:
index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1])) elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
tocobj = None tocobj = None
ent_pat = re.compile(r'&(\S+?);') ent_pat = re.compile(r'&(\S+?);')
if index > -1: if elems:
raw = '<html><body>'+self.processed_html[index:]
root = html.fromstring(raw)
tocobj = TOC() tocobj = TOC()
for a in root.xpath('//a[@href]'): reached = False
try: for x in root.iter():
text = u' '.join([t.strip() for t in a.xpath('descendant::text()')]) if x == elems[-1]:
except: reached = True
text = '' continue
text = ent_pat.sub(entity_to_unicode, text) if reached and x.tag == 'a':
if a.get('href', '').startswith('#'): href = x.get('href', '')
tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text) if href:
try:
text = u' '.join([t.strip() for t in \
x.xpath('descendant::text()')])
except:
text = ''
text = ent_pat.sub(entity_to_unicode, text)
tocobj.add_item(toc.partition('#')[0], href[1:],
text)
if tocobj is not None: if tocobj is not None:
opf.set_toc(tocobj) opf.set_toc(tocobj)

View File

@ -180,12 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2) self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
self.show_toolbar_text.setChecked(config['show_text_in_toolbar']) self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
book_exts = sorted(BOOK_EXTENSIONS) self.book_exts = sorted(BOOK_EXTENSIONS)
for ext in book_exts: for ext in self.book_exts:
self.single_format.addItem(ext.upper(), QVariant(ext)) self.single_format.addItem(ext.upper(), QVariant(ext))
single_format = config['save_to_disk_single_format'] single_format = config['save_to_disk_single_format']
self.single_format.setCurrentIndex(book_exts.index(single_format)) self.single_format.setCurrentIndex(self.book_exts.index(single_format))
self.cover_browse.setValue(config['cover_flow_queue_length']) self.cover_browse.setValue(config['cover_flow_queue_length'])
self.systray_notifications.setChecked(not config['disable_tray_notification']) self.systray_notifications.setChecked(not config['disable_tray_notification'])
from calibre.translations.compiled import translations from calibre.translations.compiled import translations
@ -204,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.pdf_metadata.setChecked(prefs['read_file_metadata']) self.pdf_metadata.setChecked(prefs['read_file_metadata'])
added_html = False added_html = False
for ext in book_exts: for ext in self.book_exts:
ext = ext.lower() ext = ext.lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext) ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
if ext == 'lrf' or is_supported('book.'+ext): if ext == 'lrf' or is_supported('book.'+ext):
@ -402,7 +402,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()] p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()]
prefs['worker_process_priority'] = p prefs['worker_process_priority'] = p
prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked()) prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked())
config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()] config['save_to_disk_single_format'] = self.book_exts[self.single_format.currentIndex()]
config['cover_flow_queue_length'] = self.cover_browse.value() config['cover_flow_queue_length'] = self.cover_browse.value()
prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString()) prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
config['systray_icon'] = self.systray_icon.checkState() == Qt.Checked config['systray_icon'] = self.systray_icon.checkState() == Qt.Checked

View File

@ -126,7 +126,8 @@ class Config(ResizableDialog, Ui_Dialog):
pix = QPixmap() pix = QPixmap()
pix.loadFromData(cover) pix.loadFromData(cover)
if pix.isNull(): if pix.isNull():
d = error_dialog(self.window, _file + _(" is not a valid picture")) d = error_dialog(self.window, _('Error reading file'),
_file + _(" is not a valid picture"))
d.exec_() d.exec_()
else: else:
self.cover_path.setText(_file) self.cover_path.setText(_file)

View File

@ -255,7 +255,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.gui_headerformat.setDisabled(True) self.gui_headerformat.setDisabled(True)
self.gui_header_separation.setDisabled(True) self.gui_header_separation.setDisabled(True)
self.gui_use_metadata_cover.setCheckState(Qt.Checked) self.gui_use_metadata_cover.setCheckState(Qt.Checked)
self.preprocess.addItem('No preprocessing') self.preprocess.addItem(_('No preprocessing'))
for opt in self.PREPROCESS_OPTIONS: for opt in self.PREPROCESS_OPTIONS:
self.preprocess.addItem(opt.get_opt_string()[2:]) self.preprocess.addItem(opt.get_opt_string()[2:])
ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:') ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:')
@ -338,7 +338,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
cmd.append(opt) cmd.append(opt)
text = qstring_to_unicode(self.preprocess.currentText()) text = qstring_to_unicode(self.preprocess.currentText())
if text != 'No preprocessing': if text != _('No preprocessing'):
cmd.append(u'--'+text) cmd.append(u'--'+text)
cmd.extend([u'--profile', qstring_to_unicode(self.gui_profile.currentText())]) cmd.extend([u'--profile', qstring_to_unicode(self.gui_profile.currentText())])

View File

@ -19,5 +19,4 @@ class Config(_Config):
self.opt_dont_split_on_page_breaks.setVisible(False) self.opt_dont_split_on_page_breaks.setVisible(False)
self.opt_preserve_tag_structure.setVisible(False) self.opt_preserve_tag_structure.setVisible(False)
self.opt_linearize_tables.setVisible(False) self.opt_linearize_tables.setVisible(False)
self.opt_no_justification.setVisible(False)
self.page_map_box.setVisible(False) self.page_map_box.setVisible(False)

Binary file not shown.

After

Width:  |  Height:  |  Size: 455 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 804 B

View File

@ -194,7 +194,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
def __init__(self, pathtoebook=None): def __init__(self, pathtoebook=None):
MainWindow.__init__(self, None) MainWindow.__init__(self, None)
self.setupUi(self) self.setupUi(self)
self.iterator = None self.iterator = None
self.current_page = None self.current_page = None
self.pending_search = None self.pending_search = None
@ -619,7 +618,7 @@ View an ebook.
def main(args=sys.argv): def main(args=sys.argv):
parser = option_parser() parser = option_parser()
args = parser.parse_args(args)[-1] args = parser.parse_args(args)[-1]
pid = os.fork() if islinux else -1 pid = os.fork() if False and islinux else -1
if pid <= 0: if pid <= 0:
app = Application(args) app = Application(args)
app.setWindowIcon(QIcon(':/images/viewer.svg')) app.setWindowIcon(QIcon(':/images/viewer.svg'))

View File

@ -19,7 +19,7 @@ except:
send_message = None send_message = None
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
from calibre.ebooks.metadata.opf import OPFCreator, OPFReader from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.genshi.template import MarkupTemplate from calibre.utils.genshi.template import MarkupTemplate
FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'cover']) FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'cover'])
@ -453,7 +453,7 @@ id is an id number from the list command.
return 0 return 0
def do_set_metadata(db, id, stream): def do_set_metadata(db, id, stream):
mi = OPFReader(stream) mi = OPF(stream)
db.set_metadata(id, mi) db.set_metadata(id, mi)
do_show_metadata(db, id, False) do_show_metadata(db, id, False)
if send_message is not None: if send_message is not None:

View File

@ -435,17 +435,16 @@ def post_install():
parser = option_parser() parser = option_parser()
opts = parser.parse_args()[0] opts = parser.parse_args()[0]
if not opts.no_root and os.geteuid() != 0:
print >> sys.stderr, 'You must be root to run this command.'
sys.exit(1)
global use_destdir global use_destdir
use_destdir = opts.destdir use_destdir = opts.destdir
manifest = [] manifest = []
manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
manifest += setup_completion(opts.fatal_errors)
setup_desktop_integration(opts.fatal_errors) setup_desktop_integration(opts.fatal_errors)
manifest += install_man_pages(opts.fatal_errors) if opts.no_root or os.geteuid() == 0:
manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
manifest += setup_completion(opts.fatal_errors)
manifest += install_man_pages(opts.fatal_errors)
else:
print "Skipping udev, completion, and man-page install for non-root user."
try: try:
from PyQt4 import Qt from PyQt4 import Qt

View File

@ -15,7 +15,7 @@ DEPENDENCIES = [
('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'), ('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'), ('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'), ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'), ('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'),
('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'), ('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'), ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'), ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),

View File

@ -32,6 +32,7 @@ recipe_modules = ['recipe_' + r for r in (
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline', 'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
'lamujerdemivida', 'soldiers', 'theonion',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -0,0 +1,76 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
lamujerdemivida.com.ar
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LaMujerDeMiVida(BasicNewsRecipe):
    # Recipe for lamujerdemivida.com.ar. The article list is scraped from
    # the page listed in ``feeds`` by parse_index() below.

    # --- descriptive metadata ---
    title = 'La Mujer de mi Vida'
    __author__ = 'Darko Miletic'
    description = 'Cultura de otra manera'
    publisher = 'La Mujer de mi Vida'
    category = 'literatura, critica, arte, ensayos'
    language = _('Spanish')

    # --- fetch settings ---
    oldest_article = 90
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    INDEX = 'http://www.lamujerdemivida.com.ar/'

    # --- conversion options, built from the metadata above ---
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'linearize_tables=True',
    ])

    # --- content selection ---
    keep_only_tags = [dict(name='table', attrs={'width':'570'})]
    feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]

    def preprocess_html(self, soup):
        """Mark the document as es-AR and delete every inline ``style`` attribute.

        Sets ``xml:lang`` and ``lang`` on the ``<html>`` element, inserts a
        Content-Language meta string at position 0 of ``<head>``, then removes
        the ``style`` attribute from each tag that carries one. Returns the
        same (mutated) soup.
        """
        for attr in ('xml:lang', 'lang'):
            soup.html[attr] = 'es-AR'
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="es-AR"/>')
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_cover_url(self):
        """Return INDEX joined with the ``src`` of the cover image, or None.

        The cover is the ``<img>`` on the INDEX page whose ``alt`` text is
        'Lamujerdemivida.'; None is returned when no such image is found.
        """
        front_page = self.index_to_soup(self.INDEX)
        image = front_page.find('img', attrs={'alt':'Lamujerdemivida.'})
        if not image:
            return None
        return self.INDEX + image['src']

    def parse_index(self):
        """Build the article list by scraping each page returned by get_feeds().

        For every (title, url) feed pair, each ``<td width="390">`` cell that
        contains a link with an ``href`` contributes one article dict with the
        keys 'title', 'date', 'url' and 'description' (always empty).

        Returns a list of (feed title, list of article dicts) tuples.
        """
        collected = []
        for feed_title, feed_url in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feed_title or feed_url))
            entries = []
            page = self.index_to_soup(feed_url)
            for cell in page.findAll('td', attrs={'width':'390'}):
                link = cell.find('a', href=True)
                if not link:
                    # Cell without a usable link: nothing to add.
                    continue
                href = link['href']
                heading = self.tag_to_string(link)
                # The page provides no per-article date here, so the current
                # time formatted with self.timefmt is used.
                stamp = strftime(self.timefmt)
                entries.append({
                    'title': heading,
                    'date': stamp,
                    'url': href,
                    'description': '',
                })
            collected.append((feed_title, entries))
        return collected

View File

@ -0,0 +1,57 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.army.mil/soldiers/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Soldiers(BasicNewsRecipe):
    # Recipe for www.army.mil/soldiers/, driven by the RSS feed in ``feeds``.

    # --- descriptive metadata ---
    title = 'Soldiers'
    __author__ = 'Darko Miletic'
    description = 'The Official U.S. Army Magazine'
    publisher = 'U.S. Army'
    category = 'news, politics, war, weapons'
    language = _('English')

    # --- fetch settings ---
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    encoding = 'utf-8'
    INDEX = 'http://www.army.mil/soldiers/'
    # NOTE(review): these three presumably throttle the downloader to one
    # request at a time with a pause between requests -- confirm against
    # BasicNewsRecipe.
    simultaneous_downloads = 1
    delay = 4
    max_connections = 1

    # --- conversion options, built from the metadata above ---
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
    ])

    # --- content selection ---
    keep_only_tags = [dict(name='div', attrs={'id':'rightCol'})]
    remove_tags = [
        dict(name='div', attrs={'id':['addThis','comment','articleFooter']}),
        dict(name=['object','link']),
    ]
    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag that has one.

        Returns the same (mutated) soup.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_cover_url(self):
        """Return the ``src`` of the current cover image, or None.

        The cover is the ``<img>`` on the INDEX page whose ``alt`` text is
        'Current Magazine Cover'; None is returned when no such image is found.
        """
        front_page = self.index_to_soup(self.INDEX)
        image = front_page.find('img', attrs={'alt':'Current Magazine Cover'})
        if not image:
            return None
        return image['src']

View File

@ -0,0 +1,45 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
theonion.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheOnion(BasicNewsRecipe):
    # Recipe for theonion.com, driven by the RSS feeds in ``feeds``.

    # --- descriptive metadata ---
    title = 'The Onion'
    __author__ = 'Darko Miletic'
    description = "America's finest news source"
    publisher = u'Onion, Inc.'
    category = u'humor, news, USA'
    language = _('English')

    # --- fetch settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True

    # --- conversion options, built from the metadata above ---
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
    ])
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # --- content selection ---
    # Only the div with id "main" is kept; the tags listed in remove_tags
    # are then stripped.
    keep_only_tags = [dict(name='div', attrs={'id':'main'})]
    remove_tags = [
        dict(name=['object','link','iframe','base']),
        dict(name='div', attrs={'class':['toolbar_side','graphical_feature','toolbar_bottom']}),
        dict(name='div', attrs={'id':['recent_slider','sidebar','pagination','related_media']}),
    ]

    feeds = [
        (u'Daily', u'http://feeds.theonion.com/theonion/daily' ),
        (u'Sports', u'http://feeds.theonion.com/theonion/sports' ),
    ]