diff --git a/epydoc-pdf.conf b/epydoc-pdf.conf
deleted file mode 100644
index 03ed6d104d..0000000000
--- a/epydoc-pdf.conf
+++ /dev/null
@@ -1,50 +0,0 @@
-[epydoc] # Epydoc section marker (required by ConfigParser)
-
-# Information about the project.
-name: calibre
-url: http://calibre.kovidgoyal.net
-
-# The list of modules to document. Modules can be named using
-# dotted names, module filenames, or package directory names.
-# This option may be repeated.
-modules: calibre.devices, calibre.ebooks.lrf.web.profiles
-
-output: pdf
-target: docs/pdf
-
-frames: no
-
-# graph
-# The list of graph types that should be automatically included
-# in the output. Graphs are generated using the Graphviz "dot"
-# executable. Graph types include: "classtree", "callgraph",
-# "umlclass". Use "all" to include all graph types
-graph: classtree
-
-# css
-# The CSS stylesheet for HTML output. Can be the name of a builtin
-# stylesheet, or the name of a file.
-css: white
-
-# link
-# HTML code for the project link in the navigation bar. If left
-# unspecified, the project link will be generated based on the
-# project's name and URL.
-#link: My Cool Project
-
-# top
-# The "top" page for the documentation. Can be a URL, the name
-# of a module or class, or one of the special names "trees.html",
-# "indices.html", or "help.html"
-# top: calibre
-
-# verbosity
-# An integer indicating how verbose epydoc should be. The default
-# value is 0; negative values will supress warnings and errors;
-# positive values will give more verbose output.
-#verbosity: 0
-
-# separate-classes
-# Whether each class should be listed in its own section when
-# generating LaTeX or PDF output.
-#separate-classes: no
diff --git a/epydoc.conf b/epydoc.conf
deleted file mode 100644
index 3259623054..0000000000
--- a/epydoc.conf
+++ /dev/null
@@ -1,51 +0,0 @@
-[epydoc] # Epydoc section marker (required by ConfigParser)
-
-# Information about the project.
-name: calibre - API documentation
-url: http://calibre.kovidgoyal.net
-
-# The list of modules to document. Modules can be named using
-# dotted names, module filenames, or package directory names.
-# This option may be repeated.
-modules: calibre.devices, calibre.ebooks.lrf.web.profiles
-
-# Write html output to the directory "docs"
-output: html
-target: docs/html
-
-frames: no
-
-# graph
-# The list of graph types that should be automatically included
-# in the output. Graphs are generated using the Graphviz "dot"
-# executable. Graph types include: "classtree", "callgraph",
-# "umlclass". Use "all" to include all graph types
-graph: classtree
-
-# css
-# The CSS stylesheet for HTML output. Can be the name of a builtin
-# stylesheet, or the name of a file.
-css: white
-
-# link
-# HTML code for the project link in the navigation bar. If left
-# unspecified, the project link will be generated based on the
-# project's name and URL.
-link: calibre
-
-# top
-# The "top" page for the documentation. Can be a URL, the name
-# of a module or class, or one of the special names "trees.html",
-# "indices.html", or "help.html"
-#top:
-
-# verbosity
-# An integer indicating how verbose epydoc should be. The default
-# value is 0; negative values will supress warnings and errors;
-# positive values will give more verbose output.
-#verbosity: 0
-
-# separate-classes
-# Whether each class should be listed in its own section when
-# generating LaTeX or PDF output.
-#separate-classes: no
diff --git a/pylint.conf b/pylint.conf
deleted file mode 100644
index 3eec91c1ce..0000000000
--- a/pylint.conf
+++ /dev/null
@@ -1,602 +0,0 @@
-# lint Python modules using external checkers.
-
-#
-
-# This is the main checker controling the other ones and the reports
-
-# generation. It is itself both a raw checker and an astng checker in order
-
-# to:
-
-# * handle message activation / deactivation at the module level
-
-# * handle some basic but necessary stats'data (number of classes, methods...)
-
-#
-
-[MASTER]
-
-
-
-# Specify a configuration file.
-
-#rcfile=
-
-
-
-# Profiled execution.
-
-profile=no
-
-
-
-# Add to the black list. It should be a base name, not a
-
-# path. You may set this option multiple times.
-
-ignore=CVS
-
-
-
-# Pickle collected data for later comparisons.
-
-persistent=yes
-
-
-
-# Set the cache size for astng objects.
-
-cache-size=500
-
-
-
-# List of plugins (as comma separated values of python modules names) to load,
-
-# usually to register additional checkers.
-
-load-plugins=
-
-
-
-
-
-[MESSAGES CONTROL]
-
-
-
-# Enable only checker(s) with the given id(s). This option conflict with the
-
-# disable-checker option
-
-#enable-checker=
-
-
-
-# Enable all checker(s) except those with the given id(s). This option conflict
-
-# with the disable-checker option
-
-#disable-checker=
-
-
-
-# Enable all messages in the listed categories.
-
-#enable-msg-cat=
-
-
-
-# Disable all messages in the listed categories.
-
-#disable-msg-cat=
-
-
-
-# Enable the message(s) with the given id(s).
-
-#enable-msg=
-
-
-
-# Disable the message(s) with the given id(s).
-
-#disable-msg=
-
-
-
-
-
-[REPORTS]
-
-
-
-# set the output format. Available formats are text, parseable, colorized and
-
-# html
-
-output-format=colorized
-
-
-
-# Include message's id in output
-
-include-ids=no
-
-
-
-# Put messages in a separate file for each module / package specified on the
-
-# command line instead of printing them on stdout. Reports (if any) will be
-
-# written in a file name "pylint_global.[txt|html]".
-
-files-output=no
-
-
-
-# Tells wether to display a full report or only the messages
-
-reports=yes
-
-
-
-# Python expression which should return a note less than 10 (10 is the highest
-
-# note).You have access to the variables errors warning, statement which
-
-# respectivly contain the number of errors / warnings messages and the total
-
-# number of statements analyzed. This is used by the global evaluation report
-
-# (R0004).
-
-evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
-
-
-
-# Add a comment according to your evaluation note. This is used by the global
-
-# evaluation report (R0004).
-
-comment=no
-
-
-
-# Enable the report(s) with the given id(s).
-
-#enable-report=
-
-
-
-# Disable the report(s) with the given id(s).
-
-#disable-report=
-
-
-
-
-
-# checks for
-
-# * unused variables / imports
-
-# * undefined variables
-
-# * redefinition of variable from builtins or from an outer scope
-
-# * use of variable before assigment
-
-#
-
-[VARIABLES]
-
-
-
-# Tells wether we should check for unused import in __init__ files.
-
-init-import=no
-
-
-
-# A regular expression matching names used for dummy variables (i.e. not used).
-
-dummy-variables-rgx=_|dummy
-
-
-
-# List of additional names supposed to be defined in builtins. Remember that
-
-# you should avoid to define new builtins when possible.
-
-additional-builtins=
-
-
-
-
-
-# try to find bugs in the code using type inference
-
-#
-
-[TYPECHECK]
-
-
-
-# Tells wether missing members accessed in mixin class should be ignored. A
-
-# mixin class is detected if its name ends with "mixin" (case insensitive).
-
-ignore-mixin-members=yes
-
-
-
-# When zope mode is activated, consider the acquired-members option to ignore
-
-# access to some undefined attributes.
-
-zope=no
-
-
-
-# List of members which are usually get through zope's acquisition mecanism and
-
-# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
-
-acquired-members=REQUEST,acl_users,aq_parent
-
-
-
-
-
-# checks for :
-
-# * doc strings
-
-# * modules / classes / functions / methods / arguments / variables name
-
-# * number of arguments, local variables, branchs, returns and statements in
-
-# functions, methods
-
-# * required module attributes
-
-# * dangerous default values as arguments
-
-# * redefinition of function / method / class
-
-# * uses of the global statement
-
-#
-
-[BASIC]
-
-
-
-# Required attributes for module, separated by a comma
-
-required-attributes=
-
-
-
-# Regular expression which should only match functions or classes name which do
-
-# not require a docstring
-
-no-docstring-rgx=__.*__
-
-
-
-# Regular expression which should only match correct module names
-
-module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
-
-
-
-# Regular expression which should only match correct module level names
-
-const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
-
-
-
-# Regular expression which should only match correct class names
-
-class-rgx=[A-Z_][a-zA-Z0-9]+$
-
-
-
-# Regular expression which should only match correct function names
-
-function-rgx=[a-z_][a-z0-9_]{2,30}$
-
-
-
-# Regular expression which should only match correct method names
-
-method-rgx=[a-z_][a-z0-9_]{2,30}$
-
-
-
-# Regular expression which should only match correct instance attribute names
-
-attr-rgx=[a-z_][a-z0-9_]{2,30}$
-
-
-
-# Regular expression which should only match correct argument names
-
-argument-rgx=[a-z_][a-z0-9_]{2,30}$
-
-
-
-# Regular expression which should only match correct variable names
-
-variable-rgx=[a-z_][a-z0-9_]{2,30}$
-
-
-
-# Regular expression which should only match correct list comprehension /
-
-# generator expression variable names
-
-inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
-
-
-
-# Good variable names which should always be accepted, separated by a comma
-
-good-names=i,j,k,ex,Run,_
-
-
-
-# Bad variable names which should always be refused, separated by a comma
-
-bad-names=foo,bar,baz,toto,tutu,tata
-
-
-
-# List of builtins function names that should not be used, separated by a comma
-
-bad-functions=map,filter,apply,input
-
-
-
-
-
-# checks for sign of poor/misdesign:
-
-# * number of methods, attributes, local variables...
-
-# * size, complexity of functions, methods
-
-#
-
-[DESIGN]
-
-
-
-# Maximum number of arguments for function / method
-
-max-args=5
-
-
-
-# Maximum number of locals for function / method body
-
-max-locals=15
-
-
-
-# Maximum number of return / yield for function / method body
-
-max-returns=6
-
-
-
-# Maximum number of branch for function / method body
-
-max-branchs=12
-
-
-
-# Maximum number of statements in function / method body
-
-max-statements=50
-
-
-
-# Maximum number of parents for a class (see R0901).
-
-max-parents=7
-
-
-
-# Maximum number of attributes for a class (see R0902).
-
-max-attributes=7
-
-
-
-# Minimum number of public methods for a class (see R0903).
-
-min-public-methods=2
-
-
-
-# Maximum number of public methods for a class (see R0904).
-
-max-public-methods=20
-
-
-
-
-
-# checks for
-
-# * external modules dependencies
-
-# * relative / wildcard imports
-
-# * cyclic imports
-
-# * uses of deprecated modules
-
-#
-
-[IMPORTS]
-
-
-
-# Deprecated modules which should not be used, separated by a comma
-
-deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
-
-
-
-# Create a graph of every (i.e. internal and external) dependencies in the
-
-# given file (report R0402 must not be disabled)
-
-import-graph=
-
-
-
-# Create a graph of external dependencies in the given file (report R0402 must
-
-# not be disabled)
-
-ext-import-graph=
-
-
-
-# Create a graph of internal dependencies in the given file (report R0402 must
-
-# not be disabled)
-
-int-import-graph=
-
-
-
-
-
-# checks for :
-
-# * methods without self as first argument
-
-# * overridden methods signature
-
-# * access only to existant members via self
-
-# * attributes not defined in the __init__ method
-
-# * supported interfaces implementation
-
-# * unreachable code
-
-#
-
-[CLASSES]
-
-
-
-# List of interface methods to ignore, separated by a comma. This is used for
-
-# instance to not check methods defines in Zope's Interface base class.
-
-ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
-
-
-
-# List of method names used to declare (i.e. assign) instance attributes.
-
-defining-attr-methods=__init__,__new__,setUp
-
-
-
-
-
-# checks for similarities and duplicated code. This computation may be
-
-# memory / CPU intensive, so you should disable it if you experiments some
-
-# problems.
-
-#
-
-[SIMILARITIES]
-
-
-
-# Minimum lines number of a similarity.
-
-min-similarity-lines=4
-
-
-
-# Ignore comments when computing similarities.
-
-ignore-comments=yes
-
-
-
-# Ignore docstrings when computing similarities.
-
-ignore-docstrings=yes
-
-
-
-
-
-# checks for:
-
-# * warning notes in the code like FIXME, XXX
-
-# * PEP 263: source code with non ascii character but no encoding declaration
-
-#
-
-[MISCELLANEOUS]
-
-
-
-# List of note tags to take in consideration, separated by a comma.
-
-notes=FIXME,XXX,TODO
-
-
-
-
-
-# checks for :
-
-# * unauthorized constructions
-
-# * strict indentation
-
-# * line length
-
-# * use of <> instead of !=
-
-#
-
-[FORMAT]
-
-
-
-# Maximum number of characters on a single line.
-
-max-line-length=80
-
-
-
-# Maximum number of lines in a module
-
-max-module-lines=1000
-
-
-
-# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
-
-# tab).
-
-indent-string=' '
-
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index ef83336740..d9d5ee0cfc 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.4.140'
+__version__ = '0.4.141'
__author__ = "Kovid Goyal "
'''
Various run time constants.
diff --git a/src/calibre/devices/__init__.py b/src/calibre/devices/__init__.py
index ed16dba14b..a673d3fe09 100644
--- a/src/calibre/devices/__init__.py
+++ b/src/calibre/devices/__init__.py
@@ -12,7 +12,8 @@ def devices():
from calibre.devices.cybookg3.driver import CYBOOKG3
from calibre.devices.kindle.driver import KINDLE
from calibre.devices.kindle.driver import KINDLE2
- return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2)
+ from calibre.devices.blackberry.driver import BLACKBERRY
+ return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY)
import time
diff --git a/src/calibre/devices/blackberry/__init__.py b/src/calibre/devices/blackberry/__init__.py
new file mode 100644
index 0000000000..c8c1a8f015
--- /dev/null
+++ b/src/calibre/devices/blackberry/__init__.py
@@ -0,0 +1,6 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+
diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py
new file mode 100644
index 0000000000..f6c615b0de
--- /dev/null
+++ b/src/calibre/devices/blackberry/driver.py
@@ -0,0 +1,30 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.devices.usbms.driver import USBMS
+
+class BLACKBERRY(USBMS):  # USBMS subclass configured purely through the class attributes below (vendor name 'RIM')
+ # Ordered list of supported formats
+ FORMATS = ['mobi', 'prc']
+
+ VENDOR_ID = [0x0fca]  # presumably the USB vendor id(s) matched by the USBMS base class -- confirm
+ PRODUCT_ID = [0x8004]  # presumably the USB product id(s) -- confirm
+ BCD = [0x0200]  # presumably the USB bcdDevice revision(s) -- confirm
+
+ VENDOR_NAME = 'RIM'
+ WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
+ #WINDOWS_CARD_MEM = 'CARD_STORAGE'  (disabled: no card-memory name is defined for this device)
+
+ #OSX_MAIN_MEM = 'Kindle Internal Storage Media'  (disabled; NOTE(review): 'Kindle' string looks carried over from another driver -- confirm real value before enabling)
+ #OSX_CARD_MEM = 'Kindle Card Storage Media'  (disabled; same caveat as OSX_MAIN_MEM)
+
+ MAIN_MEMORY_VOLUME_LABEL = 'Blackberry Main Memory'
+ #STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'  (disabled; 'Kindle' label -- confirm before enabling)
+
+ EBOOK_DIR_MAIN = 'ebooks'
+ #EBOOK_DIR_CARD = "documents"  (disabled together with the other card settings)
+ SUPPORTS_SUB_DIRS = True
+
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index cadc61e584..0c73c4412c 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember '
'''
@@ -165,8 +166,8 @@ class USBMS(Device):
def get_file(self, path, outfile, end_session=True):
path = self.munge_path(path)
- src = open(path, 'rb')
- shutil.copyfileobj(src, outfile, 10*1024*1024)
+ with open(path, 'rb') as src:  # with-block closes src once the copy finishes or raises; the removed code never closed it
+ shutil.copyfileobj(src, outfile, 10*1024*1024)  # third argument is the chunk size: copy in 10 MiB pieces
def put_file(self, infile, path, replace_file=False, end_session=True):
path = self.munge_path(path)
diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py
index 8ad41c524f..af6d724883 100644
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -30,12 +30,50 @@ def detect(aBuf):
# Added by Kovid
ENCODING_PATS = [
- re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE),
- re.compile(r'', re.IGNORECASE)
+ re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>',
+ re.IGNORECASE),
+ re.compile(r'',
+ re.IGNORECASE)
]
ENTITY_PATTERN = re.compile(r'&(\S+?);')
-def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entities=False):
+def strip_encoding_declarations(raw):  # Return raw with every match of each pattern in ENCODING_PATS deleted
+ for pat in ENCODING_PATS:
+ raw = pat.sub('', raw)  # patterns are applied one after another to the progressively stripped text
+ return raw
+
+def substitute_entites(raw):  # Replace each ENTITY_PATTERN match in raw using entity_to_unicode; name is misspelled ("entites") but xml_to_unicode calls it by this name
+ from calibre import entity_to_unicode
+ from functools import partial
+ f = partial(entity_to_unicode, exceptions=
+ ['amp', 'apos', 'quot', 'lt', 'gt'])  # presumably these five entities are left unconverted so escaped markup survives -- confirm in entity_to_unicode
+ return ENTITY_PATTERN.sub(f, raw)
+
+_CHARSET_ALIASES = { "macintosh" : "mac-roman",
+ "x-sjis" : "shift-jis" }
+
+
+def force_encoding(raw, verbose):  # Choose a lower-cased codec name for raw via detect(), falling back to preferred_encoding
+ from calibre.constants import preferred_encoding
+ try:
+ chardet = detect(raw)
+ except:  # bare except: any failure inside detect() is swallowed and replaced by the fallback below
+ chardet = {'encoding':preferred_encoding, 'confidence':0}
+ encoding = chardet['encoding']
+ if chardet['confidence'] < 1 and verbose:  # warn only when verbose and the reported confidence is below 1
+ print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
+ if not encoding:  # detect() may report no encoding at all
+ encoding = preferred_encoding
+ encoding = encoding.lower()
+ if _CHARSET_ALIASES.has_key(encoding):  # map known alias spellings to the name stored in _CHARSET_ALIASES
+ encoding = _CHARSET_ALIASES[encoding]
+ if encoding == 'ascii':  # ASCII is a subset of UTF-8, so the wider codec is returned instead
+ encoding = 'utf-8'
+ return encoding
+
+
+def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
+ resolve_entities=False):
'''
Force conversion of byte string to unicode. Tries to look for XML/HTML
encoding declaration first, if not found uses the chardet library and
@@ -45,44 +83,27 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entiti
encoding = None
if not raw:
return u'', encoding
- if isinstance(raw, unicode):
- return raw, encoding
- for pat in ENCODING_PATS:
- match = pat.search(raw)
- if match:
- encoding = match.group(1)
- break
- if strip_encoding_pats:
+ if not isinstance(raw, unicode):
+ if raw.startswith('\xff\xfe'):
+ raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
+ elif raw.startswith('\xfe\xff'):
+ raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
+ if not isinstance(raw, unicode):
for pat in ENCODING_PATS:
- raw = pat.sub('', raw)
- if encoding is None:
+ match = pat.search(raw)
+ if match:
+ encoding = match.group(1)
+ break
+ if encoding is None:
+ encoding = force_encoding(raw, verbose)
try:
- chardet = detect(raw)
- except:
- chardet = {'encoding':'utf-8', 'confidence':0}
- encoding = chardet['encoding']
- if chardet['confidence'] < 1 and verbose:
- print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
- CHARSET_ALIASES = { "macintosh" : "mac-roman",
- "x-sjis" : "shift-jis" }
- if not encoding:
- from calibre import preferred_encoding
- encoding = preferred_encoding
- if encoding:
- encoding = encoding.lower()
- if CHARSET_ALIASES.has_key(encoding):
- encoding = CHARSET_ALIASES[encoding]
- if encoding == 'ascii':
- encoding = 'utf-8'
+ raw = raw.decode(encoding, 'replace')
+ except LookupError:
+ raw = raw.decode('utf-8', 'replace')
- try:
- raw = raw.decode(encoding, 'replace')
- except LookupError:
- raw = raw.decode('utf-8', 'replace')
+ if strip_encoding_pats:
+ raw = strip_encoding_declarations(raw)
if resolve_entities:
- from calibre import entity_to_unicode
- from functools import partial
- f = partial(entity_to_unicode, exceptions=['amp', 'apos', 'quot', 'lt', 'gt'])
- raw = ENTITY_PATTERN.sub(f, raw)
-
+ raw = substitute_entites(raw)
+
return raw, encoding
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index b4e75d1c15..d89fc3c9a9 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -273,7 +273,12 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
hf.links.remove(link)
next_level = list(nl)
- return flat, list(depth_first(flat[0], flat))
+ orec = sys.getrecursionlimit()
+ sys.setrecursionlimit(500000)
+ try:
+ return flat, list(depth_first(flat[0], flat))
+ finally:
+ sys.setrecursionlimit(orec)
def opf_traverse(opf_reader, verbose=0, encoding=None):
diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py
index 89a0eb5d44..495d9adb50 100644
--- a/src/calibre/ebooks/lrf/lrs/convert_from.py
+++ b/src/calibre/ebooks/lrf/lrs/convert_from.py
@@ -73,7 +73,9 @@ class LrsParser(object):
return CharButton(self.parsed_objects[tag.get('refobj')], None)
if tag.name == 'plot':
return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj']))
- return map[tag.name](**self.attrs_to_dict(tag))
+ settings = self.attrs_to_dict(tag)
+ settings.pop('spanstyle', '')
+ return map[tag.name](**settings)
def process_text_element(self, tag, elem):
for item in tag.contents:
@@ -121,7 +123,8 @@ class LrsParser(object):
for tag in self.soup.findAll('page'):
page = self.parsed_objects[tag.get('objid')]
self.book.append(page)
- for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline']):
+ for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
+ 'ruledline', 'simpletextblock']):
if block_tag.name == 'ruledline':
page.append(RuledLine(**self.attrs_to_dict(block_tag)))
else:
@@ -134,7 +137,7 @@ class LrsParser(object):
self.book.append(jb)
self.parsed_objects[tag.get('objid')] = jb
- for tag in self.soup.findAll('textblock'):
+ for tag in self.soup.findAll(['textblock', 'simpletextblock']):
self.process_text_block(tag)
toc = self.soup.find('toc')
if toc:
@@ -145,8 +148,10 @@ class LrsParser(object):
def third_pass(self):
map = {
- 'page' : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']),
+ 'page' : (Page, ['pagestyle', 'evenfooterid',
+ 'oddfooterid', 'evenheaderid', 'oddheaderid']),
'textblock' : (TextBlock, ['textstyle', 'blockstyle']),
+ 'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']),
'imageblock' : (ImageBlock, ['blockstyle', 'refstream']),
'image' : (Image, ['refstream']),
'canvas' : (Canvas, ['canvaswidth', 'canvasheight']),
@@ -160,8 +165,12 @@ class LrsParser(object):
if tag.name in map.keys():
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
for a in ('pagestyle', 'blockstyle', 'textstyle'):
- if tag.has_key(a):
- settings[attrmap[a]] = self.parsed_objects[tag.get(a)]
+ label = tag.get(a, False)
+ if label:
+ _obj = self.parsed_objects[label] if \
+ self.parsed_objects.has_key(label) else \
+ self._style_labels[label]
+ settings[attrmap[a]] = _obj
for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'):
if tag.has_key(a):
settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
@@ -182,6 +191,7 @@ class LrsParser(object):
'imagestream': (ImageStream, ['imagestreamlabel']),
'registfont' : (Font, [])
}
+ self._style_labels = {}
for id, tag in self.objects.items():
if tag.name in map.keys():
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid'])
@@ -189,7 +199,11 @@ class LrsParser(object):
for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'):
if tag.has_key(a):
settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
+ settings.pop('autoindex', '')
self.parsed_objects[id] = map[tag.name][0](**settings)
+ x = tag.get('stylelabel', False)
+ if x:
+ self._style_labels[x] = self.parsed_objects[id]
if tag.name == 'registfont':
self.book.append(self.parsed_objects[id])
@@ -220,6 +234,8 @@ class LrsParser(object):
def me(base, tagname):
tag = base.find(tagname.lower())
+ if tag is None:  # base.find() returned nothing for this tag name
+ return ('', '', '')  # NOTE(review): 3-tuple here, but the normal path below returns a 2-tuple (text, reading) -- confirm callers accept both shapes
tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
return tag
diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py
index 461210befe..1506647ca8 100644
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -80,6 +80,7 @@ class MetadataUpdater(object):
type = self.type = data[60:68]
self.nrecs, = unpack('>H', data[76:78])
record0 = self.record0 = self.record(0)
+ self.encryption_type, = unpack('>H', record0[12:14])
codepage, = unpack('>I', record0[28:32])
self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
image_base, = unpack('>I', record0[108:112])
@@ -133,6 +134,8 @@ class MetadataUpdater(object):
if self.thumbnail_record is not None:
recs.append((202, pack('>I', self.thumbnail_rindex)))
exth = StringIO()
+ if getattr(self, 'encryption_type', -1) != 0:
+ raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
for code, data in recs:
exth.write(pack('>II', code, len(data) + 8))
exth.write(data)
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 6d26c81789..df728e400e 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal '
Read data from .mobi files
'''
-import sys, struct, os, cStringIO, re
+import sys, struct, os, cStringIO, re, functools
try:
from PIL import Image as PILImage
@@ -186,7 +186,9 @@ class MobiReader(object):
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html)
- self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode,
+ e2u = functools.partial(entity_to_unicode,
+ exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
+ self.processed_html = re.sub(r'&(\S+?);', e2u,
self.processed_html)
self.extract_images(processed_records, output_dir)
self.replace_page_breaks()
@@ -235,7 +237,7 @@ class MobiReader(object):
if self.verbose:
print 'Creating OPF...'
ncx = cStringIO.StringIO()
- opf = self.create_opf(htmlfile, guide)
+ opf = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
@@ -328,7 +330,7 @@ class MobiReader(object):
except ValueError:
pass
- def create_opf(self, htmlfile, guide=None):
+ def create_opf(self, htmlfile, guide=None, root=None):
mi = self.book_header.exth.mi
opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'):
@@ -347,21 +349,27 @@ class MobiReader(object):
if ref.type.lower() == 'toc':
toc = ref.href()
if toc:
- index = self.processed_html.find(' -1:
- raw = ''+self.processed_html[index:]
- root = html.fromstring(raw)
+ if elems:
tocobj = TOC()
- for a in root.xpath('//a[@href]'):
- try:
- text = u' '.join([t.strip() for t in a.xpath('descendant::text()')])
- except:
- text = ''
- text = ent_pat.sub(entity_to_unicode, text)
- if a.get('href', '').startswith('#'):
- tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text)
+ reached = False
+ for x in root.iter():
+ if x == elems[-1]:
+ reached = True
+ continue
+ if reached and x.tag == 'a':
+ href = x.get('href', '')
+ if href:
+ try:
+ text = u' '.join([t.strip() for t in \
+ x.xpath('descendant::text()')])
+ except:
+ text = ''
+ text = ent_pat.sub(entity_to_unicode, text)
+ tocobj.add_item(toc.partition('#')[0], href[1:],
+ text)
if tocobj is not None:
opf.set_toc(tocobj)
diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py
index 2b093a45b5..1d5fad960e 100644
--- a/src/calibre/gui2/dialogs/config.py
+++ b/src/calibre/gui2/dialogs/config.py
@@ -180,12 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
- book_exts = sorted(BOOK_EXTENSIONS)
- for ext in book_exts:
+ self.book_exts = sorted(BOOK_EXTENSIONS)
+ for ext in self.book_exts:
self.single_format.addItem(ext.upper(), QVariant(ext))
single_format = config['save_to_disk_single_format']
- self.single_format.setCurrentIndex(book_exts.index(single_format))
+ self.single_format.setCurrentIndex(self.book_exts.index(single_format))
self.cover_browse.setValue(config['cover_flow_queue_length'])
self.systray_notifications.setChecked(not config['disable_tray_notification'])
from calibre.translations.compiled import translations
@@ -204,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.pdf_metadata.setChecked(prefs['read_file_metadata'])
added_html = False
- for ext in book_exts:
+ for ext in self.book_exts:
ext = ext.lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
if ext == 'lrf' or is_supported('book.'+ext):
@@ -402,7 +402,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()]
prefs['worker_process_priority'] = p
prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked())
- config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()]
+ config['save_to_disk_single_format'] = self.book_exts[self.single_format.currentIndex()]
config['cover_flow_queue_length'] = self.cover_browse.value()
prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
config['systray_icon'] = self.systray_icon.checkState() == Qt.Checked
diff --git a/src/calibre/gui2/dialogs/epub.py b/src/calibre/gui2/dialogs/epub.py
index fb8e6bf71e..88607b1f8d 100644
--- a/src/calibre/gui2/dialogs/epub.py
+++ b/src/calibre/gui2/dialogs/epub.py
@@ -126,7 +126,8 @@ class Config(ResizableDialog, Ui_Dialog):
pix = QPixmap()
pix.loadFromData(cover)
if pix.isNull():
- d = error_dialog(self.window, _file + _(" is not a valid picture"))
+ d = error_dialog(self.window, _('Error reading file'),
+ _file + _(" is not a valid picture"))
d.exec_()
else:
self.cover_path.setText(_file)
diff --git a/src/calibre/gui2/dialogs/lrf_single.py b/src/calibre/gui2/dialogs/lrf_single.py
index 9083d3e4df..fdcf908d1d 100644
--- a/src/calibre/gui2/dialogs/lrf_single.py
+++ b/src/calibre/gui2/dialogs/lrf_single.py
@@ -255,7 +255,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.gui_headerformat.setDisabled(True)
self.gui_header_separation.setDisabled(True)
self.gui_use_metadata_cover.setCheckState(Qt.Checked)
- self.preprocess.addItem('No preprocessing')
+ self.preprocess.addItem(_('No preprocessing'))
for opt in self.PREPROCESS_OPTIONS:
self.preprocess.addItem(opt.get_opt_string()[2:])
ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:')
@@ -338,7 +338,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
cmd.append(opt)
text = qstring_to_unicode(self.preprocess.currentText())
- if text != 'No preprocessing':
+ if text != _('No preprocessing'):
cmd.append(u'--'+text)
cmd.extend([u'--profile', qstring_to_unicode(self.gui_profile.currentText())])
diff --git a/src/calibre/gui2/dialogs/mobi.py b/src/calibre/gui2/dialogs/mobi.py
index 7d0324e0f4..b9cff08200 100644
--- a/src/calibre/gui2/dialogs/mobi.py
+++ b/src/calibre/gui2/dialogs/mobi.py
@@ -19,5 +19,4 @@ class Config(_Config):
self.opt_dont_split_on_page_breaks.setVisible(False)
self.opt_preserve_tag_structure.setVisible(False)
self.opt_linearize_tables.setVisible(False)
- self.opt_no_justification.setVisible(False)
self.page_map_box.setVisible(False)
\ No newline at end of file
diff --git a/src/calibre/gui2/images/news/soldiers.png b/src/calibre/gui2/images/news/soldiers.png
new file mode 100644
index 0000000000..df04f108e6
Binary files /dev/null and b/src/calibre/gui2/images/news/soldiers.png differ
diff --git a/src/calibre/gui2/images/news/theonion.png b/src/calibre/gui2/images/news/theonion.png
new file mode 100644
index 0000000000..d29c69562d
Binary files /dev/null and b/src/calibre/gui2/images/news/theonion.png differ
diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index 79c42c2a81..c6277ff902 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -194,7 +194,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
def __init__(self, pathtoebook=None):
MainWindow.__init__(self, None)
self.setupUi(self)
-
self.iterator = None
self.current_page = None
self.pending_search = None
@@ -619,7 +618,7 @@ View an ebook.
def main(args=sys.argv):
parser = option_parser()
args = parser.parse_args(args)[-1]
- pid = os.fork() if islinux else -1
+ pid = os.fork() if False and islinux else -1
if pid <= 0:
app = Application(args)
app.setWindowIcon(QIcon(':/images/viewer.svg'))
diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py
index 164f865dca..ceae3094c4 100644
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@@ -19,7 +19,7 @@ except:
send_message = None
from calibre.ebooks.metadata.meta import get_metadata
from calibre.library.database2 import LibraryDatabase2
-from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
+from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.genshi.template import MarkupTemplate
FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'cover'])
@@ -453,7 +453,7 @@ id is an id number from the list command.
return 0
def do_set_metadata(db, id, stream):
- mi = OPFReader(stream)
+ mi = OPF(stream)
db.set_metadata(id, mi)
do_show_metadata(db, id, False)
if send_message is not None:
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index d960ef87f7..2bd1544e56 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -435,17 +435,16 @@ def post_install():
parser = option_parser()
opts = parser.parse_args()[0]
- if not opts.no_root and os.geteuid() != 0:
- print >> sys.stderr, 'You must be root to run this command.'
- sys.exit(1)
-
global use_destdir
use_destdir = opts.destdir
manifest = []
- manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
- manifest += setup_completion(opts.fatal_errors)
setup_desktop_integration(opts.fatal_errors)
- manifest += install_man_pages(opts.fatal_errors)
+ if opts.no_root or os.geteuid() == 0:
+ manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
+ manifest += setup_completion(opts.fatal_errors)
+ manifest += install_man_pages(opts.fatal_errors)
+ else:
+ print "Skipping udev, completion, and man-page install for non-root user."
try:
from PyQt4 import Qt
diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py
index 8a5a81ac7f..9c852c554e 100644
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@@ -15,7 +15,7 @@ DEPENDENCIES = [
('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
- ('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
+ ('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'),
('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 7ae997f90d..6018af4918 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -32,6 +32,7 @@ recipe_modules = ['recipe_' + r for r in (
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
+ 'lamujerdemivida', 'soldiers', 'theonion',
)]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
new file mode 100644
index 0000000000..a99be8f955
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+lamujerdemivida.com.ar
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LaMujerDeMiVida(BasicNewsRecipe):
+    '''
+    News recipe for the magazine at lamujerdemivida.com.ar.
+
+    parse_index() scrapes the page listed in ``feeds`` for article links
+    and get_cover_url() looks the cover image up on the INDEX page.
+    '''
+    title = 'La Mujer de mi Vida'
+    __author__ = 'Darko Miletic'
+    description = 'Cultura de otra manera'
+    oldest_article = 90
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'cp1252'
+    publisher = 'La Mujer de mi Vida'
+    category = 'literatura, critica, arte, ensayos'
+    language = _('Spanish')
+    # Site root: fetched for the cover and prefixed to the cover image src.
+    INDEX = 'http://www.lamujerdemivida.com.ar/'
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+        '--ignore-tables',
+    ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+
+    keep_only_tags = [dict(name='table', attrs={'width':'570'})]
+
+    feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]
+
+    def preprocess_html(self, soup):
+        '''
+        Tag the page as es-AR and drop every inline ``style`` attribute.
+
+        Returns the same soup object, modified in place.
+        '''
+        # Tolerate pages without <html>/<head> instead of raising on None.
+        if soup.html is not None:
+            soup.html['xml:lang'] = 'es-AR'
+            soup.html['lang'] = 'es-AR'
+        if soup.head is not None:
+            mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
+            soup.head.insert(0, mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def get_cover_url(self):
+        '''
+        Return the cover image URL found on the INDEX page, or None.
+        '''
+        cover_url = None
+        soup = self.index_to_soup(self.INDEX)
+        cover_item = soup.find('img', attrs={'alt':'Lamujerdemivida.'})
+        if cover_item:
+            # src is appended to INDEX as-is (assumes a site-relative src).
+            cover_url = self.INDEX + cover_item['src']
+        return cover_url
+
+    def parse_index(self):
+        '''
+        Build the article list by scraping each page returned by get_feeds().
+
+        Returns a list of (feed title, list of article dicts) tuples.
+        '''
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            # Article links are looked up inside <td width="390"> cells.
+            for item in soup.findAll('td', attrs={'width':'390'}):
+                atag = item.find('a', href=True)
+                if atag:
+                    url = atag['href']
+                    title = self.tag_to_string(atag)
+                    # No per-article date is scraped from the page.
+                    date = strftime(self.timefmt)
+                    articles.append({
+                        'title': title,
+                        'date': date,
+                        'url': url,
+                        'description': '',
+                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
diff --git a/src/calibre/web/feeds/recipes/recipe_soldiers.py b/src/calibre/web/feeds/recipes/recipe_soldiers.py
new file mode 100644
index 0000000000..dfaa070928
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_soldiers.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+www.army.mil/soldiers/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Soldiers(BasicNewsRecipe):
+    '''
+    News recipe for Soldiers magazine (www.army.mil/soldiers/).
+    '''
+    title = 'Soldiers'
+    __author__ = 'Darko Miletic'
+    description = 'The Official U.S. Army Magazine'
+    publisher = 'U.S. Army'
+    category = 'news, politics, war, weapons'
+    language = _('English')
+    encoding = 'utf-8'
+    oldest_article = 30
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+    # Throttling: one download/connection; delay presumably in seconds - confirm.
+    simultaneous_downloads = 1
+    max_connections = 1
+    delay = 4
+    INDEX = 'http://www.army.mil/soldiers/'
+
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+
+    html2epub_options = (
+        'publisher="' + publisher
+        + '"\ncomments="' + description
+        + '"\ntags="' + category + '"'
+    )
+
+    keep_only_tags = [dict(name='div', attrs={'id':'rightCol'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['addThis','comment','articleFooter']}),
+        dict(name=['object','link']),
+    ]
+
+    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml')]
+
+    def preprocess_html(self, soup):
+        '''
+        Remove every inline ``style`` attribute and return the soup.
+        '''
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
+    def get_cover_url(self):
+        '''
+        Return the ``src`` of the cover image on the INDEX page, or None.
+        '''
+        front_page = self.index_to_soup(self.INDEX)
+        cover_img = front_page.find('img', attrs={'alt':'Current Magazine Cover'})
+        if not cover_img:
+            return None
+        return cover_img['src']
diff --git a/src/calibre/web/feeds/recipes/recipe_theonion.py b/src/calibre/web/feeds/recipes/recipe_theonion.py
new file mode 100644
index 0000000000..06f7edd32b
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_theonion.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+theonion.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheOnion(BasicNewsRecipe):
+    '''
+    News recipe for theonion.com, built from its Daily and Sports feeds.
+    '''
+    title = 'The Onion'
+    __author__ = 'Darko Miletic'
+    description = "America's finest news source"
+    publisher = u'Onion, Inc.'
+    category = u'humor, news, USA'
+    language = _('English')
+    encoding = 'utf-8'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+
+    html2epub_options = (
+        'publisher="' + publisher
+        + '"\ncomments="' + description
+        + '"\ntags="' + category + '"'
+    )
+
+    keep_only_tags = [dict(name='div', attrs={'id':'main'})]
+
+    remove_tags = [
+        dict(name=['object','link','iframe','base']),
+        dict(name='div', attrs={'class':['toolbar_side','graphical_feature','toolbar_bottom']}),
+        dict(name='div', attrs={'id':['recent_slider','sidebar','pagination','related_media']}),
+    ]
+
+    feeds = [
+        (u'Daily', u'http://feeds.theonion.com/theonion/daily'),
+        (u'Sports', u'http://feeds.theonion.com/theonion/sports'),
+    ]