Pull from trunk

2025-12-08 06:05:04 -05:00 · 2009-03-01 10:08:53 -08:00 · 2009-03-01 10:08:53 -08:00 · 549e2b9efb
commit 549e2b9efb
parent 43dbf24c75 7486b2ae61
27 changed files with 355 additions and 790 deletions
--- a/epydoc-pdf.conf
+++ b/epydoc-pdf.conf
@ -1,50 +0,0 @@
 [epydoc] # Epydoc section marker (required by ConfigParser)
 # Information about the project.
 name: calibre
 url: http://calibre.kovidgoyal.net
 # The list of modules to document.  Modules can be named using
 # dotted names, module filenames, or package directory names.
 # This option may be repeated.
 modules: calibre.devices, calibre.ebooks.lrf.web.profiles
 output: pdf
 target: docs/pdf
 frames: no
 # graph
 #   The list of graph types that should be automatically included
 #   in the output.  Graphs are generated using the Graphviz "dot"
 #   executable.  Graph types include: "classtree", "callgraph",
 #   "umlclass".  Use "all" to include all graph types
 graph: classtree
 # css
 #   The CSS stylesheet for HTML output.  Can be the name of a builtin
 #   stylesheet, or the name of a file.
 css: white
 # link
 #   HTML code for the project link in the navigation bar.  If left
 #   unspecified, the project link will be generated based on the
 #   project's name and URL.
 #link: <a href="somewhere">My Cool Project</a>
 # top
 #   The "top" page for the documentation.  Can be a URL, the name
 #   of a module or class, or one of the special names "trees.html",
 #   "indices.html", or "help.html"
 # top: calibre
 # verbosity
 #   An integer indicating how verbose epydoc should be.  The default
 #   value is 0; negative values will suppress warnings and errors;
 #   positive values will give more verbose output.
 #verbosity: 0
 # separate-classes
 #   Whether each class should be listed in its own section when
 #   generating LaTeX or PDF output.
 #separate-classes: no
--- a/epydoc.conf
+++ b/epydoc.conf
@ -1,51 +0,0 @@
 [epydoc] # Epydoc section marker (required by ConfigParser)
 # Information about the project.
 name: calibre - API documentation
 url: http://calibre.kovidgoyal.net
 # The list of modules to document.  Modules can be named using
 # dotted names, module filenames, or package directory names.
 # This option may be repeated.
 modules: calibre.devices, calibre.ebooks.lrf.web.profiles 
 # Write html output to the directory "docs"
 output: html
 target: docs/html
 frames: no
 # graph
 #   The list of graph types that should be automatically included
 #   in the output.  Graphs are generated using the Graphviz "dot"
 #   executable.  Graph types include: "classtree", "callgraph",
 #   "umlclass".  Use "all" to include all graph types
 graph: classtree
 # css
 #   The CSS stylesheet for HTML output.  Can be the name of a builtin
 #   stylesheet, or the name of a file.
 css: white
 # link
 #   HTML code for the project link in the navigation bar.  If left
 #   unspecified, the project link will be generated based on the
 #   project's name and URL.
 link: <a href="http://calibre.kovidgoyal.net">calibre</a>
 # top
 #   The "top" page for the documentation.  Can be a URL, the name
 #   of a module or class, or one of the special names "trees.html",
 #   "indices.html", or "help.html"
 #top: 
 # verbosity
 #   An integer indicating how verbose epydoc should be.  The default
 #   value is 0; negative values will suppress warnings and errors;
 #   positive values will give more verbose output.
 #verbosity: 0
 # separate-classes
 #   Whether each class should be listed in its own section when
 #   generating LaTeX or PDF output.
 #separate-classes: no
--- a/pylint.conf
+++ b/pylint.conf
@ -1,602 +0,0 @@
 # lint Python modules using external checkers.
 # 
 # This is the main checker controlling the other ones and the reports
 # generation. It is itself both a raw checker and an astng checker in order
 # to:
 # * handle message activation / deactivation at the module level
 # * handle some basic but necessary stats data (number of classes, methods...)
 # 
 [MASTER]
 # Specify a configuration file.
 #rcfile=
 # Profiled execution.
 profile=no
 # Add <file or directory> to the black list. It should be a base name, not a
 # path. You may set this option multiple times.
 ignore=CVS
 # Pickle collected data for later comparisons.
 persistent=yes
 # Set the cache size for astng objects.
 cache-size=500
 # List of plugins (as comma separated values of python modules names) to load,
 # usually to register additional checkers.
 load-plugins=
 [MESSAGES CONTROL]
 # Enable only checker(s) with the given id(s). This option conflicts with the
 # disable-checker option
 #enable-checker=
 # Enable all checker(s) except those with the given id(s). This option conflicts
 # with the enable-checker option
 #disable-checker=
 # Enable all messages in the listed categories.
 #enable-msg-cat=
 # Disable all messages in the listed categories.
 #disable-msg-cat=
 # Enable the message(s) with the given id(s).
 #enable-msg=
 # Disable the message(s) with the given id(s).
 #disable-msg=
 [REPORTS]
 # set the output format. Available formats are text, parseable, colorized and
 # html
 output-format=colorized
 # Include message's id in output
 include-ids=no
 # Put messages in a separate file for each module / package specified on the
 # command line instead of printing them on stdout. Reports (if any) will be
 # written in a file name "pylint_global.[txt|html]".
 files-output=no
 # Tells whether to display a full report or only the messages
 reports=yes
 # Python expression which should return a note less than 10 (10 is the highest
 # note). You have access to the variables error, warning, refactor, convention
 # and statement, which respectively contain the number of messages in each
 # category and the total number of statements analyzed. This is used by the
 # global evaluation report (R0004).
 evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
 # Add a comment according to your evaluation note. This is used by the global
 # evaluation report (R0004).
 comment=no
 # Enable the report(s) with the given id(s).
 #enable-report=
 # Disable the report(s) with the given id(s).
 #disable-report=
 # checks for
 # * unused variables / imports
 # * undefined variables
 # * redefinition of variable from builtins or from an outer scope
 # * use of variable before assignment
 # 
 [VARIABLES]
 # Tells whether we should check for unused import in __init__ files.
 init-import=no
 # A regular expression matching names used for dummy variables (i.e. not used).
 dummy-variables-rgx=_|dummy
 # List of additional names supposed to be defined in builtins. Remember that
 # you should avoid to define new builtins when possible.
 additional-builtins=
 # try to find bugs in the code using type inference
 # 
 [TYPECHECK]
 # Tells whether missing members accessed in mixin class should be ignored. A
 # mixin class is detected if its name ends with "mixin" (case insensitive).
 ignore-mixin-members=yes
 # When zope mode is activated, consider the acquired-members option to ignore
 # access to some undefined attributes.
 zope=no
 # List of members which are usually obtained through zope's acquisition
 # mechanism and so shouldn't trigger E0201 when accessed (need zope=yes to be
 # considered).
 acquired-members=REQUEST,acl_users,aq_parent
 # checks for :
 # * doc strings
 # * modules / classes / functions / methods / arguments / variables name
 # * number of arguments, local variables, branches, returns and statements in
 # functions, methods
 # * required module attributes
 # * dangerous default values as arguments
 # * redefinition of function / method / class
 # * uses of the global statement
 # 
 [BASIC]
 # Required attributes for module, separated by a comma
 required-attributes=
 # Regular expression which should only match functions or classes name which do
 # not require a docstring
 no-docstring-rgx=__.*__
 # Regular expression which should only match correct module names
 module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
 # Regular expression which should only match correct module level names
 const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
 # Regular expression which should only match correct class names
 class-rgx=[A-Z_][a-zA-Z0-9]+$
 # Regular expression which should only match correct function names
 function-rgx=[a-z_][a-z0-9_]{2,30}$
 # Regular expression which should only match correct method names
 method-rgx=[a-z_][a-z0-9_]{2,30}$
 # Regular expression which should only match correct instance attribute names
 attr-rgx=[a-z_][a-z0-9_]{2,30}$
 # Regular expression which should only match correct argument names
 argument-rgx=[a-z_][a-z0-9_]{2,30}$
 # Regular expression which should only match correct variable names
 variable-rgx=[a-z_][a-z0-9_]{2,30}$
 # Regular expression which should only match correct list comprehension /
 # generator expression variable names
 inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
 # Good variable names which should always be accepted, separated by a comma
 good-names=i,j,k,ex,Run,_
 # Bad variable names which should always be refused, separated by a comma
 bad-names=foo,bar,baz,toto,tutu,tata
 # List of builtins function names that should not be used, separated by a comma
 bad-functions=map,filter,apply,input
 # checks for sign of poor/misdesign:
 # * number of methods, attributes, local variables...
 # * size, complexity of functions, methods
 # 
 [DESIGN]
 # Maximum number of arguments for function / method
 max-args=5
 # Maximum number of locals for function / method body
 max-locals=15
 # Maximum number of return / yield for function / method body
 max-returns=6
 # Maximum number of branches for function / method body
 max-branchs=12
 # Maximum number of statements in function / method body
 max-statements=50
 # Maximum number of parents for a class (see R0901).
 max-parents=7
 # Maximum number of attributes for a class (see R0902).
 max-attributes=7
 # Minimum number of public methods for a class (see R0903).
 min-public-methods=2
 # Maximum number of public methods for a class (see R0904).
 max-public-methods=20
 # checks for
 # * external modules dependencies
 # * relative / wildcard imports
 # * cyclic imports
 # * uses of deprecated modules
 # 
 [IMPORTS]
 # Deprecated modules which should not be used, separated by a comma
 deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
 # Create a graph of every (i.e. internal and external) dependencies in the
 # given file (report R0402 must not be disabled)
 import-graph=
 # Create a graph of external dependencies in the given file (report R0402 must
 # not be disabled)
 ext-import-graph=
 # Create a graph of internal dependencies in the given file (report R0402 must
 # not be disabled)
 int-import-graph=
 # checks for :
 # * methods without self as first argument
 # * overridden methods signature
 # * access only to existent members via self
 # * attributes not defined in the __init__ method
 # * supported interfaces implementation
 # * unreachable code
 # 
 [CLASSES]
 # List of interface methods to ignore, separated by a comma. This is used for
 # instance to not check methods defined in Zope's Interface base class.
 ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
 # List of method names used to declare (i.e. assign) instance attributes.
 defining-attr-methods=__init__,__new__,setUp
 # checks for similarities and duplicated code. This computation may be
 # memory / CPU intensive, so you should disable it if you experience
 # problems.
 # 
 [SIMILARITIES]
 # Minimum lines number of a similarity.
 min-similarity-lines=4
 # Ignore comments when computing similarities.
 ignore-comments=yes
 # Ignore docstrings when computing similarities.
 ignore-docstrings=yes
 # checks for:
 # * warning notes in the code like FIXME, XXX
 # * PEP 263: source code with non ascii character but no encoding declaration
 # 
 [MISCELLANEOUS]
 # List of note tags to take into consideration, separated by a comma.
 notes=FIXME,XXX,TODO
 # checks for :
 # * unauthorized constructions
 # * strict indentation
 # * line length
 # * use of <> instead of !=
 # 
 [FORMAT]
 # Maximum number of characters on a single line.
 max-line-length=80
 # Maximum number of lines in a module
 max-module-lines=1000
 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
 # tab).
 indent-string='    '
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.4.140'
+__version__   = '0.4.141'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.
--- a/src/calibre/devices/init.py
+++ b/src/calibre/devices/init.py
@ -12,7 +12,8 @@ def devices():
    from calibre.devices.cybookg3.driver import CYBOOKG3
    from calibre.devices.kindle.driver import KINDLE
    from calibre.devices.kindle.driver import KINDLE2
-    return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2)
+    from calibre.devices.blackberry.driver import BLACKBERRY
    return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY)
 import time
--- a/src/calibre/devices/blackberry/init.py
+++ b/src/calibre/devices/blackberry/init.py
@ -0,0 +1,6 @@
 from __future__ import with_statement
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -0,0 +1,30 @@
 from __future__ import with_statement
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.usbms.driver import USBMS
 class BLACKBERRY(USBMS):
    # Device description for BlackBerry handsets, expressed purely as
    # class-level constants on top of the USBMS base class. No methods are
    # overridden in the part of the class visible here.
    # Ordered list of supported formats
    FORMATS     = ['mobi', 'prc']
    # USB identifiers, each given as a list of accepted values.
    VENDOR_ID   = [0x0fca]
    PRODUCT_ID  = [0x8004]
    BCD         = [0x0200]
    # Vendor / volume identification strings.
    # NOTE(review): exactly how USBMS matches these per platform is not
    # visible here -- confirm against the USBMS base class.
    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
    # NOTE(review): the commented-out values below still carry Kindle
    # strings, so they look like placeholders copied from the Kindle driver
    # rather than verified BlackBerry values -- confirm before enabling.
    #WINDOWS_CARD_MEM = 'CARD_STORAGE'
    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
    #OSX_CARD_MEM = 'Kindle Card Storage Media'
    MAIN_MEMORY_VOLUME_LABEL  = 'Blackberry Main Memory'
    #STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
    # Directory name for books in main memory (presumably relative to the
    # device root -- verify against USBMS).
    EBOOK_DIR_MAIN = 'ebooks'
    #EBOOK_DIR_CARD = "documents"
    SUPPORTS_SUB_DIRS = True
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -1,3 +1,4 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john at nachtimwald.com>'
 '''
@ -165,8 +166,8 @@ class USBMS(Device):
    def get_file(self, path, outfile, end_session=True):
        path = self.munge_path(path)
-        src = open(path, 'rb')
+        with open(path, 'rb') as src:
-        shutil.copyfileobj(src, outfile, 10*1024*1024)
+            shutil.copyfileobj(src, outfile, 10*1024*1024)
    def put_file(self, infile, path, replace_file=False, end_session=True):
        path = self.munge_path(path)
--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -30,12 +30,50 @@ def detect(aBuf):
 # Added by Kovid
 ENCODING_PATS = [
-                 re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE),
+                 re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', 
-                 re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>', re.IGNORECASE)
+                            re.IGNORECASE),
                 re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>', 
                            re.IGNORECASE)
                 ]
 ENTITY_PATTERN = re.compile(r'&(\S+?);')
-def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entities=False):
+def strip_encoding_declarations(raw):
    for pat in ENCODING_PATS:
        raw = pat.sub('', raw)
    return raw
 def substitute_entites(raw):
    # Replace every match of ENTITY_PATTERN (r'&(\S+?);') in `raw` using
    # calibre's entity_to_unicode as the substitution callback, and return
    # the resulting string.
    # The function name is misspelled ("entites"); it is kept as-is because
    # callers reference it by this name.
    from calibre import entity_to_unicode
    from functools import partial
    # The five XML-reserved entity names are passed as `exceptions`;
    # presumably entity_to_unicode leaves those unresolved so the markup
    # stays well-formed -- TODO confirm in calibre.entity_to_unicode.
    f = partial(entity_to_unicode, exceptions=
                ['amp', 'apos', 'quot', 'lt', 'gt'])
    return ENTITY_PATTERN.sub(f, raw)
 _CHARSET_ALIASES = { "macintosh" : "mac-roman",
                        "x-sjis" : "shift-jis" }
 def force_encoding(raw, verbose):
    # Pick an encoding name for `raw` using detect(), with fallbacks.
    #
    # raw:     the data handed to detect().
    # verbose: when true, print a warning if the detector's reported
    #          confidence is below 1.
    # Returns a lower-cased encoding name, after alias mapping and with
    # 'ascii' replaced by 'utf-8'.
    from calibre.constants import preferred_encoding
    try:
        chardet = detect(raw)
    except:
        # Any failure inside the detector falls back to the preferred
        # encoding with zero confidence (note: bare except, so every
        # exception type is swallowed here).
        chardet = {'encoding':preferred_encoding, 'confidence':0}
    encoding = chardet['encoding']
    if chardet['confidence'] < 1 and verbose:
        print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
    # The detector may report a falsy encoding; use the preferred one then.
    if not encoding:
        encoding = preferred_encoding
    # Lower-case first so the alias lookup below matches regardless of case.
    encoding = encoding.lower()
    if _CHARSET_ALIASES.has_key(encoding):
        encoding = _CHARSET_ALIASES[encoding]
    # Report 'utf-8' instead of 'ascii' (presumably because UTF-8 is a
    # superset of ASCII, so decoding is safer -- rationale not stated in
    # the source).
    if encoding == 'ascii':
        encoding = 'utf-8'
    return encoding
 def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, 
                   resolve_entities=False):
    '''
    Force conversion of byte string to unicode. Tries to look for XML/HTML 
    encoding declaration first, if not found uses the chardet library and
@ -45,44 +83,27 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entiti
    encoding = None
    if not raw:
        return u'', encoding    
-    if isinstance(raw, unicode):
+    if not isinstance(raw, unicode):
-        return raw, encoding
+        if raw.startswith('\xff\xfe'):
-    for pat in ENCODING_PATS:
+            raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
-        match = pat.search(raw)
+        elif raw.startswith('\xfe\xff'):
-        if match:
+            raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
-            encoding = match.group(1)
+    if not isinstance(raw, unicode):
            break
    if strip_encoding_pats:
        for pat in ENCODING_PATS:
-            raw = pat.sub('', raw)
+            match = pat.search(raw)
-    if encoding is None:
+            if match:
                encoding = match.group(1)
                break
        if encoding is None:
            encoding = force_encoding(raw, verbose)
        try:
-            chardet = detect(raw)
+            raw = raw.decode(encoding, 'replace')
-        except:
+        except LookupError:
-            chardet = {'encoding':'utf-8', 'confidence':0}
+            raw = raw.decode('utf-8', 'replace')
        encoding = chardet['encoding']
        if chardet['confidence'] < 1 and verbose:
            print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
    CHARSET_ALIASES = { "macintosh" : "mac-roman",
                        "x-sjis" : "shift-jis" }
    if not encoding:
        from calibre import preferred_encoding
        encoding = preferred_encoding
    if encoding:
        encoding = encoding.lower()
    if CHARSET_ALIASES.has_key(encoding):
        encoding = CHARSET_ALIASES[encoding]
    if encoding == 'ascii':
        encoding = 'utf-8'
-    try:
+    if strip_encoding_pats:
-        raw = raw.decode(encoding, 'replace')
+        raw = strip_encoding_declarations(raw)
    except LookupError:
        raw = raw.decode('utf-8', 'replace')
    if resolve_entities:
-        from calibre import entity_to_unicode
+        raw = substitute_entites(raw)
-        from functools import partial
+        
        f = partial(entity_to_unicode, exceptions=['amp', 'apos', 'quot', 'lt', 'gt'])
        raw = ENTITY_PATTERN.sub(f, raw)
    return raw, encoding 
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -273,7 +273,12 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
                hf.links.remove(link)
        next_level = list(nl)
-    return flat, list(depth_first(flat[0], flat))
+    orec = sys.getrecursionlimit()
    sys.setrecursionlimit(500000)
    try:
        return flat, list(depth_first(flat[0], flat))
    finally:
        sys.setrecursionlimit(orec)
 def opf_traverse(opf_reader, verbose=0, encoding=None):
--- a/src/calibre/ebooks/lrf/lrs/convert_from.py
+++ b/src/calibre/ebooks/lrf/lrs/convert_from.py
@ -73,7 +73,9 @@ class LrsParser(object):
            return CharButton(self.parsed_objects[tag.get('refobj')], None)
        if tag.name == 'plot':
            return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj']))
-        return map[tag.name](**self.attrs_to_dict(tag))
+        settings = self.attrs_to_dict(tag)
        settings.pop('spanstyle', '')
        return map[tag.name](**settings)
    def process_text_element(self, tag, elem):
        for item in tag.contents:
@ -121,7 +123,8 @@ class LrsParser(object):
        for tag in self.soup.findAll('page'):
            page = self.parsed_objects[tag.get('objid')]
            self.book.append(page)
-            for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline']):
+            for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 
                                          'ruledline', 'simpletextblock']):
                if block_tag.name == 'ruledline':
                    page.append(RuledLine(**self.attrs_to_dict(block_tag)))
                else:
@ -134,7 +137,7 @@ class LrsParser(object):
            self.book.append(jb)
            self.parsed_objects[tag.get('objid')] = jb
-        for tag in self.soup.findAll('textblock'):
+        for tag in self.soup.findAll(['textblock', 'simpletextblock']):
            self.process_text_block(tag)
        toc = self.soup.find('toc')
        if toc:
@ -145,8 +148,10 @@ class LrsParser(object):
    def third_pass(self):
        map = {
-               'page'       : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']),
+               'page'       : (Page, ['pagestyle', 'evenfooterid', 
                                      'oddfooterid', 'evenheaderid', 'oddheaderid']),
               'textblock'  : (TextBlock, ['textstyle', 'blockstyle']),
               'simpletextblock'  : (TextBlock, ['textstyle', 'blockstyle']),
               'imageblock' : (ImageBlock, ['blockstyle', 'refstream']),
               'image'      : (Image, ['refstream']),
               'canvas'     : (Canvas, ['canvaswidth', 'canvasheight']),
@ -160,8 +165,12 @@ class LrsParser(object):
            if tag.name in map.keys():
                settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
                for a in ('pagestyle', 'blockstyle', 'textstyle'):
-                    if tag.has_key(a):
+                    label = tag.get(a, False)
-                        settings[attrmap[a]] = self.parsed_objects[tag.get(a)]
+                    if label:
                        _obj = self.parsed_objects[label] if \
                            self.parsed_objects.has_key(label) else \
                            self._style_labels[label]
                        settings[attrmap[a]] = _obj
                for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'):
                    if tag.has_key(a):
                        settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
@ -182,6 +191,7 @@ class LrsParser(object):
               'imagestream': (ImageStream, ['imagestreamlabel']),
               'registfont' : (Font, [])
               }
        self._style_labels = {}
        for id, tag in self.objects.items():
            if tag.name in map.keys():
                settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid'])
@ -189,7 +199,11 @@ class LrsParser(object):
                    for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'):
                        if tag.has_key(a):
                            settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
                settings.pop('autoindex', '')
                self.parsed_objects[id] = map[tag.name][0](**settings)
                x = tag.get('stylelabel', False)
                if x:
                    self._style_labels[x] = self.parsed_objects[id]
                if tag.name == 'registfont':
                    self.book.append(self.parsed_objects[id])
@ -220,6 +234,8 @@ class LrsParser(object):
        def me(base, tagname):
            tag = base.find(tagname.lower())
            if tag is None:
                return ('', '', '')
            tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
            return tag
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@ -80,6 +80,7 @@ class MetadataUpdater(object):
        type = self.type = data[60:68]
        self.nrecs, = unpack('>H', data[76:78])
        record0 = self.record0 = self.record(0)
        self.encryption_type, = unpack('>H', record0[12:14])
        codepage, = unpack('>I', record0[28:32])
        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
        image_base, = unpack('>I', record0[108:112])
@ -133,6 +134,8 @@ class MetadataUpdater(object):
        if self.thumbnail_record is not None:
            recs.append((202, pack('>I', self.thumbnail_rindex)))
        exth = StringIO()
        if getattr(self, 'encryption_type', -1) != 0:
            raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
        for code, data in recs:
            exth.write(pack('>II', code, len(data) + 8))
            exth.write(data)
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''
-import sys, struct, os, cStringIO, re
+import sys, struct, os, cStringIO, re, functools
 try:
    from PIL import Image as PILImage
@ -186,7 +186,9 @@ class MobiReader(object):
        self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
        for pat in ENCODING_PATS:
            self.processed_html = pat.sub('', self.processed_html)
-        self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode,
+        e2u = functools.partial(entity_to_unicode, 
                                exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
        self.processed_html = re.sub(r'&(\S+?);', e2u,
                                     self.processed_html)
        self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()
@ -235,7 +237,7 @@ class MobiReader(object):
            if self.verbose:
                print 'Creating OPF...'
            ncx = cStringIO.StringIO()
-            opf = self.create_opf(htmlfile, guide)
+            opf = self.create_opf(htmlfile, guide, root)
            opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
            ncx = ncx.getvalue()
            if ncx:
@ -328,7 +330,7 @@ class MobiReader(object):
                except ValueError:
                    pass
-    def create_opf(self, htmlfile, guide=None):
+    def create_opf(self, htmlfile, guide=None, root=None):
        mi = self.book_header.exth.mi
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
@ -347,21 +349,27 @@ class MobiReader(object):
                if ref.type.lower() == 'toc':
                    toc = ref.href()
        if toc:
-            index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1]))
+            elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
-            if index > -1:
+            if elems:
                raw = '<html><body>'+self.processed_html[index:]
                root = html.fromstring(raw)
                tocobj = TOC()
-                for a in root.xpath('//a[@href]'):
+                reached = False
-                    try:
+                for x in root.iter():
-                        text = u' '.join([t.strip() for t in a.xpath('descendant::text()')])
+                    if x == elems[-1]:
-                    except:
+                        reached = True
-                        text = ''
+                        continue
-                    text = ent_pat.sub(entity_to_unicode, text)
+                    if reached and x.tag == 'a':
-                    if a.get('href', '').startswith('#'):
+                        href = x.get('href', '')
-                        tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text)
+                        if href:
                            try:
                                text = u' '.join([t.strip() for t in \
                                                x.xpath('descendant::text()')])
                            except:
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
                            tocobj.add_item(toc.partition('#')[0], href[1:], 
                                            text)
            if tocobj is not None:
                opf.set_toc(tocobj)
--- a/src/calibre/gui2/dialogs/config.py
+++ b/src/calibre/gui2/dialogs/config.py
@ -180,12 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog):
        self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
        self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
-        book_exts = sorted(BOOK_EXTENSIONS)
+        self.book_exts = sorted(BOOK_EXTENSIONS)
-        for ext in book_exts:
+        for ext in self.book_exts:
            self.single_format.addItem(ext.upper(), QVariant(ext))
        single_format = config['save_to_disk_single_format']
-        self.single_format.setCurrentIndex(book_exts.index(single_format))
+        self.single_format.setCurrentIndex(self.book_exts.index(single_format))
        self.cover_browse.setValue(config['cover_flow_queue_length'])
        self.systray_notifications.setChecked(not config['disable_tray_notification'])
        from calibre.translations.compiled import translations
@ -204,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
        self.pdf_metadata.setChecked(prefs['read_file_metadata'])
        added_html = False
-        for ext in book_exts:
+        for ext in self.book_exts:
            ext = ext.lower()
            ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
            if ext == 'lrf' or is_supported('book.'+ext):
@ -402,7 +402,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
        p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()]
        prefs['worker_process_priority'] = p
        prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked())
-        config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()]
+        config['save_to_disk_single_format'] = self.book_exts[self.single_format.currentIndex()]
        config['cover_flow_queue_length'] = self.cover_browse.value()
        prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
        config['systray_icon'] = self.systray_icon.checkState() == Qt.Checked
--- a/src/calibre/gui2/dialogs/epub.py
+++ b/src/calibre/gui2/dialogs/epub.py
@ -126,7 +126,8 @@ class Config(ResizableDialog, Ui_Dialog):
                pix = QPixmap()
                pix.loadFromData(cover)
                if pix.isNull():
-                    d = error_dialog(self.window, _file + _(" is not a valid picture"))
+                    d = error_dialog(self.window, _('Error reading file'),
                                      _file + _(" is not a valid picture"))
                    d.exec_()
                else:
                    self.cover_path.setText(_file)
--- a/src/calibre/gui2/dialogs/lrf_single.py
+++ b/src/calibre/gui2/dialogs/lrf_single.py
@ -255,7 +255,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
        self.gui_headerformat.setDisabled(True)
        self.gui_header_separation.setDisabled(True)
        self.gui_use_metadata_cover.setCheckState(Qt.Checked)
-        self.preprocess.addItem('No preprocessing')
+        self.preprocess.addItem(_('No preprocessing'))
        for opt in self.PREPROCESS_OPTIONS:
            self.preprocess.addItem(opt.get_opt_string()[2:])
        ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:')
@ -338,7 +338,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
                    cmd.append(opt)
        text = qstring_to_unicode(self.preprocess.currentText())
-        if text != 'No preprocessing':
+        if text != _('No preprocessing'):
            cmd.append(u'--'+text)
        cmd.extend([u'--profile',  qstring_to_unicode(self.gui_profile.currentText())])
--- a/src/calibre/gui2/dialogs/mobi.py
+++ b/src/calibre/gui2/dialogs/mobi.py
@ -19,5 +19,4 @@ class Config(_Config):
        self.opt_dont_split_on_page_breaks.setVisible(False)
        self.opt_preserve_tag_structure.setVisible(False)
        self.opt_linearize_tables.setVisible(False)
        self.opt_no_justification.setVisible(False)
        self.page_map_box.setVisible(False)
--- a/src/calibre/gui2/images/news/soldiers.png
+++ b/src/calibre/gui2/images/news/soldiers.png
--- a/src/calibre/gui2/images/news/theonion.png
+++ b/src/calibre/gui2/images/news/theonion.png
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -194,7 +194,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
    def __init__(self, pathtoebook=None):
        MainWindow.__init__(self, None)
        self.setupUi(self)
        self.iterator          = None
        self.current_page      = None
        self.pending_search    = None
@ -619,7 +618,7 @@ View an ebook.
 def main(args=sys.argv):
    parser = option_parser()
    args = parser.parse_args(args)[-1]
-    pid = os.fork() if islinux else -1
+    pid = os.fork() if False and islinux else -1
    if pid <= 0:
        app = Application(args)
        app.setWindowIcon(QIcon(':/images/viewer.svg'))
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -19,7 +19,7 @@ except:
    send_message = None
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.library.database2 import LibraryDatabase2
-from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
+from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
 from calibre.utils.genshi.template import MarkupTemplate
 FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'cover'])
@ -453,7 +453,7 @@ id is an id number from the list command.
    return 0
 def do_set_metadata(db, id, stream):
-    mi = OPFReader(stream)
+    mi = OPF(stream)
    db.set_metadata(id, mi)
    do_show_metadata(db, id, False)
    if send_message is not None:
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -435,17 +435,16 @@ def post_install():
    parser = option_parser()
    opts = parser.parse_args()[0]
    if not opts.no_root and os.geteuid() != 0:
        print >> sys.stderr, 'You must be root to run this command.'
        sys.exit(1)
    global use_destdir
    use_destdir = opts.destdir
    manifest = []
    manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
    manifest += setup_completion(opts.fatal_errors)
    setup_desktop_integration(opts.fatal_errors)
-    manifest += install_man_pages(opts.fatal_errors)
+    if opts.no_root or os.geteuid() == 0:
        manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
        manifest += setup_completion(opts.fatal_errors)
        manifest += install_man_pages(opts.fatal_errors)
    else:
        print "Skipping udev, completion, and man-page install for non-root user."
    try:
        from PyQt4 import Qt
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@ -15,7 +15,7 @@ DEPENDENCIES = [
            ('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
            ('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
            ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
-            ('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
+            ('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'),
            ('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'),
            ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
            ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -32,6 +32,7 @@ recipe_modules = ['recipe_' + r for r in (
           'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
           'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
           'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
           'lamujerdemivida', 'soldiers', 'theonion',
          )]
 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
+++ b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
@ -0,0 +1,76 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lamujerdemivida.com.ar
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class LaMujerDeMiVida(BasicNewsRecipe):
    # Recipe for lamujerdemivida.com.ar. The article list is assembled by
    # parse_index() from table cells found on each page listed in `feeds`.

    # --- descriptive metadata ---
    title = 'La Mujer de mi Vida'
    __author__ = 'Darko Miletic'
    description = 'Cultura de otra manera'
    publisher = 'La Mujer de mi Vida'
    category = 'literatura, critica, arte, ensayos'
    language = _('Spanish')

    # --- download settings ---
    oldest_article = 90
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    INDEX = 'http://www.lamujerdemivida.com.ar/'

    # --- conversion options, derived from the metadata above ---
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]
    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"\nlinearize_tables=True' % (
        publisher, description, category)

    keep_only_tags = [{'name': 'table', 'attrs': {'width': '570'}}]
    feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]

    def preprocess_html(self, soup):
        """Tag the document as es-AR and drop every inline ``style`` attribute.

        Sets the ``xml:lang`` and ``lang`` attributes on the ``html`` element,
        inserts a Content-Language meta string at the start of ``head``, and
        returns the same ``soup`` object.
        """
        for lang_attr in ('xml:lang', 'lang'):
            soup.html[lang_attr] = 'es-AR'
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="es-AR"/>')
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_cover_url(self):
        """Return INDEX joined with the ``src`` of the cover image, or None.

        The cover is the ``img`` on the INDEX page whose ``alt`` attribute is
        'Lamujerdemivida.'.
        """
        front_page = self.index_to_soup(self.INDEX)
        cover_img = front_page.find('img', attrs={'alt': 'Lamujerdemivida.'})
        if not cover_img:
            return None
        # src is appended to INDEX as-is (presumably a site-relative path).
        return self.INDEX + cover_img['src']

    def parse_index(self):
        """Return a list of ``(feed title, articles)`` tuples.

        For every feed from ``get_feeds()`` the feed URL is loaded and each
        ``td`` with width 390 that contains a link contributes one article
        dict with 'title', 'date', 'url' and an empty 'description'.
        """
        result = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed') + ' %s...' % (feedtitle or feedurl))
            page = self.index_to_soup(feedurl)
            found = []
            for cell in page.findAll('td', attrs={'width': '390'}):
                link = cell.find('a', href=True)
                if not link:
                    # Cells without a link carry no article.
                    continue
                href = link['href']
                caption = self.tag_to_string(link)
                stamp = strftime(self.timefmt)
                found.append({
                    'title': caption,
                    'date': stamp,
                    'url': href,
                    'description': '',
                })
            result.append((feedtitle, found))
        return result
--- a/src/calibre/web/feeds/recipes/recipe_soldiers.py
+++ b/src/calibre/web/feeds/recipes/recipe_soldiers.py
@ -0,0 +1,57 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.army.mil/soldiers/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Soldiers(BasicNewsRecipe):
    # Recipe for www.army.mil/soldiers/, fed by the front-page RSS feed
    # listed in `feeds`.

    # --- descriptive metadata ---
    title = 'Soldiers'
    __author__ = 'Darko Miletic'
    description = 'The Official U.S. Army Magazine'
    publisher = 'U.S. Army'
    category = 'news, politics, war, weapons'
    language = _('English')

    # --- download settings ---
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    encoding = 'utf-8'
    INDEX = 'http://www.army.mil/soldiers/'

    # One download at a time over a single connection, with a delay value
    # of 4 (unit is defined by the base class; presumably seconds).
    simultaneous_downloads = 1
    delay = 4
    max_connections = 1

    # --- conversion options, derived from the metadata above ---
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
        publisher, description, category)

    # --- content filters ---
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'rightCol'}}]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['addThis', 'comment', 'articleFooter']}},
        {'name': ['object', 'link']},
    ]

    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml')]

    def preprocess_html(self, soup):
        """Delete the ``style`` attribute from every tag and return ``soup``."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_cover_url(self):
        """Return the ``src`` of the current cover image, or None.

        The cover is the ``img`` on the INDEX page whose ``alt`` attribute is
        'Current Magazine Cover'; its ``src`` is returned unchanged.
        """
        front_page = self.index_to_soup(self.INDEX)
        cover_img = front_page.find('img', attrs={'alt': 'Current Magazine Cover'})
        if not cover_img:
            return None
        return cover_img['src']
--- a/src/calibre/web/feeds/recipes/recipe_theonion.py
+++ b/src/calibre/web/feeds/recipes/recipe_theonion.py
@ -0,0 +1,45 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 theonion.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class TheOnion(BasicNewsRecipe):
    # Recipe for theonion.com, fed by the RSS feeds listed in `feeds`.

    # --- descriptive metadata ---
    title = 'The Onion'
    __author__ = 'Darko Miletic'
    description = "America's finest news source"
    publisher = u'Onion, Inc.'
    category = u'humor, news, USA'
    language = _('English')

    # --- download settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True

    # --- conversion options, derived from the metadata above ---
    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
        publisher, description, category)
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # --- content filters ---
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'main'}}]
    remove_tags = [
        {'name': ['object', 'link', 'iframe', 'base']},
        {'name': 'div', 'attrs': {'class': ['toolbar_side', 'graphical_feature', 'toolbar_bottom']}},
        {'name': 'div', 'attrs': {'id': ['recent_slider', 'sidebar', 'pagination', 'related_media']}},
    ]

    feeds = [
        (u'Daily', u'http://feeds.theonion.com/theonion/daily'),
        (u'Sports', u'http://feeds.theonion.com/theonion/sports'),
    ]