Merge upstream changes

2025-07-07 10:14:46 -04:00 · 2009-01-16 20:19:39 -05:00 · 2009-01-16 20:19:39 -05:00 · e73639e5d3
commit e73639e5d3
parent 5225b1f0b0 532820ad8f
16 changed files with 3826 additions and 3652 deletions
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.4.127'
+__version__   = '0.4.128'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -28,6 +28,7 @@ class CYBOOKG3(USBMS):
    STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
    
    EBOOK_DIR_MAIN = "eBooks"
+    SUPPORTS_SUB_DIRS = True

    def delete_books(self, paths, end_session=True):
        for path in paths:
@@ -46,3 +47,8 @@ class CYBOOKG3(USBMS):
                    for filen in fnmatch.filter(files, filename + "*.t2b"):
                        os.unlink(os.path.join(p, filen))

+                try:
+                    os.removedirs(os.path.dirname(path))
+                except:
+                    pass
+
--- a/src/calibre/devices/scanner.py
+++ b/src/calibre/devices/scanner.py
@@ -60,8 +60,9 @@ class DeviceScanner(object):
    def is_device_connected(self, device):
        if iswindows:
            vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
+            vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
            for device_id in self.devices:
-                if vid in device_id and pid in device_id:
+                if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
                    if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
                        if device.can_handle(device_id):
                            return True
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -15,9 +15,10 @@ from calibre.devices.errors import FreeSpaceError
 from calibre.devices.mime import MIME_MAP

 class USBMS(Device):
+    FORMATS = []
    EBOOK_DIR_MAIN = ''
    EBOOK_DIR_CARD = ''
-    FORMATS = []
+    SUPPORTS_SUB_DIRS = False

    def __init__(self, key='-1', log_packets=False, report_progress=None):
        pass
@@ -58,7 +59,15 @@ class USBMS(Device):
        else:
            path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)

-        sizes = map(os.path.getsize, files)
+        def get_size(obj):
+            if hasattr(obj, 'seek'):
+                obj.seek(0, os.SEEK_END)
+                size = obj.tell()
+                obj.seek(0)
+                return size
+            return os.path.getsize(obj)
+
+        sizes = map(get_size, files)
        size = sum(sizes)

        if on_card and size > self.free_space()[2] - 1024*1024: 
@@ -68,11 +77,36 @@ class USBMS(Device):

        paths = []
        names = iter(names)
+        metadata = iter(metadata)
        
        for infile in files:
-            filepath = os.path.join(path, names.next())
+            newpath = path
+            
+            if self.SUPPORTS_SUB_DIRS:
+                mdata = metadata.next()
+                
+                if 'tags' in mdata.keys():
+                    for tag in mdata['tags']:
+                        if tag.startswith('/'):
+                            newpath += tag
+                            newpath = os.path.normpath(newpath)
+                            break
+
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+            
+            filepath = os.path.join(newpath, names.next())                
            paths.append(filepath)
            
+            if hasattr(infile, 'read'):
+                infile.seek(0)
+                
+                dest = open(filepath, 'wb')
+                shutil.copyfileobj(infile, dest, 10*1024*1024)
+
+                dest.flush()                
+                dest.close()
+            else:
                shutil.copy2(infile, filepath)
    
        return zip(paths, cycle([on_card]))
@@ -91,6 +125,10 @@ class USBMS(Device):
            if os.path.exists(path):
                # Delete the ebook
                os.unlink(path)
+                try:
+                    os.removedirs(os.path.dirname(path))
+                except:
+                    pass
    
    @classmethod
    def remove_books_from_metadata(cls, paths, booklists):
@@ -99,7 +137,6 @@ class USBMS(Device):
                for book in bl:
                    if path.endswith(book.path):
                        bl.remove(book)
-                        break
        
    def sync_booklists(self, booklists, end_session=True):
        # There is no meta data on the device to update. The device is treated
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print):
                html_files.append(os.path.abspath(content(f)))
        
        for path in html_files:
+            if not os.access(path, os.R_OK):
+                continue
            base = os.path.dirname(path)
            root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
            for element, attribute, link, pos in list(root.iterlinks()):
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -249,7 +249,7 @@ class MetaInformation(object):
        ans = u''
        ans += u'Title    : ' + unicode(self.title) + u'\n'
        if self.authors:
-            ans += u'Author   : ' + (', '.join(self.authors) if self.authors is not None else u'None')
+            ans += u'Author   : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
            ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
        if self.publisher:
            ans += u'Publisher: '+ unicode(self.publisher) + u'\n'
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -33,7 +33,7 @@ class EXTHHeader(object):
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
-        self.mi = MetaInformation('Unknown', ['Unknown'])
+        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
        self.has_fake_cover = True
        
        for i in range(self.num_items):
@@ -63,7 +63,9 @@ class EXTHHeader(object):
                
    def process_metadata(self, id, content, codec):
        if id == 100:
-            self.mi.authors   = [content.decode(codec, 'ignore').strip()]
+            if self.mi.authors == [_('Unknown')]:
+                self.mi.authors = []
+            self.mi.authors.append(content.decode(codec, 'ignore').strip())
        elif id == 101:
            self.mi.publisher = content.decode(codec, 'ignore').strip()
        elif id == 103:
--- a/src/calibre/gui2/dialogs/job_view.ui
+++ b/src/calibre/gui2/dialogs/job_view.ui
@@ -28,9 +28,6 @@
     <property name="readOnly" >
      <bool>true</bool>
     </property>
-     <property name="maximumBlockCount" >
-      <number>400</number>
-     </property>
    </widget>
   </item>
  </layout>
--- a/src/calibre/gui2/images/news/tomshardware_de.png
+++ b/src/calibre/gui2/images/news/tomshardware_de.png
--- a/src/calibre/library/static/gui.js
+++ b/src/calibre/library/static/gui.js
@@ -7,8 +7,8 @@ var column_titles = {
    'rating'   : 'Rating',
    'date'     : 'Date',
    'tags'     : 'Tags',
-    'series'   : 'Series',
-}
+    'series'   : 'Series'
+};

 String.prototype.format = function() {
    var pattern = /\{\d+\}/g;
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -102,7 +102,7 @@ Device Integration

 What devices does |app| support?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
+At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.

 I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/calibre/parallel.py
+++ b/src/calibre/parallel.py
@@ -286,7 +286,7 @@ def write(socket, msg, timeout=5):
 def read(socket, timeout=5):
    '''
    Read a message from `socket`. The message must have been sent with the :function:`write`
-    function. Raises a `RuntimeError` if the message is corrpted. Can return an
+    function. Raises a `RuntimeError` if the message is corrupted. Can return an
    empty string.
    '''
    if isworker:
@@ -299,7 +299,12 @@ def read(socket, timeout=5):
            if not msg:
                break
            if length is None:
+                try:
                    length, msg = int(msg[:12]), msg[12:]
+                except ValueError:
+                    if DEBUG:
+                        print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg
+                    return ''
            buf.write(msg)
            if buf.tell() >= length:
                break
--- a/src/calibre/utils/pyparsing.py
+++ b/src/calibre/utils/pyparsing.py
@@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
 - embedded comments
 """

-__version__ = "1.5.0"
-__versionTime__ = "28 May 2008 10:05"
+__version__ = "1.5.1"
+__versionTime__ = "2 October 2008 00:44"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

 import string
 from weakref import ref as wkref
-import copy,sys
+import copy
+import sys
 import warnings
 import re
 import sre_constants
-import xml.sax.saxutils
 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

 __all__ = [
@@ -88,7 +88,7 @@ __all__ = [
 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
 ]


@@ -130,11 +130,22 @@ if not _PY3K:
            # ...
 else:
    _ustr = str
+    unichr = chr

 def _str2dict(strg):
    return dict( [(c,0) for c in strg] )
    #~ return set( [c for c in strg] )

+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+    for from_,to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
+
 class _Constants(object):
    pass

@@ -145,7 +156,7 @@ else:
 nums       = string.digits
 hexnums    = nums + "ABCDEFabcdef"
 alphanums  = alphas + nums
-_bslash = "\\"
+_bslash = chr(92)
 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

 class ParseBaseException(Exception):
@@ -193,6 +204,9 @@ class ParseBaseException(Exception):
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
+    def __dir__(self):
+        return "loc msg pstr parserElement lineno col line " \
+               "markInputLine __str__ __repr__".split()

 class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
@@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
-        ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement)
+        super(ParseSyntaxException, self).__init__(
+                                    pe.pstr, pe.loc, pe.msg, pe.parserElement)

 #~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
@@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
        return self.tup[i]
    def __repr__(self):
        return repr(self.tup)
+    def setOffset(self,i):
+        self.tup = (self.tup[0],i)

 class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
@@ -272,9 +289,6 @@ class ParseResults(object):
                self.__toklist = [toklist]
            self.__tokdict = dict()

-        # this line is related to debugging the asXML bug
-        #~ asList = False
-
        if name:
            if not modal:
                self.__accumNames[name] = 0
@@ -286,9 +300,9 @@ class ParseResults(object):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
-                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
-                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
@@ -374,7 +388,7 @@ class ParseResults(object):
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
-                occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
@@ -411,6 +425,7 @@ class ParseResults(object):
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
+            
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        del other
@@ -517,7 +532,7 @@ class ParseResults(object):
                        continue
                    else:
                        resTag = "ITEM"
-                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
+                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]
@@ -594,6 +609,8 @@ class ParseResults(object):
        else:
            self.__parent = None

+    def __dir__(self):
+        return dir(super(ParseResults,self)) + self.keys()

 def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
@@ -715,7 +732,7 @@ class ParserElement(object):
            def breaker(instring, loc, doActions=True, callPreParse=True):
                import pdb
                pdb.set_trace()
-                _parseMethod( instring, loc, doActions, callPreParse )
+                return _parseMethod( instring, loc, doActions, callPreParse )
            breaker._originalParseMethod = _parseMethod
            self._parse = breaker
        else:
@@ -1047,6 +1064,7 @@ class ParserElement(object):
            instring = instring.expandtabs()
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
+            loc = self.preParse( instring, loc )
            StringEnd()._parse( instring, loc )
        return tokens

@@ -1158,11 +1176,7 @@ class ParserElement(object):
        if isinstance(other,int):
            minElements, optElements = other,0
        elif isinstance(other,tuple):
-            if len(other)==0:
-                other = (None,None)
-            elif len(other)==1:
-                other = (other[0],None)
-            if len(other)==2:
+            other = (other + (None, None))[:2]
            if other[0] is None:
                other = (0, other[1])
            if isinstance(other[0],int) and other[1] is None:
@@ -1177,8 +1191,6 @@ class ParserElement(object):
                optElements -= minElements
            else:
                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
-            else:
-                raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
        else:
            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))

@@ -1360,7 +1372,7 @@ class ParserElement(object):
        """Check defined expressions for valid structure, check for infinite recursive definitions."""
        self.checkRecursion( [] )

-    def parseFile( self, file_or_filename ):
+    def parseFile( self, file_or_filename, parseAll=False ):
        """Execute the parse expression on the given file or filename.
           If a filename is specified (instead of a file object),
           the entire file is opened, read, and closed before parsing.
@@ -1371,7 +1383,7 @@ class ParserElement(object):
            f = open(file_or_filename, "rb")
            file_contents = f.read()
            f.close()
-        return self.parseString(file_contents)
+        return self.parseString(file_contents, parseAll)

    def getException(self):
        return ParseException("",0,self.errmsg,self)
@@ -1393,12 +1405,18 @@ class ParserElement(object):
        else:
            return super(ParserElement,self)==other

+    def __ne__(self,other):
+        return not (self == other)
+
    def __hash__(self):
        return hash(id(self))

    def __req__(self,other):
        return self == other

+    def __rne__(self,other):
+        return not (self == other)
+

 class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
@@ -1533,7 +1551,6 @@ class Keyword(Token):
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)

-
 class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
@@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

@@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

@@ -2271,10 +2288,9 @@ class And(ParseExpression):
    """

    class _ErrorStop(Empty):
-        def __new__(cls,*args,**kwargs):
-            return And._ErrorStop.instance
-    _ErrorStop.instance = Empty()
-    _ErrorStop.instance.leaveWhitespace()
+        def __init__(self, *args, **kwargs):
+            super(Empty,self).__init__(*args, **kwargs)
+            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
@@ -2293,12 +2309,14 @@ class And(ParseExpression):
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
-            if e is And._ErrorStop.instance:
+            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
                except ParseBaseException, pe:
                    raise ParseSyntaxException(pe)
                except IndexError, ie:
@@ -2502,7 +2520,7 @@ class Each(ParseExpression):
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
-        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)
+        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]

        resultlist = []
        for e in matchOrder:
@@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
-    def __init__( self, other, include=False, ignore=None ):
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            self.expr = self.expr.copy()
@@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
+        if failOn is not None and isinstance(failOn, basestring):
+            self.failOn = Literal(failOn)
+        else:
+            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

@@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
+        failParse = False
        while loc <= instrlen:
            try:
+                if self.failOn:
+                    failParse = True
+                    self.failOn.tryParse(instring, loc)
+                    failParse = False
                loc = expr._skipIgnorables( instring, loc )
                expr._parse( instring, loc, doActions=False, callPreParse=False )
-                if self.includeMatch:
                skipText = instring[startLoc:loc]
+                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
@@ -2811,8 +2838,11 @@ class SkipTo(ParseElementEnhance):
                    else:
                        return loc, [ skipText ]
                else:
-                    return loc, [ instring[startLoc:loc] ]
+                    return loc, [ skipText ]
            except (ParseException,IndexError):
+                if failParse:
+                    raise
+                else:
                    loc += 1
        exc = self.myException
        exc.loc = loc
@@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
        if hasattr(self,"name"):
            return self.name

+        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
@@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
            else:
                retString = "None"
        finally:
-            self.__class__ = Forward
-        return "Forward: "+retString
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString

    def copy(self):
        if self.expr is not None:
@@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
 def _escapeRegexRangeChars(s):
    #~  escape these chars: ^-]
    for c in r"\^-]":
-        s = s.replace(c,"\\"+c)
+        s = s.replace(c,_bslash+c)
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
@@ -3195,6 +3226,33 @@ def dictOf( key, value ):
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )

+def originalTextFor(expr, asString=True):
+    """Helper to return the original, untokenized text for a given expression.  Useful to
+       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+       revert separate tokens with intervening whitespace back to the original matching
+       input text. Simpler to use than the parse action keepOriginalText, and does not
+       require the inspect module to chase up the call stack.  By default, returns a 
+       string containing the original parsed text.  
+       
+       If the optional asString argument is passed as False, then the return value is a 
+       ParseResults containing any results names that were originally matched, and a 
+       single token containing the original matched text from the input string.  So if 
+       the expression passed to originalTextFor contains expressions with defined
+       results names, you must set asString to False if you want to preserve those
+       results name values."""
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            del t[:]
+            t.insert(0, s[t._original_start:t._original_end])
+            del t["_original_start"]
+            del t["_original_end"]
+    matchExpr.setParseAction(extractText)
+    return matchExpr
+    
 # convenience constants for positional expressions
 empty       = Empty().setName("empty")
 lineStart   = LineStart().setName("lineStart")
@@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if isinstance(opener,basestring) and isinstance(closer,basestring):
+            if len(opener) == 1 and len(closer)==1:
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
-                content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
+                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+                                ).setParseAction(lambda t:t[0].strip()))
+            else:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr + 
+                                    ~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    ret = Forward()
@@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
-    blockStatementExpr.ignore("\\" + LineEnd())
+    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
 commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

 # it's easy to get these comment structures wrong - they're very common, so may as well make them available
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
           'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
           'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
           'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
-           'joelonsoftware', 'telepolis', 'common_dreams', 'nin',
+           'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
+           
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_tomshardware.py
+++ b/src/calibre/web/feeds/recipes/recipe_tomshardware.py
@@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 tomshardware.com
 '''

-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class Tomshardware(BasicNewsRecipe):
--- a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py
+++ b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py
@@ -0,0 +1,54 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch tomshardware.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class TomsHardwareDe(BasicNewsRecipe):
+    
+    title = 'Tom\'s Hardware German'
+    description = 'Computer news in german'
+    __author__ = 'Oliver Niesner'
+    use_embedded_content   = False
+    timefmt = ' [%d %b %Y]'
+    max_articles_per_feed = 50
+    no_stylesheets = True
+    encoding = 'utf-8'
+
+    #preprocess_regexps = \
+#	[(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+#		[
+#		(r'<84>', lambda match: ''),
+#		(r'<93>', lambda match: ''),
+#		]
+#	]
+    
+    remove_tags = [dict(id='outside-advert'),
+		   dict(id='advertRightWhite'),
+		   dict(id='header-advert'),
+		   dict(id='header-banner'),
+		   dict(id='header-menu'),
+		   dict(id='header-top'),
+		   dict(id='header-tools'),
+		   dict(id='nbComment'),
+		   dict(id='internalSidebar'),
+		   dict(id='header-news-infos'),
+		   dict(id='breadcrumbs'),
+		   dict(id=''),
+		   dict(name='div', attrs={'class':'pyjama'}),
+		   dict(name='href', attrs={'class':'comment'}),
+		   dict(name='div', attrs={'class':'greyBoxR clearfix'}),
+		   dict(name='div', attrs={'class':'greyBoxL clearfix'}),
+		   dict(name='div', attrs={'class':'greyBox clearfix'}),
+		   dict(id='')]
+    #remove_tags_before = [dict(id='header-news-title')]
+    remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]
+    #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
+    
+    feeds =  [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] 
+