Merge upstream changes

commit e73639e5d3
Author: Marshall T. Vandegrift
Date:   2009-01-16 20:19:39 -05:00

16 changed files with 3826 additions and 3652 deletions

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.4.127'
+__version__ = '0.4.128'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.

View File

@@ -28,6 +28,7 @@ class CYBOOKG3(USBMS):
     STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
     EBOOK_DIR_MAIN = "eBooks"
+    SUPPORTS_SUB_DIRS = True

     def delete_books(self, paths, end_session=True):
         for path in paths:
@@ -46,3 +47,8 @@ class CYBOOKG3(USBMS):
                 for filen in fnmatch.filter(files, filename + "*.t2b"):
                     os.unlink(os.path.join(p, filen))
+            try:
+                os.removedirs(os.path.dirname(path))
+            except:
+                pass

View File

@@ -60,8 +60,9 @@ class DeviceScanner(object):
     def is_device_connected(self, device):
         if iswindows:
             vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
+            vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
             for device_id in self.devices:
-                if vid in device_id and pid in device_id:
+                if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
                     if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
                         if device.can_handle(device_id):
                             return True
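For illustration only (the vendor id is a made-up example): these are the two id forms the scanner now accepts, since some Windows device IDs report the numbers in decimal rather than hex.

    VENDOR_ID = 0x0525
    print 'vid_%4.4x' % VENDOR_ID   # 'vid_0525' (hex form used by most Windows device IDs)
    print 'vid_%i' % VENDOR_ID      # 'vid_1317' (decimal form some device IDs report)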

View File

@@ -15,9 +15,10 @@ from calibre.devices.errors import FreeSpaceError
 from calibre.devices.mime import MIME_MAP

 class USBMS(Device):
+    FORMATS = []
     EBOOK_DIR_MAIN = ''
     EBOOK_DIR_CARD = ''
-    FORMATS = []
+    SUPPORTS_SUB_DIRS = False

     def __init__(self, key='-1', log_packets=False, report_progress=None):
         pass
@@ -58,7 +59,15 @@ class USBMS(Device):
         else:
             path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)

-        sizes = map(os.path.getsize, files)
+        def get_size(obj):
+            if hasattr(obj, 'seek'):
+                obj.seek(0, os.SEEK_END)
+                size = obj.tell()
+                obj.seek(0)
+                return size
+            return os.path.getsize(obj)
+
+        sizes = map(get_size, files)
         size = sum(sizes)

         if on_card and size > self.free_space()[2] - 1024*1024:
@@ -68,11 +77,36 @@ class USBMS(Device):
         paths = []
         names = iter(names)
+        metadata = iter(metadata)

         for infile in files:
-            filepath = os.path.join(path, names.next())
+            newpath = path
+
+            if self.SUPPORTS_SUB_DIRS:
+                mdata = metadata.next()
+
+                if 'tags' in mdata.keys():
+                    for tag in mdata['tags']:
+                        if tag.startswith('/'):
+                            newpath += tag
+                            newpath = os.path.normpath(newpath)
+                            break
+
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+
+            filepath = os.path.join(newpath, names.next())
             paths.append(filepath)
-            shutil.copy2(infile, filepath)
+
+            if hasattr(infile, 'read'):
+                infile.seek(0)
+
+                dest = open(filepath, 'wb')
+                shutil.copyfileobj(infile, dest, 10*1024*1024)
+
+                dest.flush()
+                dest.close()
+            else:
+                shutil.copy2(infile, filepath)

         return zip(paths, cycle([on_card]))
@@ -91,6 +125,10 @@ class USBMS(Device):
             if os.path.exists(path):
                 # Delete the ebook
                 os.unlink(path)
+                try:
+                    os.removedirs(os.path.dirname(path))
+                except:
+                    pass

     @classmethod
     def remove_books_from_metadata(cls, paths, booklists):
@@ -99,7 +137,6 @@ class USBMS(Device):
                 for book in bl:
                     if path.endswith(book.path):
                         bl.remove(book)
-                        break

     def sync_booklists(self, booklists, end_session=True):
         # There is no meta data on the device to update. The device is treated
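To make the new SUPPORTS_SUB_DIRS behaviour concrete, here is a standalone sketch of the path mapping that upload_books now performs; the device prefix and the tag values are made-up examples.

    import os

    def target_dir(base, tags):
        # Mirrors the hunk above: the first tag that starts with '/' is
        # appended to the base directory and normalized; other tags are ignored.
        newpath = base
        for tag in tags:
            if tag.startswith('/'):
                newpath = os.path.normpath(newpath + tag)
                break
        return newpath

    print target_dir('/media/reader/eBooks', ['/News/Weekly', 'fiction'])
    # -> /media/reader/eBooks/News/Weekly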

View File

@@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print):
         html_files.append(os.path.abspath(content(f)))

     for path in html_files:
+        if not os.access(path, os.R_OK):
+            continue
         base = os.path.dirname(path)
         root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
         for element, attribute, link, pos in list(root.iterlinks()):

View File

@@ -249,7 +249,7 @@ class MetaInformation(object):
         ans = u''
         ans += u'Title : ' + unicode(self.title) + u'\n'
         if self.authors:
-            ans += u'Author : ' + (', '.join(self.authors) if self.authors is not None else u'None')
+            ans += u'Author : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
             ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
         if self.publisher:
             ans += u'Publisher: '+ unicode(self.publisher) + u'\n'

View File

@@ -33,7 +33,7 @@ class EXTHHeader(object):
         self.length, self.num_items = struct.unpack('>LL', raw[4:12])
         raw = raw[12:]
         pos = 0
-        self.mi = MetaInformation('Unknown', ['Unknown'])
+        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
         self.has_fake_cover = True
         for i in range(self.num_items):
@@ -63,7 +63,9 @@ class EXTHHeader(object):
     def process_metadata(self, id, content, codec):
         if id == 100:
-            self.mi.authors = [content.decode(codec, 'ignore').strip()]
+            if self.mi.authors == [_('Unknown')]:
+                self.mi.authors = []
+            self.mi.authors.append(content.decode(codec, 'ignore').strip())
         elif id == 101:
             self.mi.publisher = content.decode(codec, 'ignore').strip()
         elif id == 103:
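A quick illustration of the new behaviour (the record values are invented): repeated EXTH records with id 100 now accumulate authors instead of each one replacing the whole list.

    authors = ['Unknown']                      # placeholder set when the header is created
    for content in ['Jane Doe', 'John Roe']:   # two hypothetical id == 100 records
        if authors == ['Unknown']:
            authors = []
        authors.append(content)
    print authors                              # ['Jane Doe', 'John Roe']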

View File

@@ -28,9 +28,6 @@
         <property name="readOnly" >
          <bool>true</bool>
         </property>
-        <property name="maximumBlockCount" >
-         <number>400</number>
-        </property>
        </widget>
       </item>
      </layout>

Binary file not shown (new image, 992 B).

View File

@@ -7,8 +7,8 @@ var column_titles = {
     'rating' : 'Rating',
     'date' : 'Date',
     'tags' : 'Tags',
-    'series' : 'Series',
-}
+    'series' : 'Series'
+};

 String.prototype.format = function() {
     var pattern = /\{\d+\}/g;

View File

@@ -102,7 +102,7 @@ Device Integration
 What devices does |app| support?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
+At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.

 I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -286,7 +286,7 @@ def write(socket, msg, timeout=5):
 def read(socket, timeout=5):
     '''
     Read a message from `socket`. The message must have been sent with the :function:`write`
-    function. Raises a `RuntimeError` if the message is corrpted. Can return an
+    function. Raises a `RuntimeError` if the message is corrupted. Can return an
     empty string.
     '''
     if isworker:
@@ -299,7 +299,12 @@ def read(socket, timeout=5):
         if not msg:
             break
         if length is None:
-            length, msg = int(msg[:12]), msg[12:]
+            try:
+                length, msg = int(msg[:12]), msg[12:]
+            except ValueError:
+                if DEBUG:
+                    print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg
+                return ''
         buf.write(msg)
         if buf.tell() >= length:
             break
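For context, read() above expects each message to begin with a fixed-width, 12-character decimal length field (the int(msg[:12]) / msg[12:] split). A minimal sketch of that framing follows; the helper name is hypothetical and it ignores whatever integrity checking write() adds on top.

    def frame(payload):
        # Left-justified decimal length padded to 12 characters, then the payload,
        # matching the split performed in read() above.
        return '%-12d%s' % (len(payload), payload)

    print repr(frame('hello'))   # '5' padded to 12 characters, followed by 'hello'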

View File

@@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
  - embedded comments
 """

-__version__ = "1.5.0"
-__versionTime__ = "28 May 2008 10:05"
+__version__ = "1.5.1"
+__versionTime__ = "2 October 2008 00:44"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

 import string
 from weakref import ref as wkref
-import copy,sys
+import copy
+import sys
 import warnings
 import re
 import sre_constants
-import xml.sax.saxutils
 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

 __all__ = [
@@ -88,7 +88,7 @@ __all__ = [
 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
 ]
@@ -130,11 +130,22 @@ if not _PY3K:
         # ...
 else:
     _ustr = str
+    unichr = chr

 def _str2dict(strg):
     return dict( [(c,0) for c in strg] )
     #~ return set( [c for c in strg] )

+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+    for from_,to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
+
 class _Constants(object):
     pass
@@ -145,7 +156,7 @@ else:
 nums = string.digits
 hexnums = nums + "ABCDEFabcdef"
 alphanums = alphas + nums
-_bslash = "\\"
+_bslash = chr(92)
 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

 class ParseBaseException(Exception):
@@ -193,6 +204,9 @@ class ParseBaseException(Exception):
             line_str = "".join( [line_str[:line_column],
                                 markerString, line_str[line_column:]])
         return line_str.strip()
+    def __dir__(self):
+        return "loc msg pstr parserElement lineno col line " \
+               "markInputLine __str__ __repr__".split()

 class ParseException(ParseBaseException):
     """exception thrown when parse expressions don't match class;
@@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
        ErrorStop indicates that parsing is to stop immediately because
        an unbacktrackable syntax error has been found"""
     def __init__(self, pe):
-        ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement)
+        super(ParseSyntaxException, self).__init__(
+            pe.pstr, pe.loc, pe.msg, pe.parserElement)

 #~ class ReparseException(ParseBaseException):
 #~ """Experimental class - parse actions can raise this exception to cause
@@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
         return self.tup[i]
     def __repr__(self):
         return repr(self.tup)
+    def setOffset(self,i):
+        self.tup = (self.tup[0],i)

 class ParseResults(object):
     """Structured parse results, to provide multiple means of access to the parsed data:
@@ -272,9 +289,6 @@ class ParseResults(object):
             self.__toklist = [toklist]
         self.__tokdict = dict()

-        # this line is related to debugging the asXML bug
-        #~ asList = False
-
         if name:
             if not modal:
                 self.__accumNames[name] = 0
@@ -286,9 +300,9 @@ class ParseResults(object):
                     toklist = [ toklist ]
                 if asList:
                     if isinstance(toklist,ParseResults):
-                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                     else:
-                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                     self[name].__name = name
                 else:
                     try:
@@ -374,7 +388,7 @@ class ParseResults(object):
         for name in self.__tokdict:
             occurrences = self.__tokdict[name]
             for k, (value, position) in enumerate(occurrences):
-                occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

     def items( self ):
         """Returns all named result keys and values as a list of tuples."""
@@ -411,6 +425,7 @@ class ParseResults(object):
                 self[k] = v
                 if isinstance(v[0],ParseResults):
                     v[0].__parent = wkref(self)
+
         self.__toklist += other.__toklist
         self.__accumNames.update( other.__accumNames )
         del other
@@ -517,7 +532,7 @@ class ParseResults(object):
                     continue
                 else:
                     resTag = "ITEM"
-                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
+                xmlBodyText = _xml_escape(_ustr(res))
                 out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                 xmlBodyText,
                                                 "</", resTag, ">" ]
@@ -594,6 +609,8 @@ class ParseResults(object):
         else:
             self.__parent = None

+    def __dir__(self):
+        return dir(super(ParseResults,self)) + self.keys()

 def col (loc,strg):
     """Returns current column within a string, counting newlines as line separators.
@@ -715,7 +732,7 @@ class ParserElement(object):
             def breaker(instring, loc, doActions=True, callPreParse=True):
                 import pdb
                 pdb.set_trace()
-                _parseMethod( instring, loc, doActions, callPreParse )
+                return _parseMethod( instring, loc, doActions, callPreParse )
             breaker._originalParseMethod = _parseMethod
             self._parse = breaker
         else:
@@ -1047,6 +1064,7 @@ class ParserElement(object):
             instring = instring.expandtabs()
         loc, tokens = self._parse( instring, 0 )
         if parseAll:
+            loc = self.preParse( instring, loc )
             StringEnd()._parse( instring, loc )
         return tokens
@@ -1158,11 +1176,7 @@ class ParserElement(object):
         if isinstance(other,int):
             minElements, optElements = other,0
         elif isinstance(other,tuple):
-            if len(other)==0:
-                other = (None,None)
-            elif len(other)==1:
-                other = (other[0],None)
-            if len(other)==2:
+            other = (other + (None, None))[:2]
             if other[0] is None:
                 other = (0, other[1])
             if isinstance(other[0],int) and other[1] is None:
@@ -1177,8 +1191,6 @@ class ParserElement(object):
                 optElements -= minElements
             else:
                 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
-            else:
-                raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
         else:
             raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
@@ -1360,7 +1372,7 @@ class ParserElement(object):
         """Check defined expressions for valid structure, check for infinite recursive definitions."""
         self.checkRecursion( [] )

-    def parseFile( self, file_or_filename ):
+    def parseFile( self, file_or_filename, parseAll=False ):
         """Execute the parse expression on the given file or filename.
            If a filename is specified (instead of a file object),
            the entire file is opened, read, and closed before parsing.
@@ -1371,7 +1383,7 @@ class ParserElement(object):
             f = open(file_or_filename, "rb")
             file_contents = f.read()
             f.close()
-        return self.parseString(file_contents)
+        return self.parseString(file_contents, parseAll)

     def getException(self):
         return ParseException("",0,self.errmsg,self)
@@ -1393,12 +1405,18 @@ class ParserElement(object):
         else:
             return super(ParserElement,self)==other

+    def __ne__(self,other):
+        return not (self == other)
+
     def __hash__(self):
         return hash(id(self))

     def __req__(self,other):
         return self == other

+    def __rne__(self,other):
+        return not (self == other)
+
 class Token(ParserElement):
     """Abstract ParserElement subclass, for defining atomic matching patterns."""
@@ -1533,7 +1551,6 @@ class Keyword(Token):
         Keyword.DEFAULT_KEYWORD_CHARS = chars
     setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)

-
 class CaselessLiteral(Literal):
     """Token to match a specified string, ignoring case of letters.
        Note: the matched results will always be in the case of the given
@@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
     """Matches if current position is at the beginning of a line within the parse string"""
     def __init__( self ):
         super(LineStart,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected start of line"
         #self.myException.msg = self.errmsg

@@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
     """Matches if current position is at the end of a line within the parse string"""
     def __init__( self ):
         super(LineEnd,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected end of line"
         #self.myException.msg = self.errmsg
@@ -2271,10 +2288,9 @@ class And(ParseExpression):
     """

     class _ErrorStop(Empty):
-        def __new__(cls,*args,**kwargs):
-            return And._ErrorStop.instance
-    _ErrorStop.instance = Empty()
-    _ErrorStop.instance.leaveWhitespace()
+        def __init__(self, *args, **kwargs):
+            super(Empty,self).__init__(*args, **kwargs)
+            self.leaveWhitespace()

     def __init__( self, exprs, savelist = True ):
         super(And,self).__init__(exprs, savelist)
@@ -2293,12 +2309,14 @@ class And(ParseExpression):
         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
         errorStop = False
         for e in self.exprs[1:]:
-            if e is And._ErrorStop.instance:
+            if isinstance(e, And._ErrorStop):
                 errorStop = True
                 continue
             if errorStop:
                 try:
                     loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
                 except ParseBaseException, pe:
                     raise ParseSyntaxException(pe)
                 except IndexError, ie:
@@ -2502,7 +2520,7 @@ class Each(ParseExpression):
             raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

         # add any unmatched Optionals, in case they have default values defined
-        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)
+        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]

         resultlist = []
         for e in matchOrder:
@@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
        argument is used to define grammars (typically quoted strings and comments) that
        might contain false matches.
     """
-    def __init__( self, other, include=False, ignore=None ):
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
         super( SkipTo, self ).__init__( other )
         if ignore is not None:
             self.expr = self.expr.copy()
@@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
         self.mayIndexError = False
         self.includeMatch = include
         self.asList = False
+        if failOn is not None and isinstance(failOn, basestring):
+            self.failOn = Literal(failOn)
+        else:
+            self.failOn = failOn
         self.errmsg = "No match found for "+_ustr(self.expr)
         #self.myException = ParseException("",0,self.errmsg,self)
@@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
         startLoc = loc
         instrlen = len(instring)
         expr = self.expr
+        failParse = False
         while loc <= instrlen:
             try:
+                if self.failOn:
+                    failParse = True
+                    self.failOn.tryParse(instring, loc)
+                    failParse = False
                 loc = expr._skipIgnorables( instring, loc )
                 expr._parse( instring, loc, doActions=False, callPreParse=False )
+                skipText = instring[startLoc:loc]
                 if self.includeMatch:
-                    skipText = instring[startLoc:loc]
                     loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                     if mat:
                         skipRes = ParseResults( skipText )
@@ -2811,8 +2838,11 @@ class SkipTo(ParseElementEnhance):
                     else:
                         return loc, [ skipText ]
                 else:
-                    return loc, [ instring[startLoc:loc] ]
+                    return loc, [ skipText ]
             except (ParseException,IndexError):
+                if failParse:
+                    raise
+                else:
                     loc += 1
         exc = self.myException
         exc.loc = loc
@@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
         if hasattr(self,"name"):
             return self.name

+        self._revertClass = self.__class__
         self.__class__ = _ForwardNoRecurse
         try:
             if self.expr is not None:
@@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
             else:
                 retString = "None"
         finally:
-            self.__class__ = Forward
-        return "Forward: "+retString
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString

     def copy(self):
         if self.expr is not None:
@@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
 def _escapeRegexRangeChars(s):
     #~  escape these chars: ^-]
     for c in r"\^-]":
-        s = s.replace(c,"\\"+c)
+        s = s.replace(c,_bslash+c)
     s = s.replace("\n",r"\n")
     s = s.replace("\t",r"\t")
     return _ustr(s)
@@ -3195,6 +3226,33 @@ def dictOf( key, value ):
     """
     return Dict( ZeroOrMore( Group ( key + value ) ) )

+def originalTextFor(expr, asString=True):
+    """Helper to return the original, untokenized text for a given expression. Useful to
+       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+       revert separate tokens with intervening whitespace back to the original matching
+       input text. Simpler to use than the parse action keepOriginalText, and does not
+       require the inspect module to chase up the call stack. By default, returns a
+       string containing the original parsed text.
+
+       If the optional asString argument is passed as False, then the return value is a
+       ParseResults containing any results names that were originally matched, and a
+       single token containing the original matched text from the input string. So if
+       the expression passed to originalTextFor contains expressions with defined
+       results names, you must set asString to False if you want to preserve those
+       results name values."""
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            del t[:]
+            t.insert(0, s[t._original_start:t._original_end])
+            del t["_original_start"]
+            del t["_original_end"]
+    matchExpr.setParseAction(extractText)
+    return matchExpr
+
 # convenience constants for positional expressions
 empty = Empty().setName("empty")
 lineStart = LineStart().setName("lineStart")
@@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
         raise ValueError("opening and closing strings cannot be the same")
     if content is None:
         if isinstance(opener,basestring) and isinstance(closer,basestring):
+            if len(opener) == 1 and len(closer)==1:
                 if ignoreExpr is not None:
                     content = (Combine(OneOrMore(~ignoreExpr +
                                 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                             ).setParseAction(lambda t:t[0].strip()))
                 else:
-                content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
+                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+                                ).setParseAction(lambda t:t[0].strip()))
+            else:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr +
+                                ~Literal(opener) + ~Literal(closer) +
+                                CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                            ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+                                CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                            ).setParseAction(lambda t:t[0].strip()))
         else:
             raise ValueError("opening and closing arguments must be strings if no content expression is given")
     ret = Forward()
@@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
     else:
         smExpr = Group( Optional(NL) +
             (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
-    blockStatementExpr.ignore("\\" + LineEnd())
+    blockStatementExpr.ignore(_bslash + LineEnd())
     return smExpr

 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
 commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

 # it's easy to get these comment structures wrong - they're very common, so may as well make them available
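A short usage sketch of two of the pyparsing 1.5.1 additions shown above, originalTextFor and the new failOn argument to SkipTo; the sample markup and the ';' delimiter are made-up examples.

    from pyparsing import makeHTMLTags, originalTextFor, SkipTo, Literal, LineEnd

    src = '<b>Some <i>nested</i> markup</b> and more text'
    b_start, b_end = makeHTMLTags('b')

    # originalTextFor hands back the exact slice of input the wrapped expression
    # matched, instead of the individual tokens.
    bold = originalTextFor(b_start + SkipTo(b_end) + b_end)
    print bold.searchString(src)   # [['<b>Some <i>nested</i> markup</b>']]

    # failOn aborts the skip if the given expression shows up before the target,
    # here stopping the scan for ';' at the end of the current line.
    value = SkipTo(Literal(';'), failOn=LineEnd())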

View File

@@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
     'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
     'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
     'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
-    'joelonsoftware', 'telepolis', 'common_dreams', 'nin',
+    'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
 )]

 import re, imp, inspect, time, os

View File

@@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 tomshardware.com
 '''

-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class Tomshardware(BasicNewsRecipe):

View File

@@ -0,0 +1,54 @@
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+'''
+Fetch tomshardware.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class TomsHardwareDe(BasicNewsRecipe):
+
+    title = 'Tom\'s Hardware German'
+    description = 'Computer news in german'
+    __author__ = 'Oliver Niesner'
+    use_embedded_content = False
+    timefmt = ' [%d %b %Y]'
+    max_articles_per_feed = 50
+    no_stylesheets = True
+    encoding = 'utf-8'
+
+    #preprocess_regexps = \
+    #    [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+    #     [
+    #        (r'<84>', lambda match: ''),
+    #        (r'<93>', lambda match: ''),
+    #     ]
+    #    ]
+
+    remove_tags = [dict(id='outside-advert'),
+                   dict(id='advertRightWhite'),
+                   dict(id='header-advert'),
+                   dict(id='header-banner'),
+                   dict(id='header-menu'),
+                   dict(id='header-top'),
+                   dict(id='header-tools'),
+                   dict(id='nbComment'),
+                   dict(id='internalSidebar'),
+                   dict(id='header-news-infos'),
+                   dict(id='breadcrumbs'),
+                   dict(id=''),
+                   dict(name='div', attrs={'class':'pyjama'}),
+                   dict(name='href', attrs={'class':'comment'}),
+                   dict(name='div', attrs={'class':'greyBoxR clearfix'}),
+                   dict(name='div', attrs={'class':'greyBoxL clearfix'}),
+                   dict(name='div', attrs={'class':'greyBox clearfix'}),
+                   dict(id='')]
+    #remove_tags_before = [dict(id='header-news-title')]
+    remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]
+    #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
+
+    feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]