mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Merge upstream changes
This commit is contained in:
commit
e73639e5d3
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.4.127'
|
||||
__version__ = '0.4.128'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -28,6 +28,7 @@ class CYBOOKG3(USBMS):
|
||||
STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
|
||||
|
||||
EBOOK_DIR_MAIN = "eBooks"
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
def delete_books(self, paths, end_session=True):
|
||||
for path in paths:
|
||||
@ -46,3 +47,8 @@ class CYBOOKG3(USBMS):
|
||||
for filen in fnmatch.filter(files, filename + "*.t2b"):
|
||||
os.unlink(os.path.join(p, filen))
|
||||
|
||||
try:
|
||||
os.removedirs(os.path.dirname(path))
|
||||
except:
|
||||
pass
|
||||
|
||||
|
@ -60,8 +60,9 @@ class DeviceScanner(object):
|
||||
def is_device_connected(self, device):
|
||||
if iswindows:
|
||||
vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
|
||||
vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
|
||||
for device_id in self.devices:
|
||||
if vid in device_id and pid in device_id:
|
||||
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
|
||||
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
|
||||
if device.can_handle(device_id):
|
||||
return True
|
||||
|
@ -15,9 +15,10 @@ from calibre.devices.errors import FreeSpaceError
|
||||
from calibre.devices.mime import MIME_MAP
|
||||
|
||||
class USBMS(Device):
|
||||
FORMATS = []
|
||||
EBOOK_DIR_MAIN = ''
|
||||
EBOOK_DIR_CARD = ''
|
||||
FORMATS = []
|
||||
SUPPORTS_SUB_DIRS = False
|
||||
|
||||
def __init__(self, key='-1', log_packets=False, report_progress=None):
|
||||
pass
|
||||
@ -58,7 +59,15 @@ class USBMS(Device):
|
||||
else:
|
||||
path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
|
||||
|
||||
sizes = map(os.path.getsize, files)
|
||||
def get_size(obj):
    """Return the size in bytes of ``obj``.

    ``obj`` is either a file-like object (anything exposing ``seek``) or a
    filesystem path. File-like objects are measured by seeking to the end
    and are left positioned at offset 0 afterwards; anything else is handed
    to ``os.path.getsize``.
    """
    if not hasattr(obj, 'seek'):
        # Not a stream: treat it as a path on disk.
        return os.path.getsize(obj)

    obj.seek(0, os.SEEK_END)
    nbytes = obj.tell()
    # Rewind so the caller can read the stream from the beginning.
    obj.seek(0)
    return nbytes
|
||||
|
||||
sizes = map(get_size, files)
|
||||
size = sum(sizes)
|
||||
|
||||
if on_card and size > self.free_space()[2] - 1024*1024:
|
||||
@ -68,12 +77,37 @@ class USBMS(Device):
|
||||
|
||||
paths = []
|
||||
names = iter(names)
|
||||
metadata = iter(metadata)
|
||||
|
||||
for infile in files:
|
||||
filepath = os.path.join(path, names.next())
|
||||
newpath = path
|
||||
|
||||
if self.SUPPORTS_SUB_DIRS:
|
||||
mdata = metadata.next()
|
||||
|
||||
if 'tags' in mdata.keys():
|
||||
for tag in mdata['tags']:
|
||||
if tag.startswith('/'):
|
||||
newpath += tag
|
||||
newpath = os.path.normpath(newpath)
|
||||
break
|
||||
|
||||
if not os.path.exists(newpath):
|
||||
os.makedirs(newpath)
|
||||
|
||||
filepath = os.path.join(newpath, names.next())
|
||||
paths.append(filepath)
|
||||
|
||||
shutil.copy2(infile, filepath)
|
||||
if hasattr(infile, 'read'):
|
||||
infile.seek(0)
|
||||
|
||||
dest = open(filepath, 'wb')
|
||||
shutil.copyfileobj(infile, dest, 10*1024*1024)
|
||||
|
||||
dest.flush()
|
||||
dest.close()
|
||||
else:
|
||||
shutil.copy2(infile, filepath)
|
||||
|
||||
return zip(paths, cycle([on_card]))
|
||||
|
||||
@ -91,6 +125,10 @@ class USBMS(Device):
|
||||
if os.path.exists(path):
|
||||
# Delete the ebook
|
||||
os.unlink(path)
|
||||
try:
|
||||
os.removedirs(os.path.dirname(path))
|
||||
except:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def remove_books_from_metadata(cls, paths, booklists):
|
||||
@ -99,7 +137,6 @@ class USBMS(Device):
|
||||
for book in bl:
|
||||
if path.endswith(book.path):
|
||||
bl.remove(book)
|
||||
break
|
||||
|
||||
def sync_booklists(self, booklists, end_session=True):
|
||||
# There is no meta data on the device to update. The device is treated
|
||||
|
@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print):
|
||||
html_files.append(os.path.abspath(content(f)))
|
||||
|
||||
for path in html_files:
|
||||
if not os.access(path, os.R_OK):
|
||||
continue
|
||||
base = os.path.dirname(path)
|
||||
root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
|
||||
for element, attribute, link, pos in list(root.iterlinks()):
|
||||
|
@ -249,7 +249,7 @@ class MetaInformation(object):
|
||||
ans = u''
|
||||
ans += u'Title : ' + unicode(self.title) + u'\n'
|
||||
if self.authors:
|
||||
ans += u'Author : ' + (', '.join(self.authors) if self.authors is not None else u'None')
|
||||
ans += u'Author : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
|
||||
ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
|
||||
if self.publisher:
|
||||
ans += u'Publisher: '+ unicode(self.publisher) + u'\n'
|
||||
|
@ -33,7 +33,7 @@ class EXTHHeader(object):
|
||||
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
|
||||
raw = raw[12:]
|
||||
pos = 0
|
||||
self.mi = MetaInformation('Unknown', ['Unknown'])
|
||||
self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||
self.has_fake_cover = True
|
||||
|
||||
for i in range(self.num_items):
|
||||
@ -63,7 +63,9 @@ class EXTHHeader(object):
|
||||
|
||||
def process_metadata(self, id, content, codec):
|
||||
if id == 100:
|
||||
self.mi.authors = [content.decode(codec, 'ignore').strip()]
|
||||
if self.mi.authors == [_('Unknown')]:
|
||||
self.mi.authors = []
|
||||
self.mi.authors.append(content.decode(codec, 'ignore').strip())
|
||||
elif id == 101:
|
||||
self.mi.publisher = content.decode(codec, 'ignore').strip()
|
||||
elif id == 103:
|
||||
|
@ -28,9 +28,6 @@
|
||||
<property name="readOnly" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="maximumBlockCount" >
|
||||
<number>400</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
|
BIN
src/calibre/gui2/images/news/tomshardware_de.png
Normal file
BIN
src/calibre/gui2/images/news/tomshardware_de.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 992 B |
@ -7,8 +7,8 @@ var column_titles = {
|
||||
'rating' : 'Rating',
|
||||
'date' : 'Date',
|
||||
'tags' : 'Tags',
|
||||
'series' : 'Series',
|
||||
}
|
||||
'series' : 'Series'
|
||||
};
|
||||
|
||||
String.prototype.format = function() {
|
||||
var pattern = /\{\d+\}/g;
|
||||
|
@ -102,7 +102,7 @@ Device Integration
|
||||
|
||||
What devices does |app| support?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
|
||||
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
|
||||
|
||||
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -286,7 +286,7 @@ def write(socket, msg, timeout=5):
|
||||
def read(socket, timeout=5):
|
||||
'''
|
||||
Read a message from `socket`. The message must have been sent with the :function:`write`
|
||||
function. Raises a `RuntimeError` if the message is corrpted. Can return an
|
||||
function. Raises a `RuntimeError` if the message is corrupted. Can return an
|
||||
empty string.
|
||||
'''
|
||||
if isworker:
|
||||
@ -299,7 +299,12 @@ def read(socket, timeout=5):
|
||||
if not msg:
|
||||
break
|
||||
if length is None:
|
||||
length, msg = int(msg[:12]), msg[12:]
|
||||
try:
|
||||
length, msg = int(msg[:12]), msg[12:]
|
||||
except ValueError:
|
||||
if DEBUG:
|
||||
print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg
|
||||
return ''
|
||||
buf.write(msg)
|
||||
if buf.tell() >= length:
|
||||
break
|
||||
|
@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
|
||||
- embedded comments
|
||||
"""
|
||||
|
||||
__version__ = "1.5.0"
|
||||
__versionTime__ = "28 May 2008 10:05"
|
||||
__version__ = "1.5.1"
|
||||
__versionTime__ = "2 October 2008 00:44"
|
||||
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
|
||||
|
||||
import string
|
||||
from weakref import ref as wkref
|
||||
import copy,sys
|
||||
import copy
|
||||
import sys
|
||||
import warnings
|
||||
import re
|
||||
import sre_constants
|
||||
import xml.sax.saxutils
|
||||
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
|
||||
|
||||
__all__ = [
|
||||
@ -88,7 +88,7 @@ __all__ = [
|
||||
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
|
||||
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
|
||||
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
|
||||
'indentedBlock',
|
||||
'indentedBlock', 'originalTextFor',
|
||||
]
|
||||
|
||||
|
||||
@ -130,11 +130,22 @@ if not _PY3K:
|
||||
# ...
|
||||
else:
|
||||
_ustr = str
|
||||
unichr = chr
|
||||
|
||||
def _str2dict(strg):
    """Return a dict whose keys are the characters of ``strg``, each mapped to 0.

    The dict is used purely as a membership table for the characters.
    """
    return dict.fromkeys(strg, 0)
|
||||
|
||||
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data.

    Returns a copy of ``data`` with each of the five XML special characters
    replaced by its named entity.
    """
    # The ampersand must come first: every later replacement introduces a
    # new '&' that must not be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
|
||||
|
||||
class _Constants(object):
|
||||
pass
|
||||
|
||||
@ -145,7 +156,7 @@ else:
|
||||
nums = string.digits
|
||||
hexnums = nums + "ABCDEFabcdef"
|
||||
alphanums = alphas + nums
|
||||
_bslash = "\\"
|
||||
_bslash = chr(92)
|
||||
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
|
||||
|
||||
class ParseBaseException(Exception):
|
||||
@ -193,6 +204,9 @@ class ParseBaseException(Exception):
|
||||
line_str = "".join( [line_str[:line_column],
|
||||
markerString, line_str[line_column:]])
|
||||
return line_str.strip()
|
||||
def __dir__(self):
|
||||
return "loc msg pstr parserElement lineno col line " \
|
||||
"markInputLine __str__ __repr__".split()
|
||||
|
||||
class ParseException(ParseBaseException):
|
||||
"""exception thrown when parse expressions don't match class;
|
||||
@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
|
||||
ErrorStop indicates that parsing is to stop immediately because
|
||||
an unbacktrackable syntax error has been found"""
|
||||
def __init__(self, pe):
|
||||
ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement)
|
||||
super(ParseSyntaxException, self).__init__(
|
||||
pe.pstr, pe.loc, pe.msg, pe.parserElement)
|
||||
|
||||
#~ class ReparseException(ParseBaseException):
|
||||
#~ """Experimental class - parse actions can raise this exception to cause
|
||||
@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
|
||||
return self.tup[i]
|
||||
def __repr__(self):
|
||||
return repr(self.tup)
|
||||
def setOffset(self,i):
|
||||
self.tup = (self.tup[0],i)
|
||||
|
||||
class ParseResults(object):
|
||||
"""Structured parse results, to provide multiple means of access to the parsed data:
|
||||
@ -272,9 +289,6 @@ class ParseResults(object):
|
||||
self.__toklist = [toklist]
|
||||
self.__tokdict = dict()
|
||||
|
||||
# this line is related to debugging the asXML bug
|
||||
#~ asList = False
|
||||
|
||||
if name:
|
||||
if not modal:
|
||||
self.__accumNames[name] = 0
|
||||
@ -286,9 +300,9 @@ class ParseResults(object):
|
||||
toklist = [ toklist ]
|
||||
if asList:
|
||||
if isinstance(toklist,ParseResults):
|
||||
self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
|
||||
self[name] = _ParseResultsWithOffset(toklist.copy(),0)
|
||||
else:
|
||||
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
|
||||
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
|
||||
self[name].__name = name
|
||||
else:
|
||||
try:
|
||||
@ -374,7 +388,7 @@ class ParseResults(object):
|
||||
for name in self.__tokdict:
|
||||
occurrences = self.__tokdict[name]
|
||||
for k, (value, position) in enumerate(occurrences):
|
||||
occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
|
||||
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
|
||||
|
||||
def items( self ):
|
||||
"""Returns all named result keys and values as a list of tuples."""
|
||||
@ -411,6 +425,7 @@ class ParseResults(object):
|
||||
self[k] = v
|
||||
if isinstance(v[0],ParseResults):
|
||||
v[0].__parent = wkref(self)
|
||||
|
||||
self.__toklist += other.__toklist
|
||||
self.__accumNames.update( other.__accumNames )
|
||||
del other
|
||||
@ -517,7 +532,7 @@ class ParseResults(object):
|
||||
continue
|
||||
else:
|
||||
resTag = "ITEM"
|
||||
xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
|
||||
xmlBodyText = _xml_escape(_ustr(res))
|
||||
out += [ nl, nextLevelIndent, "<", resTag, ">",
|
||||
xmlBodyText,
|
||||
"</", resTag, ">" ]
|
||||
@ -594,6 +609,8 @@ class ParseResults(object):
|
||||
else:
|
||||
self.__parent = None
|
||||
|
||||
def __dir__(self):
|
||||
return dir(super(ParseResults,self)) + self.keys()
|
||||
|
||||
def col (loc,strg):
|
||||
"""Returns current column within a string, counting newlines as line separators.
|
||||
@ -715,7 +732,7 @@ class ParserElement(object):
|
||||
def breaker(instring, loc, doActions=True, callPreParse=True):
|
||||
import pdb
|
||||
pdb.set_trace()
|
||||
_parseMethod( instring, loc, doActions, callPreParse )
|
||||
return _parseMethod( instring, loc, doActions, callPreParse )
|
||||
breaker._originalParseMethod = _parseMethod
|
||||
self._parse = breaker
|
||||
else:
|
||||
@ -1047,6 +1064,7 @@ class ParserElement(object):
|
||||
instring = instring.expandtabs()
|
||||
loc, tokens = self._parse( instring, 0 )
|
||||
if parseAll:
|
||||
loc = self.preParse( instring, loc )
|
||||
StringEnd()._parse( instring, loc )
|
||||
return tokens
|
||||
|
||||
@ -1158,27 +1176,21 @@ class ParserElement(object):
|
||||
if isinstance(other,int):
|
||||
minElements, optElements = other,0
|
||||
elif isinstance(other,tuple):
|
||||
if len(other)==0:
|
||||
other = (None,None)
|
||||
elif len(other)==1:
|
||||
other = (other[0],None)
|
||||
if len(other)==2:
|
||||
if other[0] is None:
|
||||
other = (0, other[1])
|
||||
if isinstance(other[0],int) and other[1] is None:
|
||||
if other[0] == 0:
|
||||
return ZeroOrMore(self)
|
||||
if other[0] == 1:
|
||||
return OneOrMore(self)
|
||||
else:
|
||||
return self*other[0] + ZeroOrMore(self)
|
||||
elif isinstance(other[0],int) and isinstance(other[1],int):
|
||||
minElements, optElements = other
|
||||
optElements -= minElements
|
||||
other = (other + (None, None))[:2]
|
||||
if other[0] is None:
|
||||
other = (0, other[1])
|
||||
if isinstance(other[0],int) and other[1] is None:
|
||||
if other[0] == 0:
|
||||
return ZeroOrMore(self)
|
||||
if other[0] == 1:
|
||||
return OneOrMore(self)
|
||||
else:
|
||||
raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
|
||||
return self*other[0] + ZeroOrMore(self)
|
||||
elif isinstance(other[0],int) and isinstance(other[1],int):
|
||||
minElements, optElements = other
|
||||
optElements -= minElements
|
||||
else:
|
||||
raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
|
||||
raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
|
||||
else:
|
||||
raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
|
||||
|
||||
@ -1360,7 +1372,7 @@ class ParserElement(object):
|
||||
"""Check defined expressions for valid structure, check for infinite recursive definitions."""
|
||||
self.checkRecursion( [] )
|
||||
|
||||
def parseFile( self, file_or_filename ):
|
||||
def parseFile( self, file_or_filename, parseAll=False ):
|
||||
"""Execute the parse expression on the given file or filename.
|
||||
If a filename is specified (instead of a file object),
|
||||
the entire file is opened, read, and closed before parsing.
|
||||
@ -1371,7 +1383,7 @@ class ParserElement(object):
|
||||
f = open(file_or_filename, "rb")
|
||||
file_contents = f.read()
|
||||
f.close()
|
||||
return self.parseString(file_contents)
|
||||
return self.parseString(file_contents, parseAll)
|
||||
|
||||
def getException(self):
|
||||
return ParseException("",0,self.errmsg,self)
|
||||
@ -1393,12 +1405,18 @@ class ParserElement(object):
|
||||
else:
|
||||
return super(ParserElement,self)==other
|
||||
|
||||
def __ne__(self,other):
|
||||
return not (self == other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(id(self))
|
||||
|
||||
def __req__(self,other):
|
||||
return self == other
|
||||
|
||||
def __rne__(self,other):
|
||||
return not (self == other)
|
||||
|
||||
|
||||
class Token(ParserElement):
|
||||
"""Abstract ParserElement subclass, for defining atomic matching patterns."""
|
||||
@ -1533,7 +1551,6 @@ class Keyword(Token):
|
||||
Keyword.DEFAULT_KEYWORD_CHARS = chars
|
||||
setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
|
||||
|
||||
|
||||
class CaselessLiteral(Literal):
|
||||
"""Token to match a specified string, ignoring case of letters.
|
||||
Note: the matched results will always be in the case of the given
|
||||
@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
|
||||
"""Matches if current position is at the beginning of a line within the parse string"""
|
||||
def __init__( self ):
|
||||
super(LineStart,self).__init__()
|
||||
self.setWhitespaceChars( " \t" )
|
||||
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
|
||||
self.errmsg = "Expected start of line"
|
||||
#self.myException.msg = self.errmsg
|
||||
|
||||
@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
|
||||
"""Matches if current position is at the end of a line within the parse string"""
|
||||
def __init__( self ):
|
||||
super(LineEnd,self).__init__()
|
||||
self.setWhitespaceChars( " \t" )
|
||||
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
|
||||
self.errmsg = "Expected end of line"
|
||||
#self.myException.msg = self.errmsg
|
||||
|
||||
@ -2271,10 +2288,9 @@ class And(ParseExpression):
|
||||
"""
|
||||
|
||||
class _ErrorStop(Empty):
|
||||
def __new__(cls,*args,**kwargs):
|
||||
return And._ErrorStop.instance
|
||||
_ErrorStop.instance = Empty()
|
||||
_ErrorStop.instance.leaveWhitespace()
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(Empty,self).__init__(*args, **kwargs)
|
||||
self.leaveWhitespace()
|
||||
|
||||
def __init__( self, exprs, savelist = True ):
|
||||
super(And,self).__init__(exprs, savelist)
|
||||
@ -2293,12 +2309,14 @@ class And(ParseExpression):
|
||||
loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
|
||||
errorStop = False
|
||||
for e in self.exprs[1:]:
|
||||
if e is And._ErrorStop.instance:
|
||||
if isinstance(e, And._ErrorStop):
|
||||
errorStop = True
|
||||
continue
|
||||
if errorStop:
|
||||
try:
|
||||
loc, exprtokens = e._parse( instring, loc, doActions )
|
||||
except ParseSyntaxException:
|
||||
raise
|
||||
except ParseBaseException, pe:
|
||||
raise ParseSyntaxException(pe)
|
||||
except IndexError, ie:
|
||||
@ -2502,7 +2520,7 @@ class Each(ParseExpression):
|
||||
raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
|
||||
|
||||
# add any unmatched Optionals, in case they have default values defined
|
||||
matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)
|
||||
matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]
|
||||
|
||||
resultlist = []
|
||||
for e in matchOrder:
|
||||
@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
|
||||
argument is used to define grammars (typically quoted strings and comments) that
|
||||
might contain false matches.
|
||||
"""
|
||||
def __init__( self, other, include=False, ignore=None ):
|
||||
def __init__( self, other, include=False, ignore=None, failOn=None ):
|
||||
super( SkipTo, self ).__init__( other )
|
||||
if ignore is not None:
|
||||
self.expr = self.expr.copy()
|
||||
@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
|
||||
self.mayIndexError = False
|
||||
self.includeMatch = include
|
||||
self.asList = False
|
||||
if failOn is not None and isinstance(failOn, basestring):
|
||||
self.failOn = Literal(failOn)
|
||||
else:
|
||||
self.failOn = failOn
|
||||
self.errmsg = "No match found for "+_ustr(self.expr)
|
||||
#self.myException = ParseException("",0,self.errmsg,self)
|
||||
|
||||
@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
|
||||
startLoc = loc
|
||||
instrlen = len(instring)
|
||||
expr = self.expr
|
||||
failParse = False
|
||||
while loc <= instrlen:
|
||||
try:
|
||||
if self.failOn:
|
||||
failParse = True
|
||||
self.failOn.tryParse(instring, loc)
|
||||
failParse = False
|
||||
loc = expr._skipIgnorables( instring, loc )
|
||||
expr._parse( instring, loc, doActions=False, callPreParse=False )
|
||||
skipText = instring[startLoc:loc]
|
||||
if self.includeMatch:
|
||||
skipText = instring[startLoc:loc]
|
||||
loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
|
||||
if mat:
|
||||
skipRes = ParseResults( skipText )
|
||||
@ -2811,9 +2838,12 @@ class SkipTo(ParseElementEnhance):
|
||||
else:
|
||||
return loc, [ skipText ]
|
||||
else:
|
||||
return loc, [ instring[startLoc:loc] ]
|
||||
return loc, [ skipText ]
|
||||
except (ParseException,IndexError):
|
||||
loc += 1
|
||||
if failParse:
|
||||
raise
|
||||
else:
|
||||
loc += 1
|
||||
exc = self.myException
|
||||
exc.loc = loc
|
||||
exc.pstr = instring
|
||||
@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
|
||||
if hasattr(self,"name"):
|
||||
return self.name
|
||||
|
||||
self._revertClass = self.__class__
|
||||
self.__class__ = _ForwardNoRecurse
|
||||
try:
|
||||
if self.expr is not None:
|
||||
@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
|
||||
else:
|
||||
retString = "None"
|
||||
finally:
|
||||
self.__class__ = Forward
|
||||
return "Forward: "+retString
|
||||
self.__class__ = self._revertClass
|
||||
return self.__class__.__name__ + ": " + retString
|
||||
|
||||
def copy(self):
|
||||
if self.expr is not None:
|
||||
@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
|
||||
def _escapeRegexRangeChars(s):
|
||||
#~ escape these chars: ^-]
|
||||
for c in r"\^-]":
|
||||
s = s.replace(c,"\\"+c)
|
||||
s = s.replace(c,_bslash+c)
|
||||
s = s.replace("\n",r"\n")
|
||||
s = s.replace("\t",r"\t")
|
||||
return _ustr(s)
|
||||
@ -3195,6 +3226,33 @@ def dictOf( key, value ):
|
||||
"""
|
||||
return Dict( ZeroOrMore( Group ( key + value ) ) )
|
||||
|
||||
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    Useful to restore the parsed fields of an HTML start tag into the raw tag
    text itself, or to revert separate tokens with intervening whitespace back
    to the original matching input text. Simpler to use than the parse action
    keepOriginalText, and does not require the inspect module to chase up the
    call stack. By default, returns a string containing the original parsed
    text.

    If the optional asString argument is passed as False, then the return
    value is a ParseResults containing any results names that were originally
    matched, and a single token containing the original matched text from the
    input string. So if the expression passed to originalTextFor contains
    expressions with defined results names, you must set asString to False if
    you want to preserve those results name values.
    """
    def currentLocation(s, loc, t):
        # Parse action that yields the current parse location as its token.
        return loc

    # Bracket expr with two location markers so the matched span can be
    # sliced out of the input string afterwards.
    locMarker = Empty().setParseAction(currentLocation)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # Replace all tokens with the single original-text token, then
            # drop the two helper results names; other names are untouched.
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    matchExpr.setParseAction(extractText)
    return matchExpr
|
||||
|
||||
# convenience constants for positional expressions
|
||||
empty = Empty().setName("empty")
|
||||
lineStart = LineStart().setName("lineStart")
|
||||
@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
|
||||
raise ValueError("opening and closing strings cannot be the same")
|
||||
if content is None:
|
||||
if isinstance(opener,basestring) and isinstance(closer,basestring):
|
||||
if ignoreExpr is not None:
|
||||
content = (Combine(OneOrMore(~ignoreExpr +
|
||||
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
|
||||
).setParseAction(lambda t:t[0].strip()))
|
||||
if len(opener) == 1 and len(closer)==1:
|
||||
if ignoreExpr is not None:
|
||||
content = (Combine(OneOrMore(~ignoreExpr +
|
||||
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
|
||||
).setParseAction(lambda t:t[0].strip()))
|
||||
else:
|
||||
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
|
||||
).setParseAction(lambda t:t[0].strip()))
|
||||
else:
|
||||
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
|
||||
if ignoreExpr is not None:
|
||||
content = (Combine(OneOrMore(~ignoreExpr +
|
||||
~Literal(opener) + ~Literal(closer) +
|
||||
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
|
||||
).setParseAction(lambda t:t[0].strip()))
|
||||
else:
|
||||
content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
|
||||
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
|
||||
).setParseAction(lambda t:t[0].strip()))
|
||||
else:
|
||||
raise ValueError("opening and closing arguments must be strings if no content expression is given")
|
||||
ret = Forward()
|
||||
@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
|
||||
else:
|
||||
smExpr = Group( Optional(NL) +
|
||||
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
|
||||
blockStatementExpr.ignore("\\" + LineEnd())
|
||||
blockStatementExpr.ignore(_bslash + LineEnd())
|
||||
return smExpr
|
||||
|
||||
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
|
||||
@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
|
||||
|
||||
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
|
||||
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
|
||||
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
|
||||
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
|
||||
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
|
||||
|
||||
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
|
||||
|
@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
|
||||
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
|
||||
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
|
||||
'joelonsoftware', 'telepolis', 'common_dreams', 'nin',
|
||||
'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
|
||||
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
tomshardware.com
|
||||
'''
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Tomshardware(BasicNewsRecipe):
|
||||
@ -50,7 +49,7 @@ class Tomshardware(BasicNewsRecipe):
|
||||
rmain, rsep, article_id = main.rpartition(',')
|
||||
tmain, tsep, trest = rmain.rpartition('/reviews/')
|
||||
if tsep:
|
||||
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
|
||||
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
|
||||
return 'http://www.tomshardware.com/news_print.php?p1=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
54
src/calibre/web/feeds/recipes/recipe_tomshardware_de.py
Normal file
54
src/calibre/web/feeds/recipes/recipe_tomshardware_de.py
Normal file
@ -0,0 +1,54 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''
|
||||
Fetch tomshardware.
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class TomsHardwareDe(BasicNewsRecipe):
    """Recipe that fetches the German edition of Tom's Hardware from its RSS feed."""

    title = "Tom's Hardware German"
    description = 'Computer news in german'
    __author__ = 'Oliver Niesner'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets = True
    encoding = 'utf-8'

    # Page elements listed for removal: first by element id, then by
    # tag name and class.
    remove_tags = [
        {'id': 'outside-advert'},
        {'id': 'advertRightWhite'},
        {'id': 'header-advert'},
        {'id': 'header-banner'},
        {'id': 'header-menu'},
        {'id': 'header-top'},
        {'id': 'header-tools'},
        {'id': 'nbComment'},
        {'id': 'internalSidebar'},
        {'id': 'header-news-infos'},
        {'id': 'breadcrumbs'},
        {'id': ''},
        {'name': 'div', 'attrs': {'class': 'pyjama'}},
        {'name': 'href', 'attrs': {'class': 'comment'}},
        {'name': 'div', 'attrs': {'class': 'greyBoxR clearfix'}},
        {'name': 'div', 'attrs': {'class': 'greyBoxL clearfix'}},
        {'name': 'div', 'attrs': {'class': 'greyBox clearfix'}},
        {'id': ''},
    ]

    # Marker element for trimming what follows the article body.
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'news-elm'}}]

    feeds = [
        ('tomshardware',
         'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml'),
    ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user