New MOBI writer: Change the values of the doctype and cdetype fields to be the same as those used by the old writer. Fixes #847766 (8.18 doesn't overwrite the previous day's newsfeeds for the same publications)

Kovid Goyal 2011-09-12 14:21:34 -06:00
parent 1ecfb81a07
commit 2bf6e7bed0
5 changed files with 133 additions and 113 deletions
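
In short, the writer now picks the MOBI book-type field per periodical type and no longer writes a cdetype EXTH record for periodicals, matching the old writer. A minimal sketch of that selection logic, distilled from the MobiWriter hunks below (the helper name is made up, not calibre API):

# Illustrative sketch only; mirrors the logic in the hunks below.
def periodical_fields(publication_type):
    # bt = {'newspaper': 0x101}.get(self.publication_type, 0x103)
    book_type = {'newspaper': 0x101}.get(publication_type, 0x103)
    # The old writer wrote no cdetype EXTH record for periodicals, so the new
    # writer now writes none either; plain books keep a cdetype of b'EBOK'.
    cdetype = None
    return book_type, cdetype

periodical_fields('newspaper')  # -> (0x101, None)
periodical_fields('magazine')   # -> (0x103, None)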


@ -13,6 +13,7 @@ class USAToday(BasicNewsRecipe):
title = 'USA Today'
__author__ = 'Kovid Goyal'
oldest_article = 1
publication_type = 'newspaper'
timefmt = ''
max_articles_per_feed = 20
language = 'en'


@ -61,6 +61,13 @@ class MobiWriter(object):
def __call__(self, oeb, path_or_stream):
self.log = oeb.log
pt = None
if oeb.metadata.publication_type:
x = unicode(oeb.metadata.publication_type[0]).split(':')
if len(x) > 1:
pt = x[1].lower()
self.publication_type = pt
if hasattr(path_or_stream, 'write'):
return self.dump_stream(oeb, path_or_stream)
with open(path_or_stream, 'w+b') as stream:
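
The code added above pulls the periodical type out of oeb.metadata.publication_type, which for calibre news downloads is a colon-separated string; the sample value below is an assumption (something like 'periodical:newspaper:USA Today'), but the parsing mirrors the hunk:

def parse_publication_type(value):
    # x = unicode(...).split(':'); pt = x[1].lower() if there is a second field
    parts = value.split(':')
    return parts[1].lower() if len(parts) > 1 else None

parse_publication_type('periodical:newspaper:USA Today')  # -> 'newspaper'
parse_publication_type('book')                            # -> None
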
@ -351,7 +358,7 @@ class MobiWriter(object):
elif self.indexer.is_periodical:
# If you change this, remember to change the cdetype in the EXTH
# header as well
bt = 0x103
bt = {'newspaper':0x101}.get(self.publication_type, 0x103)
record0.write(pack(b'>IIIII',
0xe8, bt, 65001, uid, 6))
@ -525,15 +532,16 @@ class MobiWriter(object):
nrecs += 1
# Write cdetype
if self.is_periodical:
# If you set the book type header field to 0x101 use NWPR here if
# you use 0x103 use MAGZ
data = b'MAGZ'
if not self.is_periodical:
exth.write(pack(b'>II', 501, 12))
exth.write(b'EBOK')
nrecs += 1
else:
data = b'EBOK'
exth.write(pack(b'>II', 501, len(data)+8))
exth.write(data)
nrecs += 1
# Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype
# of 0x103 but the old writer didn't write them, and I don't know
# what it should be for type 0x102 (b'BLOG'?) so write nothing
# instead
pass
# Add a publication date entry
if oeb.metadata['date']:
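
For reference, an EXTH record is a big-endian tag, a length that includes the 8-byte record header, and the payload; that is why the EBOK branch above writes tag 501 with a length of 12 (8 + len(b'EBOK')). A small standalone sketch, not calibre code:

from struct import pack

def exth_record(tag, data):
    # b'>II' = two big-endian unsigned 32-bit ints: tag, total record length
    # (the length counts the 8 header bytes plus the payload).
    return pack(b'>II', tag, 8 + len(data)) + data

rec = exth_record(501, b'EBOK')   # cdetype record for a plain book
assert len(rec) == 12             # matches exth.write(pack(b'>II', 501, 12))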


@ -146,6 +146,11 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
self.config = {}
self.is_running = False
self.exception = None
self.config['/'] = {
'tools.sessions.on' : True,
'tools.sessions.timeout': 60, # Session times out after 60 minutes
}
if not wsgi:
self.setup_loggers()
cherrypy.engine.bonjour.subscribe()
@ -154,6 +159,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
'tools.gzip.mime_types': ['text/html', 'text/plain',
'text/xml', 'text/javascript', 'text/css'],
}
if opts.password:
self.config['/'] = {
'tools.digest_auth.on' : True,
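
The content server now turns on CherryPy's sessions tool for the application root with a 60-minute timeout. A minimal, self-contained CherryPy app using the same config keys (illustrative only, not the calibre server):

import cherrypy

class Root(object):

    @cherrypy.expose
    def index(self):
        # cherrypy.session behaves like a dict once the tool is enabled
        count = cherrypy.session.get('count', 0) + 1
        cherrypy.session['count'] = count
        return 'Hits this session: %d' % count

conf = {
    '/': {
        'tools.sessions.on': True,
        'tools.sessions.timeout': 60,   # minutes, as in the hunk above
    },
}

if __name__ == '__main__':
    cherrypy.quickstart(Root(), '/', conf)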


@ -28,6 +28,10 @@ class Browser(B):
B.set_cookiejar(self, *args, **kwargs)
self._clone_actions['set_cookiejar'] = ('set_cookiejar', args, kwargs)
@property
def cookiejar(self):
return self._clone_actions['set_cookiejar'][1][0]
def set_handle_redirect(self, *args, **kwargs):
B.set_handle_redirect(self, *args, **kwargs)
self._clone_actions['set_handle_redirect'] = ('set_handle_redirect',
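
The new cookiejar property simply reads back the positional argument recorded when set_cookiejar was called: the Browser stores every configuring call in _clone_actions so that clones can replay them. A stripped-down sketch of that record-and-replay pattern (a hypothetical class, not the mechanize-based one above):

class RecordingBrowser(object):
    """Records configuration calls so they can be replayed on clones."""

    def __init__(self):
        self._clone_actions = {}

    def set_cookiejar(self, cookiejar):
        # The real class also calls the base implementation; here we only record.
        self._clone_actions['set_cookiejar'] = ('set_cookiejar', (cookiejar,), {})

    @property
    def cookiejar(self):
        # (method_name, args, kwargs) -> first positional argument
        return self._clone_actions['set_cookiejar'][1][0]

    def clone(self):
        new = RecordingBrowser()
        for name, args, kwargs in self._clone_actions.values():
            getattr(new, name)(*args, **kwargs)
        return new

jar = object()              # stand-in for a cookielib.CookieJar
b = RecordingBrowser()
b.set_cookiejar(jar)
assert b.clone().cookiejar is jar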


@ -33,13 +33,13 @@ missing = object()
class Session(object):
"""A CherryPy dict-like Session object (one per request)."""
__metaclass__ = cherrypy._AttributeDocstrings
_id = None
id_observers = None
id_observers__doc = "A list of callbacks to which to pass new id's."
id__doc = "The current session ID."
def _get_id(self):
return self._id
@ -48,33 +48,33 @@ class Session(object):
for o in self.id_observers:
o(value)
id = property(_get_id, _set_id, doc=id__doc)
timeout = 60
timeout__doc = "Number of minutes after which to delete session data."
locked = False
locked__doc = """
If True, this session instance has exclusive read/write access
to session data."""
loaded = False
loaded__doc = """
If True, data has been retrieved from storage. This should happen
automatically on the first attempt to access session data."""
clean_thread = None
clean_thread__doc = "Class-level Monitor which calls self.clean_up."
clean_freq = 5
clean_freq__doc = "The poll rate for expired session cleanup in minutes."
def __init__(self, id=None, **kwargs):
self.id_observers = []
self._data = {}
for k, v in kwargs.iteritems():
setattr(self, k, v)
if id is None:
self.regenerate()
else:
@ -84,30 +84,30 @@ class Session(object):
# See http://www.cherrypy.org/ticket/709.
self.id = None
self.regenerate()
def regenerate(self):
"""Replace the current session (with a new id)."""
if self.id is not None:
self.delete()
old_session_was_locked = self.locked
if old_session_was_locked:
self.release_lock()
self.id = None
while self.id is None:
self.id = self.generate_id()
# Assert that the generated id is not already stored.
if self._exists():
self.id = None
if old_session_was_locked:
self.acquire_lock()
def clean_up(self):
"""Clean up expired sessions."""
pass
try:
os.urandom(20)
except (AttributeError, NotImplementedError):
@ -119,7 +119,7 @@ class Session(object):
def generate_id(self):
"""Return a new session id."""
return os.urandom(20).encode('hex')
def save(self):
"""Save session data."""
try:
@ -129,12 +129,12 @@ class Session(object):
t = datetime.timedelta(seconds = self.timeout * 60)
expiration_time = datetime.datetime.now() + t
self._save(expiration_time)
finally:
if self.locked:
# Always release the lock if the user didn't release it
self.release_lock()
def load(self):
"""Copy stored session data into this session instance."""
data = self._load()
@ -145,7 +145,7 @@ class Session(object):
else:
self._data = data[0]
self.loaded = True
# Stick the clean_thread in the class, not the instance.
# The instances are created and destroyed per-request.
cls = self.__class__
@ -157,23 +157,23 @@ class Session(object):
t.subscribe()
cls.clean_thread = t
t.start()
def delete(self):
"""Delete stored session data."""
self._delete()
def __getitem__(self, key):
if not self.loaded: self.load()
return self._data[key]
def __setitem__(self, key, value):
if not self.loaded: self.load()
self._data[key] = value
def __delitem__(self, key):
if not self.loaded: self.load()
del self._data[key]
def pop(self, key, default=missing):
"""Remove the specified key and return the corresponding value.
If key is not found, default is returned if given,
@ -184,46 +184,46 @@ class Session(object):
return self._data.pop(key)
else:
return self._data.pop(key, default)
def __contains__(self, key):
if not self.loaded: self.load()
return key in self._data
def has_key(self, key):
"""D.has_key(k) -> True if D has a key k, else False."""
if not self.loaded: self.load()
return self._data.has_key(key)
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
if not self.loaded: self.load()
return self._data.get(key, default)
def update(self, d):
"""D.update(E) -> None. Update D from E: for k in E: D[k] = E[k]."""
if not self.loaded: self.load()
self._data.update(d)
def setdefault(self, key, default=None):
"""D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D."""
if not self.loaded: self.load()
return self._data.setdefault(key, default)
def clear(self):
"""D.clear() -> None. Remove all items from D."""
if not self.loaded: self.load()
self._data.clear()
def keys(self):
"""D.keys() -> list of D's keys."""
if not self.loaded: self.load()
return self._data.keys()
def items(self):
"""D.items() -> list of D's (key, value) pairs, as 2-tuples."""
if not self.loaded: self.load()
return self._data.items()
def values(self):
"""D.values() -> list of D's values."""
if not self.loaded: self.load()
@ -231,11 +231,11 @@ class Session(object):
class RamSession(Session):
# Class-level objects. Don't rebind these!
cache = {}
locks = {}
def clean_up(self):
"""Clean up expired sessions."""
now = datetime.datetime.now()
@ -249,29 +249,29 @@ class RamSession(Session):
del self.locks[id]
except KeyError:
pass
def _exists(self):
return self.id in self.cache
def _load(self):
return self.cache.get(self.id)
def _save(self, expiration_time):
self.cache[self.id] = (self._data, expiration_time)
def _delete(self):
del self.cache[self.id]
def acquire_lock(self):
"""Acquire an exclusive lock on the currently-loaded session data."""
self.locked = True
self.locks.setdefault(self.id, threading.RLock()).acquire()
def release_lock(self):
"""Release the lock on the currently-loaded session data."""
self.locks[self.id].release()
self.locked = False
def __len__(self):
"""Return the number of active sessions."""
return len(self.cache)
@ -279,32 +279,32 @@ class RamSession(Session):
class FileSession(Session):
"""Implementation of the File backend for sessions
storage_path: the folder where session data will be saved. Each session
will be saved as pickle.dump(data, expiration_time) in its own file;
the filename will be self.SESSION_PREFIX + self.id.
"""
SESSION_PREFIX = 'session-'
LOCK_SUFFIX = '.lock'
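
As the docstring above says, each file-backed session lives in its own pickle file named SESSION_PREFIX + id under storage_path. A rough illustration of what _save() ends up writing (all values below are assumptions):

import os, pickle, datetime

storage_path = '/tmp/sessions'                              # assumed
session_id = 'deadbeef'                                     # assumed
if not os.path.isdir(storage_path):
    os.makedirs(storage_path)
path = os.path.join(storage_path, 'session-' + session_id)  # SESSION_PREFIX + id
expires = datetime.datetime.now() + datetime.timedelta(minutes=60)
with open(path, 'wb') as f:
    pickle.dump(({'user': 'kovid'}, expires), f)  # (session data, expiration_time)
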
def __init__(self, id=None, **kwargs):
# The 'storage_path' arg is required for file-based sessions.
kwargs['storage_path'] = os.path.abspath(kwargs['storage_path'])
Session.__init__(self, id=id, **kwargs)
def setup(cls, **kwargs):
"""Set up the storage system for file-based sessions.
This should only be called once per process; this will be done
automatically when using sessions.init (as the built-in Tool does).
"""
# The 'storage_path' arg is required for file-based sessions.
kwargs['storage_path'] = os.path.abspath(kwargs['storage_path'])
for k, v in kwargs.iteritems():
setattr(cls, k, v)
# Warn if any lock files exist at startup.
lockfiles = [fname for fname in os.listdir(cls.storage_path)
if (fname.startswith(cls.SESSION_PREFIX)
@ -316,17 +316,17 @@ class FileSession(Session):
"manually delete the lockfiles found at %r."
% (len(lockfiles), plural, cls.storage_path))
setup = classmethod(setup)
def _get_file_path(self):
f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id)
if not os.path.abspath(f).startswith(self.storage_path):
raise cherrypy.HTTPError(400, "Invalid session id in cookie.")
return f
def _exists(self):
path = self._get_file_path()
return os.path.exists(path)
def _load(self, path=None):
if path is None:
path = self._get_file_path()
@ -338,20 +338,20 @@ class FileSession(Session):
f.close()
except (IOError, EOFError):
return None
def _save(self, expiration_time):
f = open(self._get_file_path(), "wb")
try:
pickle.dump((self._data, expiration_time), f)
finally:
f.close()
def _delete(self):
try:
os.unlink(self._get_file_path())
except OSError:
pass
def acquire_lock(self, path=None):
"""Acquire an exclusive lock on the currently-loaded session data."""
if path is None:
@ -363,17 +363,17 @@ class FileSession(Session):
except OSError:
time.sleep(0.1)
else:
os.close(lockfd)
break
self.locked = True
def release_lock(self, path=None):
"""Release the lock on the currently-loaded session data."""
if path is None:
path = self._get_file_path()
os.unlink(path + self.LOCK_SUFFIX)
self.locked = False
def clean_up(self):
"""Clean up expired sessions."""
now = datetime.datetime.now()
@ -395,7 +395,7 @@ class FileSession(Session):
os.unlink(path)
finally:
self.release_lock(path)
def __len__(self):
"""Return the number of active sessions."""
return len([fname for fname in os.listdir(self.storage_path)
@ -412,38 +412,38 @@ class PostgresqlSession(Session):
data text,
expiration_time timestamp
)
You must provide your own get_db function.
"""
def __init__(self, id=None, **kwargs):
Session.__init__(self, id, **kwargs)
self.cursor = self.db.cursor()
def setup(cls, **kwargs):
"""Set up the storage system for Postgres-based sessions.
This should only be called once per process; this will be done
automatically when using sessions.init (as the built-in Tool does).
"""
for k, v in kwargs.iteritems():
setattr(cls, k, v)
self.db = self.get_db()
setup = classmethod(setup)
def __del__(self):
if self.cursor:
self.cursor.close()
self.db.commit()
def _exists(self):
# Select session data from table
self.cursor.execute('select data, expiration_time from session '
'where id=%s', (self.id,))
rows = self.cursor.fetchall()
return bool(rows)
def _load(self):
# Select session data from table
self.cursor.execute('select data, expiration_time from session '
@ -451,34 +451,34 @@ class PostgresqlSession(Session):
rows = self.cursor.fetchall()
if not rows:
return None
pickled_data, expiration_time = rows[0]
data = pickle.loads(pickled_data)
return data, expiration_time
def _save(self, expiration_time):
pickled_data = pickle.dumps(self._data)
self.cursor.execute('update session set data = %s, '
'expiration_time = %s where id = %s',
(pickled_data, expiration_time, self.id))
def _delete(self):
self.cursor.execute('delete from session where id=%s', (self.id,))
def acquire_lock(self):
"""Acquire an exclusive lock on the currently-loaded session data."""
# We use the "for update" clause to lock the row
self.locked = True
self.cursor.execute('select id from session where id=%s for update',
(self.id,))
def release_lock(self):
"""Release the lock on the currently-loaded session data."""
# We just close the cursor and that will remove the lock
# introduced by the "for update" clause
self.cursor.close()
self.locked = False
def clean_up(self):
"""Clean up expired sessions."""
self.cursor.execute('delete from session where expiration_time < %s',
@ -486,43 +486,43 @@ class PostgresqlSession(Session):
class MemcachedSession(Session):
# The most popular memcached client for Python isn't thread-safe.
# Wrap all .get and .set operations in a single lock.
mc_lock = threading.RLock()
# This is a separate set of locks per session id.
locks = {}
servers = ['127.0.0.1:11211']
def setup(cls, **kwargs):
"""Set up the storage system for memcached-based sessions.
This should only be called once per process; this will be done
automatically when using sessions.init (as the built-in Tool does).
"""
for k, v in kwargs.iteritems():
setattr(cls, k, v)
import memcache
cls.cache = memcache.Client(cls.servers)
setup = classmethod(setup)
def _exists(self):
self.mc_lock.acquire()
try:
return bool(self.cache.get(self.id))
finally:
self.mc_lock.release()
def _load(self):
self.mc_lock.acquire()
try:
return self.cache.get(self.id)
finally:
self.mc_lock.release()
def _save(self, expiration_time):
# Send the expiration time as "Unix time" (seconds since 1/1/1970)
td = int(time.mktime(expiration_time.timetuple()))
@ -532,20 +532,20 @@ class MemcachedSession(Session):
raise AssertionError("Session data for id %r not set." % self.id)
finally:
self.mc_lock.release()
def _delete(self):
self.cache.delete(self.id)
def acquire_lock(self):
"""Acquire an exclusive lock on the currently-loaded session data."""
self.locked = True
self.locks.setdefault(self.id, threading.RLock()).acquire()
def release_lock(self):
"""Release the lock on the currently-loaded session data."""
self.locks[self.id].release()
self.locked = False
def __len__(self):
"""Return the number of active sessions."""
raise NotImplementedError
@ -555,15 +555,15 @@ class MemcachedSession(Session):
def save():
"""Save any changed session data."""
if not hasattr(cherrypy.serving, "session"):
return
# Guard against running twice
if hasattr(cherrypy.request, "_sessionsaved"):
return
cherrypy.request._sessionsaved = True
if cherrypy.response.stream:
# If the body is being streamed, we have to save the data
# *after* the response has been written out
@ -589,7 +589,7 @@ close.priority = 90
def init(storage_type='ram', path=None, path_header=None, name='session_id',
timeout=60, domain=None, secure=False, clean_freq=5, **kwargs):
"""Initialize session object (using cookies).
storage_type: one of 'ram', 'file', 'postgresql'. This will be used
to look up the corresponding class in cherrypy.lib.sessions
globals. For example, 'file' will use the FileSession class.
@ -603,31 +603,31 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id',
secure: if False (the default) the cookie 'secure' value will not
be set. If True, the cookie 'secure' value will be set (to 1).
clean_freq (minutes): the poll rate for expired session cleanup.
Any additional kwargs will be bound to the new Session instance,
and may be specific to the storage type. See the subclass of Session
you're using for more information.
"""
request = cherrypy.request
# Guard against running twice
if hasattr(request, "_session_init_flag"):
return
request._session_init_flag = True
# Check if request came with a session ID
id = None
if name in request.cookie:
id = request.cookie[name].value
# Find the storage class and call setup (first time only).
storage_class = storage_type.title() + 'Session'
storage_class = globals()[storage_class]
if not hasattr(cherrypy, "session"):
if hasattr(storage_class, "setup"):
storage_class.setup(**kwargs)
# Create and attach a new Session instance to cherrypy.serving.
# It will possess a reference to (and lock, and lazily load)
# the requested session data.
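
The init() docstring above lists the available knobs (storage_type, timeout, clean_freq, plus storage-specific kwargs). In a CherryPy deployment these are normally supplied through tools.sessions.* config keys rather than by calling init() directly; an illustrative config, with assumed values rather than calibre's settings:

config = {
    '/': {
        'tools.sessions.on': True,
        'tools.sessions.storage_type': 'file',          # -> FileSession
        'tools.sessions.storage_path': '/tmp/sessions', # passed through as a kwarg
        'tools.sessions.timeout': 60,                   # minutes
        'tools.sessions.clean_freq': 5,                 # minutes
    },
}
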
@ -638,11 +638,11 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id',
"""Update the cookie every time the session id changes."""
cherrypy.response.cookie[name] = id
sess.id_observers.append(update_cookie)
# Create cherrypy.session which will proxy to cherrypy.serving.session
if not hasattr(cherrypy, "session"):
cherrypy.session = cherrypy._ThreadLocalProxy('session')
set_response_cookie(path=path, path_header=path_header, name=name,
timeout=timeout, domain=domain, secure=secure)
@ -650,7 +650,7 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id',
def set_response_cookie(path=None, path_header=None, name='session_id',
timeout=60, domain=None, secure=False):
"""Set a response cookie for the client.
path: the 'path' value to stick in the response cookie metadata.
path_header: if 'path' is None (the default), then the response
cookie 'path' will be pulled from request.headers[path_header].
@ -665,14 +665,15 @@ def set_response_cookie(path=None, path_header=None, name='session_id',
cookie[name] = cherrypy.serving.session.id
cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header)
or '/')
# We'd like to use the "max-age" param as indicated in
# http://www.faqs.org/rfcs/rfc2109.html but IE doesn't
# save it to disk and the session is lost if people close
# the browser. So we have to use the old "expires" ... sigh ...
## cookie[name]['max-age'] = timeout * 60
if timeout:
cookie[name]['expires'] = http.HTTPDate(time.time() + (timeout * 60))
if False and timeout: # Changed by Kovid, we want the user to have to
# re-authenticate on browser restart
cookie[name]['expires'] = http.HTTPDate(time.time() + timeout)
if domain is not None:
cookie[name]['domain'] = domain
if secure:
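
The last hunk disables the persistent 'expires' attribute, so the cookie becomes a browser-session cookie and the user has to log in again after restarting the browser. A short illustration of the difference using the Python 2 stdlib and the same HTTPDate helper used above:

import time
from Cookie import SimpleCookie       # Python 2; http.cookies in Python 3
from cherrypy.lib import http         # provides HTTPDate, as in the code above

persistent = SimpleCookie()
persistent['session_id'] = 'abc123'
persistent['session_id']['path'] = '/'
# With an expires date the browser keeps the cookie across restarts:
persistent['session_id']['expires'] = http.HTTPDate(time.time() + 60 * 60)

transient = SimpleCookie()
transient['session_id'] = 'abc123'
transient['session_id']['path'] = '/'
# No expires attribute -> a session cookie, discarded when the browser exits,
# which is the behaviour the `if False and timeout:` guard above opts into.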