From 2bf6e7bed0600aa139ded457d28a9f9746a8994f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 12 Sep 2011 14:21:34 -0600 Subject: [PATCH] New MOBI writer: Change values of dictype and cdetype fields to be the same as for the old writer. Fixes #847766 (8.18 doesn't overwrite previous days newsfeeds for same publications) --- recipes/usatoday.recipe | 1 + src/calibre/ebooks/mobi/writer2/main.py | 26 ++- src/calibre/library/server/base.py | 6 + src/calibre/utils/browser.py | 4 + src/cherrypy/lib/sessions.py | 209 ++++++++++++------------ 5 files changed, 133 insertions(+), 113 deletions(-) diff --git a/recipes/usatoday.recipe b/recipes/usatoday.recipe index a4899b7187..18aeab2648 100644 --- a/recipes/usatoday.recipe +++ b/recipes/usatoday.recipe @@ -13,6 +13,7 @@ class USAToday(BasicNewsRecipe): title = 'USA Today' __author__ = 'Kovid Goyal' oldest_article = 1 + publication_type = 'newspaper' timefmt = '' max_articles_per_feed = 20 language = 'en' diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 7e748aac95..987d22afd3 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -61,6 +61,13 @@ class MobiWriter(object): def __call__(self, oeb, path_or_stream): self.log = oeb.log + pt = None + if oeb.metadata.publication_type: + x = unicode(oeb.metadata.publication_type[0]).split(':') + if len(x) > 1: + pt = x[1].lower() + self.publication_type = pt + if hasattr(path_or_stream, 'write'): return self.dump_stream(oeb, path_or_stream) with open(path_or_stream, 'w+b') as stream: @@ -351,7 +358,7 @@ class MobiWriter(object): elif self.indexer.is_periodical: # If you change this, remember to change the cdetype in the EXTH # header as well - bt = 0x103 + bt = {'newspaper':0x101}.get(self.publication_type, 0x103) record0.write(pack(b'>IIIII', 0xe8, bt, 65001, uid, 6)) @@ -525,15 +532,16 @@ class MobiWriter(object): nrecs += 1 # Write cdetype - if self.is_periodical: - # If you set 
the book type header field to 0x101 use NWPR here if - # you use 0x103 use MAGZ - data = b'MAGZ' + if not self.is_periodical: + exth.write(pack(b'>II', 501, 12)) + exth.write(b'EBOK') + nrecs += 1 else: - data = b'EBOK' - exth.write(pack(b'>II', 501, len(data)+8)) - exth.write(data) - nrecs += 1 + # Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype + # of 0x103 but the old writer didn't write them, and I don't know + # what it should be for type 0x102 (b'BLOG'?) so write nothing + # instead + pass # Add a publication date entry if oeb.metadata['date']: diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index d18bffc6a2..9ffe1915f8 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -146,6 +146,11 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, self.config = {} self.is_running = False self.exception = None + self.config['/'] = { + 'tools.sessions.on' : True, + 'tools.sessions.timeout': 60, # Session times out after 60 minutes + } + if not wsgi: self.setup_loggers() cherrypy.engine.bonjour.subscribe() @@ -154,6 +159,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, 'tools.gzip.mime_types': ['text/html', 'text/plain', 'text/xml', 'text/javascript', 'text/css'], } + if opts.password: self.config['/'] = { 'tools.digest_auth.on' : True, diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index 6f8703ab49..430ced9fdd 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -28,6 +28,10 @@ class Browser(B): B.set_cookiejar(self, *args, **kwargs) self._clone_actions['set_cookiejar'] = ('set_cookiejar', args, kwargs) + @property + def cookiejar(self): + return self._clone_actions['set_cookiejar'][1][0] + def set_handle_redirect(self, *args, **kwargs): B.set_handle_redirect(self, *args, **kwargs) self._clone_actions['set_handle_redirect'] = ('set_handle_redirect', diff --git 
a/src/cherrypy/lib/sessions.py b/src/cherrypy/lib/sessions.py index f9b52d4e37..326e72c2b2 100644 --- a/src/cherrypy/lib/sessions.py +++ b/src/cherrypy/lib/sessions.py @@ -33,13 +33,13 @@ missing = object() class Session(object): """A CherryPy dict-like Session object (one per request).""" - + __metaclass__ = cherrypy._AttributeDocstrings - + _id = None id_observers = None id_observers__doc = "A list of callbacks to which to pass new id's." - + id__doc = "The current session ID." def _get_id(self): return self._id @@ -48,33 +48,33 @@ class Session(object): for o in self.id_observers: o(value) id = property(_get_id, _set_id, doc=id__doc) - + timeout = 60 timeout__doc = "Number of minutes after which to delete session data." - + locked = False locked__doc = """ If True, this session instance has exclusive read/write access to session data.""" - + loaded = False loaded__doc = """ If True, data has been retrieved from storage. This should happen automatically on the first attempt to access session data.""" - + clean_thread = None clean_thread__doc = "Class-level Monitor which calls self.clean_up." - + clean_freq = 5 clean_freq__doc = "The poll rate for expired session cleanup in minutes." - + def __init__(self, id=None, **kwargs): self.id_observers = [] self._data = {} - + for k, v in kwargs.iteritems(): setattr(self, k, v) - + if id is None: self.regenerate() else: @@ -84,30 +84,30 @@ class Session(object): # See http://www.cherrypy.org/ticket/709. self.id = None self.regenerate() - + def regenerate(self): """Replace the current session (with a new id).""" if self.id is not None: self.delete() - + old_session_was_locked = self.locked if old_session_was_locked: self.release_lock() - + self.id = None while self.id is None: self.id = self.generate_id() # Assert that the generated id is not already stored. 
if self._exists(): self.id = None - + if old_session_was_locked: self.acquire_lock() - + def clean_up(self): """Clean up expired sessions.""" pass - + try: os.urandom(20) except (AttributeError, NotImplementedError): @@ -119,7 +119,7 @@ class Session(object): def generate_id(self): """Return a new session id.""" return os.urandom(20).encode('hex') - + def save(self): """Save session data.""" try: @@ -129,12 +129,12 @@ class Session(object): t = datetime.timedelta(seconds = self.timeout * 60) expiration_time = datetime.datetime.now() + t self._save(expiration_time) - + finally: if self.locked: # Always release the lock if the user didn't release it self.release_lock() - + def load(self): """Copy stored session data into this session instance.""" data = self._load() @@ -145,7 +145,7 @@ class Session(object): else: self._data = data[0] self.loaded = True - + # Stick the clean_thread in the class, not the instance. # The instances are created and destroyed per-request. cls = self.__class__ @@ -157,23 +157,23 @@ class Session(object): t.subscribe() cls.clean_thread = t t.start() - + def delete(self): """Delete stored session data.""" self._delete() - + def __getitem__(self, key): if not self.loaded: self.load() return self._data[key] - + def __setitem__(self, key, value): if not self.loaded: self.load() self._data[key] = value - + def __delitem__(self, key): if not self.loaded: self.load() del self._data[key] - + def pop(self, key, default=missing): """Remove the specified key and return the corresponding value. 
If key is not found, default is returned if given, @@ -184,46 +184,46 @@ class Session(object): return self._data.pop(key) else: return self._data.pop(key, default) - + def __contains__(self, key): if not self.loaded: self.load() return key in self._data - + def has_key(self, key): """D.has_key(k) -> True if D has a key k, else False.""" if not self.loaded: self.load() return self._data.has_key(key) - + def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" if not self.loaded: self.load() return self._data.get(key, default) - + def update(self, d): """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" if not self.loaded: self.load() self._data.update(d) - + def setdefault(self, key, default=None): """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D.""" if not self.loaded: self.load() return self._data.setdefault(key, default) - + def clear(self): """D.clear() -> None. Remove all items from D.""" if not self.loaded: self.load() self._data.clear() - + def keys(self): """D.keys() -> list of D's keys.""" if not self.loaded: self.load() return self._data.keys() - + def items(self): """D.items() -> list of D's (key, value) pairs, as 2-tuples.""" if not self.loaded: self.load() return self._data.items() - + def values(self): """D.values() -> list of D's values.""" if not self.loaded: self.load() @@ -231,11 +231,11 @@ class Session(object): class RamSession(Session): - + # Class-level objects. Don't rebind these! 
cache = {} locks = {} - + def clean_up(self): """Clean up expired sessions.""" now = datetime.datetime.now() @@ -249,29 +249,29 @@ class RamSession(Session): del self.locks[id] except KeyError: pass - + def _exists(self): return self.id in self.cache - + def _load(self): return self.cache.get(self.id) - + def _save(self, expiration_time): self.cache[self.id] = (self._data, expiration_time) - + def _delete(self): del self.cache[self.id] - + def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" self.locked = True self.locks.setdefault(self.id, threading.RLock()).acquire() - + def release_lock(self): """Release the lock on the currently-loaded session data.""" self.locks[self.id].release() self.locked = False - + def __len__(self): """Return the number of active sessions.""" return len(self.cache) @@ -279,32 +279,32 @@ class RamSession(Session): class FileSession(Session): """Implementation of the File backend for sessions - + storage_path: the folder where session data will be saved. Each session will be saved as pickle.dump(data, expiration_time) in its own file; the filename will be self.SESSION_PREFIX + self.id. """ - + SESSION_PREFIX = 'session-' LOCK_SUFFIX = '.lock' - + def __init__(self, id=None, **kwargs): # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) Session.__init__(self, id=id, **kwargs) - + def setup(cls, **kwargs): """Set up the storage system for file-based sessions. - + This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) - + for k, v in kwargs.iteritems(): setattr(cls, k, v) - + # Warn if any lock files exist at startup. 
lockfiles = [fname for fname in os.listdir(cls.storage_path) if (fname.startswith(cls.SESSION_PREFIX) @@ -316,17 +316,17 @@ class FileSession(Session): "manually delete the lockfiles found at %r." % (len(lockfiles), plural, cls.storage_path)) setup = classmethod(setup) - + def _get_file_path(self): f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id) if not os.path.abspath(f).startswith(self.storage_path): raise cherrypy.HTTPError(400, "Invalid session id in cookie.") return f - + def _exists(self): path = self._get_file_path() return os.path.exists(path) - + def _load(self, path=None): if path is None: path = self._get_file_path() @@ -338,20 +338,20 @@ class FileSession(Session): f.close() except (IOError, EOFError): return None - + def _save(self, expiration_time): f = open(self._get_file_path(), "wb") try: pickle.dump((self._data, expiration_time), f) finally: f.close() - + def _delete(self): try: os.unlink(self._get_file_path()) except OSError: pass - + def acquire_lock(self, path=None): """Acquire an exclusive lock on the currently-loaded session data.""" if path is None: @@ -363,17 +363,17 @@ class FileSession(Session): except OSError: time.sleep(0.1) else: - os.close(lockfd) + os.close(lockfd) break self.locked = True - + def release_lock(self, path=None): """Release the lock on the currently-loaded session data.""" if path is None: path = self._get_file_path() os.unlink(path + self.LOCK_SUFFIX) self.locked = False - + def clean_up(self): """Clean up expired sessions.""" now = datetime.datetime.now() @@ -395,7 +395,7 @@ class FileSession(Session): os.unlink(path) finally: self.release_lock(path) - + def __len__(self): """Return the number of active sessions.""" return len([fname for fname in os.listdir(self.storage_path) @@ -412,38 +412,38 @@ class PostgresqlSession(Session): data text, expiration_time timestamp ) - + You must provide your own get_db function. 
""" - + def __init__(self, id=None, **kwargs): Session.__init__(self, id, **kwargs) self.cursor = self.db.cursor() - + def setup(cls, **kwargs): """Set up the storage system for Postgres-based sessions. - + This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ for k, v in kwargs.iteritems(): setattr(cls, k, v) - + self.db = self.get_db() setup = classmethod(setup) - + def __del__(self): if self.cursor: self.cursor.close() self.db.commit() - + def _exists(self): # Select session data from table self.cursor.execute('select data, expiration_time from session ' 'where id=%s', (self.id,)) rows = self.cursor.fetchall() return bool(rows) - + def _load(self): # Select session data from table self.cursor.execute('select data, expiration_time from session ' @@ -451,34 +451,34 @@ class PostgresqlSession(Session): rows = self.cursor.fetchall() if not rows: return None - + pickled_data, expiration_time = rows[0] data = pickle.loads(pickled_data) return data, expiration_time - + def _save(self, expiration_time): pickled_data = pickle.dumps(self._data) self.cursor.execute('update session set data = %s, ' 'expiration_time = %s where id = %s', (pickled_data, expiration_time, self.id)) - + def _delete(self): self.cursor.execute('delete from session where id=%s', (self.id,)) - + def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" # We use the "for update" clause to lock the row self.locked = True self.cursor.execute('select id from session where id=%s for update', (self.id,)) - + def release_lock(self): """Release the lock on the currently-loaded session data.""" # We just close the cursor and that will remove the lock # introduced by the "for update" clause self.cursor.close() self.locked = False - + def clean_up(self): """Clean up expired sessions.""" self.cursor.execute('delete from session where expiration_time < %s', @@ -486,43 +486,43 @@ class 
PostgresqlSession(Session): class MemcachedSession(Session): - + # The most popular memcached client for Python isn't thread-safe. # Wrap all .get and .set operations in a single lock. mc_lock = threading.RLock() - + # This is a seperate set of locks per session id. locks = {} - + servers = ['127.0.0.1:11211'] - + def setup(cls, **kwargs): """Set up the storage system for memcached-based sessions. - + This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ for k, v in kwargs.iteritems(): setattr(cls, k, v) - + import memcache cls.cache = memcache.Client(cls.servers) setup = classmethod(setup) - + def _exists(self): self.mc_lock.acquire() try: return bool(self.cache.get(self.id)) finally: self.mc_lock.release() - + def _load(self): self.mc_lock.acquire() try: return self.cache.get(self.id) finally: self.mc_lock.release() - + def _save(self, expiration_time): # Send the expiration time as "Unix time" (seconds since 1/1/1970) td = int(time.mktime(expiration_time.timetuple())) @@ -532,20 +532,20 @@ class MemcachedSession(Session): raise AssertionError("Session data for id %r not set." 
% self.id) finally: self.mc_lock.release() - + def _delete(self): self.cache.delete(self.id) - + def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" self.locked = True self.locks.setdefault(self.id, threading.RLock()).acquire() - + def release_lock(self): """Release the lock on the currently-loaded session data.""" self.locks[self.id].release() self.locked = False - + def __len__(self): """Return the number of active sessions.""" raise NotImplementedError @@ -555,15 +555,15 @@ class MemcachedSession(Session): def save(): """Save any changed session data.""" - + if not hasattr(cherrypy.serving, "session"): return - + # Guard against running twice if hasattr(cherrypy.request, "_sessionsaved"): return cherrypy.request._sessionsaved = True - + if cherrypy.response.stream: # If the body is being streamed, we have to save the data # *after* the response has been written out @@ -589,7 +589,7 @@ close.priority = 90 def init(storage_type='ram', path=None, path_header=None, name='session_id', timeout=60, domain=None, secure=False, clean_freq=5, **kwargs): """Initialize session object (using cookies). - + storage_type: one of 'ram', 'file', 'postgresql'. This will be used to look up the corresponding class in cherrypy.lib.sessions globals. For example, 'file' will use the FileSession class. @@ -603,31 +603,31 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', secure: if False (the default) the cookie 'secure' value will not be set. If True, the cookie 'secure' value will be set (to 1). clean_freq (minutes): the poll rate for expired session cleanup. - + Any additional kwargs will be bound to the new Session instance, and may be specific to the storage type. See the subclass of Session you're using for more information. 
""" - + request = cherrypy.request - + # Guard against running twice if hasattr(request, "_session_init_flag"): return request._session_init_flag = True - + # Check if request came with a session ID id = None if name in request.cookie: id = request.cookie[name].value - + # Find the storage class and call setup (first time only). storage_class = storage_type.title() + 'Session' storage_class = globals()[storage_class] if not hasattr(cherrypy, "session"): if hasattr(storage_class, "setup"): storage_class.setup(**kwargs) - + # Create and attach a new Session instance to cherrypy.serving. # It will possess a reference to (and lock, and lazily load) # the requested session data. @@ -638,11 +638,11 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', """Update the cookie every time the session id changes.""" cherrypy.response.cookie[name] = id sess.id_observers.append(update_cookie) - + # Create cherrypy.session which will proxy to cherrypy.serving.session if not hasattr(cherrypy, "session"): cherrypy.session = cherrypy._ThreadLocalProxy('session') - + set_response_cookie(path=path, path_header=path_header, name=name, timeout=timeout, domain=domain, secure=secure) @@ -650,7 +650,7 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', def set_response_cookie(path=None, path_header=None, name='session_id', timeout=60, domain=None, secure=False): """Set a response cookie for the client. - + path: the 'path' value to stick in the response cookie metadata. path_header: if 'path' is None (the default), then the response cookie 'path' will be pulled from request.headers[path_header]. 
@@ -665,14 +665,15 @@ def set_response_cookie(path=None, path_header=None, name='session_id', cookie[name] = cherrypy.serving.session.id cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header) or '/') - + # We'd like to use the "max-age" param as indicated in # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't # save it to disk and the session is lost if people close # the browser. So we have to use the old "expires" ... sigh ... ## cookie[name]['max-age'] = timeout * 60 - if timeout: - cookie[name]['expires'] = http.HTTPDate(time.time() + (timeout * 60)) + if False and timeout: # Changed by Kovid, we want the user to have to + # re-authenticate on browser restart + cookie[name]['expires'] = http.HTTPDate(time.time() + timeout) if domain is not None: cookie[name]['domain'] = domain if secure: